llama_cpp 0.11.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 58b6e91201c53b1ced4db60f325d3ced3fa486e24a84d53b0e5c62f613e33fc9
-   data.tar.gz: 7b1c4594a79c8ac86aef84be3608dbd51e397c8fe4226d65b3ee87aa1fc800b2
+   metadata.gz: 705f8a7e2228a324f14fa23ca093e2ce4408eacb839d891284c40e319b07940a
+   data.tar.gz: fc04d232f2c7ecfa1402aa711eda63e36a03d287a6bc7c8e0d2c791194ad9e9a
  SHA512:
-   metadata.gz: aece2e7a49f08d0799ff6eb24904ef176fc916eeb57380916b2c8397ea3236991b52fd806aa8c76822a7c1beac86348f3ceb7094880c8d79015debc62babaa0c
-   data.tar.gz: 2049d26027e8be4e47bbbb12a9a521776c369ca45d05743dec3c96249a09fe67e31a21aa09dcb8d717f39ee29904ee082bcbfa292fd6c1e956d6e319809ca31c
+   metadata.gz: 888ba5bfa23ab51746d49c2cc071f8a220d3de39f6c3a34576f35bcb993fc0be841481dcbca9762504397ca6555571f43b4ba2c0ae3dae3fcd8d29bd2735ae16
+   data.tar.gz: e8940f8ab7a542569a71ad5c869ac781b8ad958ca02d2a5547b792008c228ae1e9ff23cc5b9552e83b0c631805ec7edfea6138c5ae9d922daca06e5ab3f1490d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ## [[0.11.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.0...v0.11.1)] - 2024-01-08
+
+ - Fix to set the values of `@n_tokens` and `@has_evaluated` instance variables in `decode` method of `Context`.
+ - Add document for `logits` method in `Context`.
+ - Add example script for simple text completion: examples/simple.rb
+
  ## [[0.11.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.3...v0.11.0)] - 2024-01-07

  - Add `set_n_seq_id` and `get_n_seq_id` methods to `Batch`.
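
The `decode` fix is what the new example script depends on: simple.rb reads `Context#logits` immediately after `Context#decode`. A minimal sketch of that pattern, condensed from examples/simple.rb below (the model path is a placeholder; all method names are taken from this diff):

    require 'llama_cpp'

    model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: LLaMACpp::ModelParams.new)
    context_params = LLaMACpp::ContextParams.new
    context_params.logits_all = true
    context = LLaMACpp::Context.new(model: model, params: context_params)

    # Tokenize a prompt and stage it in a batch, requesting logits at
    # every position (logits_all is enabled above).
    tokens = context.model.tokenize(text: 'Hello', add_bos: true)
    batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
    tokens.each_with_index do |token, pos|
      batch.set_token(pos, token)
      batch.set_pos(pos, pos)
      batch.set_n_seq_id(pos, 1)
      batch.set_seq_id(pos, 0, 0)
      batch.set_logits(pos, true)
      batch.n_tokens = pos + 1
    end

    # In 0.11.0, decode left @n_tokens and @has_evaluated unset on the
    # context; 0.11.1 records both, so the read below reflects the batch
    # that was just evaluated.
    context.decode(batch)
    logits = context.logits # flat Float array, n_vocab values per position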
data/examples/simple.rb ADDED
@@ -0,0 +1,93 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ # simple.rb is a simple text completion script.
+ # It is created with reference to simple.cpp in llama.cpp examples:
+ # https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp
+
+ require 'llama_cpp'
+ require 'thor'
+
+ class Simple < Thor # rubocop:disable Style/Documentation
+   default_command :main
+   desc 'main', 'Simple completion'
+   option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
+   option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
+   def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
+     n_len = 32
+     model_params = LLaMACpp::ModelParams.new
+     model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
+     context_params = LLaMACpp::ContextParams.new
+     context_params.seed = 1234
+     context_params.n_ctx = 2048
+     context_params.logits_all = true
+     context_params.n_threads = 4
+     context = LLaMACpp::Context.new(model: model, params: context_params)
+
+     tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
+     n_ctx = context.n_ctx
+     n_kv_req = tokens_list.size + (n_len - tokens_list.size)
+     raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx
+
+     print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")
+
+     tokens_list.each { |token| print(context.model.token_to_piece(token)) }
+
+     batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
+     tokens_list.each_with_index do |token, id|
+       batch.set_token(batch.n_tokens, token)
+       batch.set_pos(batch.n_tokens, id)
+       batch.set_n_seq_id(batch.n_tokens, 1)
+       batch.set_seq_id(batch.n_tokens, 0, 0)
+       batch.set_logits(batch.n_tokens, false)
+       batch.n_tokens = batch.n_tokens + 1
+     end
+
+     batch.set_logits(batch.n_tokens - 1, true)
+
+     context.decode(batch)
+
+     n_cur = batch.n_tokens
+     n_decode = 0
+     n_vocab = context.model.n_vocab
+
+     t_start = Time.now
+
+     while n_cur <= n_len
+       logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]
+
+       base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
+       candidates = LLaMACpp::TokenDataArray.new(base_candidates)
+
+       new_token_id = context.sample_token_greedy(candidates)
+
+       if new_token_id == context.model.token_eos || n_cur == n_len
+         print("\n")
+         break
+       end
+
+       print(context.model.token_to_piece(new_token_id))
+
+       batch.n_tokens = 0
+
+       batch.set_token(batch.n_tokens, new_token_id)
+       batch.set_pos(batch.n_tokens, n_cur)
+       batch.set_n_seq_id(batch.n_tokens, 1)
+       batch.set_seq_id(batch.n_tokens, 0, 0)
+       batch.set_logits(batch.n_tokens, true)
+       batch.n_tokens = batch.n_tokens + 1
+
+       n_decode += 1
+       n_cur += 1
+       context.decode(batch)
+     end
+
+     t_end = Time.now
+
+     print("\nmain: decoded #{n_decode} tokens in #{(t_end - t_start).floor(2)} s, speed: #{n_decode.fdiv(t_end - t_start).floor(2)} t/s\n\n")
+
+     LLaMACpp.backend_free
+   end
+ end
+
+ Simple.start(ARGV)
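
With the gem installed, the script runs as a Thor CLI (note that thor is not a dependency of the gem itself, per `dependencies: []` in the metadata, so it must be installed separately). The model path below is a placeholder for any GGUF file the bundled llama.cpp (b1768) can load:

    $ ruby examples/simple.rb --model /path/to/model.gguf --prompt 'Hello my name is'

`--prompt` is optional and defaults to 'Hello my name is', per the `option` declaration above.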
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -2162,6 +2162,8 @@ private:
        rb_raise(rb_eRuntimeError, "Failed to decode");
        return Qnil;
      }
+     rb_iv_set(self, "@n_tokens", INT2NUM(batch_ptr->batch.n_tokens));
+     rb_iv_set(self, "@has_evaluated", Qtrue);
      return Qnil;
    }

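Both `rb_iv_set` calls sit on the success path of `decode`, after the error branch above has already returned, so the Ruby-side state now mirrors the batch that was actually evaluated. Presumably `Context#logits` consults these ivars to guard and size its result; that code is not part of this hunk. From Ruby, the recorded state can be observed directly:

    # Hypothetical check; the ivar names are taken from the hunk above.
    context.decode(batch)
    context.instance_variable_get(:@n_tokens)      # => batch.n_tokens
    context.instance_variable_get(:@has_evaluated) # => true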
data/lib/llama_cpp/version.rb CHANGED
@@ -3,7 +3,7 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
    # The version of llama_cpp.rb you install.
-   VERSION = '0.11.0'
+   VERSION = '0.11.1'

    # The version of llama.cpp bundled with llama_cpp.rb.
    LLAMA_CPP_VERSION = 'b1768'
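
After upgrading, the bump is observable from the two constants this file defines:

    require 'llama_cpp'
    puts LLaMACpp::VERSION           # => 0.11.1
    puts LLaMACpp::LLAMA_CPP_VERSION # => b1768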
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
-   version: 0.11.0
+   version: 0.11.1
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-01-07 00:00:00.000000000 Z
+ date: 2024-01-08 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email:
@@ -26,6 +26,7 @@ files:
  - examples/chat.rb
  - examples/embedding.rb
  - examples/prompt_jp.txt
+ - examples/simple.rb
  - ext/llama_cpp/extconf.rb
  - ext/llama_cpp/llama_cpp.cpp
  - ext/llama_cpp/llama_cpp.h