llama_cpp 0.11.0 → 0.11.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 58b6e91201c53b1ced4db60f325d3ced3fa486e24a84d53b0e5c62f613e33fc9
4
- data.tar.gz: 7b1c4594a79c8ac86aef84be3608dbd51e397c8fe4226d65b3ee87aa1fc800b2
3
+ metadata.gz: 705f8a7e2228a324f14fa23ca093e2ce4408eacb839d891284c40e319b07940a
4
+ data.tar.gz: fc04d232f2c7ecfa1402aa711eda63e36a03d287a6bc7c8e0d2c791194ad9e9a
5
5
  SHA512:
6
- metadata.gz: aece2e7a49f08d0799ff6eb24904ef176fc916eeb57380916b2c8397ea3236991b52fd806aa8c76822a7c1beac86348f3ceb7094880c8d79015debc62babaa0c
7
- data.tar.gz: 2049d26027e8be4e47bbbb12a9a521776c369ca45d05743dec3c96249a09fe67e31a21aa09dcb8d717f39ee29904ee082bcbfa292fd6c1e956d6e319809ca31c
6
+ metadata.gz: 888ba5bfa23ab51746d49c2cc071f8a220d3de39f6c3a34576f35bcb993fc0be841481dcbca9762504397ca6555571f43b4ba2c0ae3dae3fcd8d29bd2735ae16
7
+ data.tar.gz: e8940f8ab7a542569a71ad5c869ac781b8ad958ca02d2a5547b792008c228ae1e9ff23cc5b9552e83b0c631805ec7edfea6138c5ae9d922daca06e5ab3f1490d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [[0.11.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.0...v0.11.1)] - 2024-01-08
2
+
3
+ - Fix the `decode` method of `Context` to set the `@n_tokens` and `@has_evaluated` instance variables.
4
+ - Add documentation for the `logits` method in `Context`.
5
+ - Add an example script for simple text completion: examples/simple.rb
6
+
1
7
  ## [[0.11.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.3...v0.11.0)] - 2024-01-07
2
8
 
3
9
  - Add `set_n_seq_id` and `get_n_seq_id` methods to `Batch`.
data/examples/simple.rb ADDED
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # simple.rb is a simple text completion script.
5
+ # It is based on simple.cpp from the llama.cpp examples:
6
+ # https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp
7
+
8
+ require 'llama_cpp'
9
+ require 'thor'
10
+
11
+ class Simple < Thor # rubocop:disable Style/Documentation
12
+ default_command :main
13
+ desc 'main', 'Simple completion'
14
+ option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
15
+ option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
16
+ def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
17
+ n_len = 32
18
+ model_params = LLaMACpp::ModelParams.new
19
+ model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
20
+ context_params = LLaMACpp::ContextParams.new
21
+ context_params.seed = 1234
22
+ context_params.n_ctx = 2048
23
+ context_params.logits_all = true
24
+ context_params.n_threads = 4
25
+ context = LLaMACpp::Context.new(model: model, params: context_params)
26
+
27
+ tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
28
+ n_ctx = context.n_ctx
29
+ n_kv_req = tokens_list.size + (n_len - tokens_list.size)
30
+ raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx
31
+
32
+ print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")
33
+
34
+ tokens_list.each { |token| print(context.model.token_to_piece(token)) }
35
+
36
+ batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
37
+ tokens_list.each_with_index do |token, id|
38
+ batch.set_token(batch.n_tokens, token)
39
+ batch.set_pos(batch.n_tokens, id)
40
+ batch.set_n_seq_id(batch.n_tokens, 1)
41
+ batch.set_seq_id(batch.n_tokens, 0, 0)
42
+ batch.set_logits(batch.n_tokens, false)
43
+ batch.n_tokens = batch.n_tokens + 1
44
+ end
45
+
46
+ batch.set_logits(batch.n_tokens - 1, true)
47
+
48
+ context.decode(batch)
49
+
50
+ n_cur = batch.n_tokens
51
+ n_decode = 0
52
+ n_vocab = context.model.n_vocab
53
+
54
+ t_start = Time.now
55
+
56
+ while n_cur <= n_len
57
+ logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]
58
+
59
+ base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
60
+ candidates = LLaMACpp::TokenDataArray.new(base_candidates)
61
+
62
+ new_token_id = context.sample_token_greedy(candidates)
63
+
64
+ if new_token_id == context.model.token_eos || n_cur == n_len
65
+ print("\n")
66
+ break
67
+ end
68
+
69
+ print(context.model.token_to_piece(new_token_id))
70
+
71
+ batch.n_tokens = 0
72
+
73
+ batch.set_token(batch.n_tokens, new_token_id)
74
+ batch.set_pos(batch.n_tokens, n_cur)
75
+ batch.set_n_seq_id(batch.n_tokens, 1)
76
+ batch.set_seq_id(batch.n_tokens, 0, 0)
77
+ batch.set_logits(batch.n_tokens, true)
78
+ batch.n_tokens = batch.n_tokens + 1
79
+
80
+ n_decode += 1
81
+ n_cur += 1
82
+ context.decode(batch)
83
+ end
84
+
85
+ t_end = Time.now
86
+
87
+ print("\nmain: decoded #{n_decode} tokens in #{(t_end - t_start).floor(2)} s, speed: #{n_decode.fdiv(t_end - t_start).floor(2)} t/s\n\n")
88
+
89
+ LLaMACpp.backend_free
90
+ end
91
+ end
92
+
93
+ Simple.start(ARGV)
data/ext/llama_cpp/llama_cpp.cpp CHANGED
@@ -2162,6 +2162,8 @@ private:
2162
2162
  rb_raise(rb_eRuntimeError, "Failed to decode");
2163
2163
  return Qnil;
2164
2164
  }
2165
+ rb_iv_set(self, "@n_tokens", INT2NUM(batch_ptr->batch.n_tokens));
2166
+ rb_iv_set(self, "@has_evaluated", Qtrue);
2165
2167
  return Qnil;
2166
2168
  }
2167
2169
 
@@ -3,7 +3,7 @@
3
3
  # llama_cpp.rb provides Ruby bindings for llama.cpp.
4
4
  module LLaMACpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.11.0'
6
+ VERSION = '0.11.1'
7
7
 
8
8
  # The version of llama.cpp bundled with llama_cpp.rb.
9
9
  LLAMA_CPP_VERSION = 'b1768'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.11.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-07 00:00:00.000000000 Z
11
+ date: 2024-01-08 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
14
14
  email:
@@ -26,6 +26,7 @@ files:
26
26
  - examples/chat.rb
27
27
  - examples/embedding.rb
28
28
  - examples/prompt_jp.txt
29
+ - examples/simple.rb
29
30
  - ext/llama_cpp/extconf.rb
30
31
  - ext/llama_cpp/llama_cpp.cpp
31
32
  - ext/llama_cpp/llama_cpp.h