llama_cpp 0.11.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/examples/simple.rb +93 -0
- data/ext/llama_cpp/llama_cpp.cpp +2 -0
- data/lib/llama_cpp/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 705f8a7e2228a324f14fa23ca093e2ce4408eacb839d891284c40e319b07940a
|
4
|
+
data.tar.gz: fc04d232f2c7ecfa1402aa711eda63e36a03d287a6bc7c8e0d2c791194ad9e9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 888ba5bfa23ab51746d49c2cc071f8a220d3de39f6c3a34576f35bcb993fc0be841481dcbca9762504397ca6555571f43b4ba2c0ae3dae3fcd8d29bd2735ae16
|
7
|
+
data.tar.gz: e8940f8ab7a542569a71ad5c869ac781b8ad958ca02d2a5547b792008c228ae1e9ff23cc5b9552e83b0c631805ec7edfea6138c5ae9d922daca06e5ab3f1490d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## [[0.11.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.0...v0.11.1)] - 2024-01-08
|
2
|
+
|
3
|
+
- Fix to set the values of `@n_tokens` and `@has_evaluated` instance variables in `decode` method of `Context`.
|
4
|
+
- Add document for `logits` method in `Context`.
|
5
|
+
- Add example script for simple text completion: examples/simple.rb
|
6
|
+
|
1
7
|
## [[0.11.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.3...v0.11.0)] - 2024-01-07
|
2
8
|
|
3
9
|
- Add `set_n_seq_id` and `get_n_seq_id` methods to `Batch`.
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
# simple.rb is a simple text completion script.
|
5
|
+
# It is created with reference to simple.cpp in llama.cpp examples:
|
6
|
+
# https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp
|
7
|
+
|
8
|
+
require 'llama_cpp'
|
9
|
+
require 'thor'
|
10
|
+
|
11
|
+
class Simple < Thor # rubocop:disable Style/Documentation
|
12
|
+
default_command :main
|
13
|
+
desc 'main', 'Simple completion'
|
14
|
+
option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
|
15
|
+
option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
|
16
|
+
def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
17
|
+
n_len = 32
|
18
|
+
model_params = LLaMACpp::ModelParams.new
|
19
|
+
model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
|
20
|
+
context_params = LLaMACpp::ContextParams.new
|
21
|
+
context_params.seed = 1234
|
22
|
+
context_params.n_ctx = 2048
|
23
|
+
context_params.logits_all = true
|
24
|
+
context_params.n_threads = 4
|
25
|
+
context = LLaMACpp::Context.new(model: model, params: context_params)
|
26
|
+
|
27
|
+
tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
|
28
|
+
n_ctx = context.n_ctx
|
29
|
+
n_kv_req = tokens_list.size + (n_len - tokens_list.size)
|
30
|
+
raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx
|
31
|
+
|
32
|
+
print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")
|
33
|
+
|
34
|
+
tokens_list.each { |token| print(context.model.token_to_piece(token)) }
|
35
|
+
|
36
|
+
batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
|
37
|
+
tokens_list.each_with_index do |token, id|
|
38
|
+
batch.set_token(batch.n_tokens, token)
|
39
|
+
batch.set_pos(batch.n_tokens, id)
|
40
|
+
batch.set_n_seq_id(batch.n_tokens, 1)
|
41
|
+
batch.set_seq_id(batch.n_tokens, 0, 0)
|
42
|
+
batch.set_logits(batch.n_tokens, false)
|
43
|
+
batch.n_tokens = batch.n_tokens + 1
|
44
|
+
end
|
45
|
+
|
46
|
+
batch.set_logits(batch.n_tokens - 1, true)
|
47
|
+
|
48
|
+
context.decode(batch)
|
49
|
+
|
50
|
+
n_cur = batch.n_tokens
|
51
|
+
n_decode = 0
|
52
|
+
n_vocab = context.model.n_vocab
|
53
|
+
|
54
|
+
t_start = Time.now
|
55
|
+
|
56
|
+
while n_cur <= n_len
|
57
|
+
logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]
|
58
|
+
|
59
|
+
base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
|
60
|
+
candidates = LLaMACpp::TokenDataArray.new(base_candidates)
|
61
|
+
|
62
|
+
new_token_id = context.sample_token_greedy(candidates)
|
63
|
+
|
64
|
+
if new_token_id == context.model.token_eos || n_cur == n_len
|
65
|
+
print("\n")
|
66
|
+
break
|
67
|
+
end
|
68
|
+
|
69
|
+
print(context.model.token_to_piece(new_token_id))
|
70
|
+
|
71
|
+
batch.n_tokens = 0
|
72
|
+
|
73
|
+
batch.set_token(batch.n_tokens, new_token_id)
|
74
|
+
batch.set_pos(batch.n_tokens, n_cur)
|
75
|
+
batch.set_n_seq_id(batch.n_tokens, 1)
|
76
|
+
batch.set_seq_id(batch.n_tokens, 0, 0)
|
77
|
+
batch.set_logits(batch.n_tokens, true)
|
78
|
+
batch.n_tokens = batch.n_tokens + 1
|
79
|
+
|
80
|
+
n_decode += 1
|
81
|
+
n_cur += 1
|
82
|
+
context.decode(batch)
|
83
|
+
end
|
84
|
+
|
85
|
+
t_end = Time.now
|
86
|
+
|
87
|
+
print("\nmain: decoded #{n_decode} tokens in #{(t_end - t_start).floor(2)} s, speed: #{n_decode.fdiv(t_end - t_start).floor(2)} t/s\n\n")
|
88
|
+
|
89
|
+
LLaMACpp.backend_free
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
Simple.start(ARGV)
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
data/lib/llama_cpp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 0.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-01-07 00:00:00.000000000 Z
|
11
|
+
date: 2024-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- examples/chat.rb
|
27
27
|
- examples/embedding.rb
|
28
28
|
- examples/prompt_jp.txt
|
29
|
+
- examples/simple.rb
|
29
30
|
- ext/llama_cpp/extconf.rb
|
30
31
|
- ext/llama_cpp/llama_cpp.cpp
|
31
32
|
- ext/llama_cpp/llama_cpp.h
|