llama_cpp 0.11.0 → 0.11.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/examples/simple.rb +93 -0
- data/ext/llama_cpp/llama_cpp.cpp +2 -0
- data/lib/llama_cpp/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 705f8a7e2228a324f14fa23ca093e2ce4408eacb839d891284c40e319b07940a
|
4
|
+
data.tar.gz: fc04d232f2c7ecfa1402aa711eda63e36a03d287a6bc7c8e0d2c791194ad9e9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 888ba5bfa23ab51746d49c2cc071f8a220d3de39f6c3a34576f35bcb993fc0be841481dcbca9762504397ca6555571f43b4ba2c0ae3dae3fcd8d29bd2735ae16
|
7
|
+
data.tar.gz: e8940f8ab7a542569a71ad5c869ac781b8ad958ca02d2a5547b792008c228ae1e9ff23cc5b9552e83b0c631805ec7edfea6138c5ae9d922daca06e5ab3f1490d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## [[0.11.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.11.0...v0.11.1)] - 2024-01-08
|
2
|
+
|
3
|
+
- Fix to set the values of `@n_tokens` and `@has_evaluated` instance variables in `decode` method of `Context`.
|
4
|
+
- Add document for `logits` method in `Context`.
|
5
|
+
- Add example script for simple text completion: examples/simple.rb
|
6
|
+
|
1
7
|
## [[0.11.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.10.3...v0.11.0)] - 2024-01-07
|
2
8
|
|
3
9
|
- Add `set_n_seq_id` and `get_n_seq_id` methods to `Batch`.
|
data/examples/simple.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

# simple.rb is a simple text completion script.
# It is created with reference to simple.cpp in llama.cpp examples:
# https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp

require 'llama_cpp'
require 'thor'

# CLI wrapper around llama_cpp.rb demonstrating greedy text completion.
#
# Usage: simple.rb -m path/to/model.gguf [-p "prompt"]
class Simple < Thor # rubocop:disable Style/Documentation
  default_command :main
  desc 'main', 'Simple completion'
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
    n_len = 32 # total sequence length (prompt + generated tokens)
    model_params = LLaMACpp::ModelParams.new
    model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
    context_params = LLaMACpp::ContextParams.new
    context_params.seed = 1234
    context_params.n_ctx = 2048
    context_params.logits_all = true
    context_params.n_threads = 4
    context = LLaMACpp::Context.new(model: model, params: context_params)

    tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
    n_ctx = context.n_ctx
    # Mirrors simple.cpp: required KV cache size equals the full sequence length.
    n_kv_req = tokens_list.size + (n_len - tokens_list.size)
    raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx

    print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")

    # Echo the prompt as the model sees it.
    tokens_list.each { |token| print(context.model.token_to_piece(token)) }

    # Fill the batch with the prompt tokens; logits are only needed for the
    # last token, which is flagged after the loop.
    batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
    tokens_list.each_with_index { |token, pos| append_to_batch(batch, token, pos, logits: false) }

    batch.set_logits(batch.n_tokens - 1, true)

    context.decode(batch)

    n_cur = batch.n_tokens
    n_decode = 0
    n_vocab = context.model.n_vocab

    # Monotonic clock: immune to wall-clock adjustments while measuring.
    t_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    while n_cur <= n_len
      # Logits of the last evaluated token (row size is n_vocab).
      logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]

      base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
      candidates = LLaMACpp::TokenDataArray.new(base_candidates)

      new_token_id = context.sample_token_greedy(candidates)

      if new_token_id == context.model.token_eos || n_cur == n_len
        print("\n")
        break
      end

      print(context.model.token_to_piece(new_token_id))

      # Reuse the batch for the single sampled token.
      batch.n_tokens = 0
      append_to_batch(batch, new_token_id, n_cur, logits: true)

      n_decode += 1
      n_cur += 1
      context.decode(batch)
    end

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t_start

    print("\nmain: decoded #{n_decode} tokens in #{elapsed.floor(2)} s, speed: #{n_decode.fdiv(elapsed).floor(2)} t/s\n\n")

    LLaMACpp.backend_free
  end

  private

  # Appends one token to the batch on sequence 0 at position `pos` and
  # advances batch.n_tokens. Set `logits: true` when the logits for this
  # token are needed after decoding.
  def append_to_batch(batch, token, pos, logits: false)
    idx = batch.n_tokens
    batch.set_token(idx, token)
    batch.set_pos(idx, pos)
    batch.set_n_seq_id(idx, 1)
    batch.set_seq_id(idx, 0, 0)
    batch.set_logits(idx, logits)
    batch.n_tokens = idx + 1
  end
end

Simple.start(ARGV)
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
data/lib/llama_cpp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.0
|
4
|
+
version: 0.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-01-07 00:00:00.000000000 Z
|
11
|
+
date: 2024-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- examples/chat.rb
|
27
27
|
- examples/embedding.rb
|
28
28
|
- examples/prompt_jp.txt
|
29
|
+
- examples/simple.rb
|
29
30
|
- ext/llama_cpp/extconf.rb
|
30
31
|
- ext/llama_cpp/llama_cpp.cpp
|
31
32
|
- ext/llama_cpp/llama_cpp.h
|