llama_cpp 0.17.10 → 0.18.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +8 -29
- data/ext/llama_cpp/extconf.rb +0 -3
- data/ext/llama_cpp/llama_cpp.c +5157 -0
- data/ext/llama_cpp/llama_cpp.h +0 -5
- data/lib/llama_cpp/version.rb +3 -3
- data/lib/llama_cpp.rb +38 -83
- data/sig/llama_cpp.rbs +0 -59
- metadata +4 -12
- data/examples/README.md +0 -92
- data/examples/chat.rb +0 -198
- data/examples/embedding.rb +0 -42
- data/examples/prompt_jp.txt +0 -8
- data/examples/simple.rb +0 -96
- data/ext/llama_cpp/llama_cpp.cpp +0 -3764
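In short, 0.18.0 swaps the hand-written C++ extension (data/ext/llama_cpp/llama_cpp.cpp, deleted) for a new C extension (data/ext/llama_cpp/llama_cpp.c, added) and drops the bundled examples and the RBS signatures. A plain Bundler pin is enough to opt into the new release; the gem name and version come from this diff, while the Gemfile lines themselves are a generic, hypothetical example:

  # Gemfile — hypothetical example pinning the release covered by this diff
  source 'https://rubygems.org'

  gem 'llama_cpp', '0.18.0'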
data/examples/simple.rb
DELETED
@@ -1,96 +0,0 @@
-#!/usr/bin/env ruby
-# frozen_string_literal: true
-
-# simple.rb is a simple text completion script.
-# It is created with reference to simple.cpp in llama.cpp examples:
-# https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp
-
-require 'llama_cpp'
-require 'thor'
-require 'etc'
-
-class Simple < Thor # rubocop:disable Style/Documentation
-  default_command :main
-  desc 'main', 'Simple completion'
-  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
-  option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
-  option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
-  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
-    n_len = 32
-    model_params = LLaMACpp::ModelParams.new
-    model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
-    context_params = LLaMACpp::ContextParams.new
-    context_params.seed = 1234
-    context_params.n_ctx = 2048
-    context_params.logits_all = true
-    context_params.n_threads = options[:n_threads]
-    context_params.n_threads_batch = options[:n_threads]
-    context = LLaMACpp::Context.new(model: model, params: context_params)
-
-    tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
-    n_ctx = context.n_ctx
-    n_kv_req = tokens_list.size + (n_len - tokens_list.size)
-    raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx
-
-    print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")
-
-    tokens_list.each { |token| print(context.model.token_to_piece(token)) }
-
-    batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
-    tokens_list.each_with_index do |token, id|
-      batch.set_token(batch.n_tokens, token)
-      batch.set_pos(batch.n_tokens, id)
-      batch.set_n_seq_id(batch.n_tokens, 1)
-      batch.set_seq_id(batch.n_tokens, 0, 0)
-      batch.set_logits(batch.n_tokens, false)
-      batch.n_tokens = batch.n_tokens + 1
-    end
-
-    batch.set_logits(batch.n_tokens - 1, true)
-
-    context.decode(batch)
-
-    n_cur = batch.n_tokens
-    n_decode = 0
-    n_vocab = context.model.n_vocab
-
-    t_start = Time.now
-
-    while n_cur <= n_len
-      logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]
-
-      base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
-      candidates = LLaMACpp::TokenDataArray.new(base_candidates)
-
-      new_token_id = context.sample_token_greedy(candidates)
-
-      if new_token_id == context.model.token_eos || n_cur == n_len
-        print("\n")
-        break
-      end
-
-      print(context.model.token_to_piece(new_token_id))
-
-      batch.n_tokens = 0
-
-      batch.set_token(batch.n_tokens, new_token_id)
-      batch.set_pos(batch.n_tokens, n_cur)
-      batch.set_n_seq_id(batch.n_tokens, 1)
-      batch.set_seq_id(batch.n_tokens, 0, 0)
-      batch.set_logits(batch.n_tokens, true)
-      batch.n_tokens = batch.n_tokens + 1
-
-      n_decode += 1
-      n_cur += 1
-      context.decode(batch)
-    end
-
-    t_end = Time.now
-
-    print("\nmain: decoded #{n_decode} tokens in #{(t_end - t_start).floor(2)} s, speed: #{n_decode.fdiv(t_end - t_start).floor(2)} t/s\n\n")
-
-    LLaMACpp.backend_free
-  end
-end
-
-Simple.start(ARGV)
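For reference, the core of the deleted example: after the prompt batch is decoded once, each iteration packs the single sampled token into the reused batch, decodes it, and greedily picks the next token from the fresh logits. A minimal sketch of that per-token step, using only the 0.17.x calls visible in the diff above (this API is removed in 0.18.0; the helper name decode_next is hypothetical):

  # Sketch of one generation step from the deleted simple.rb (0.17.x API).
  def decode_next(context, batch, prev_token, pos, n_vocab)
    batch.n_tokens = 0                  # reuse the batch for a single entry
    batch.set_token(0, prev_token)      # the token just sampled
    batch.set_pos(0, pos)               # its absolute position in the sequence
    batch.set_n_seq_id(0, 1)            # one sequence id for this entry
    batch.set_seq_id(0, 0, 0)           # sequence 0
    batch.set_logits(0, true)           # request logits for this entry
    batch.n_tokens = 1
    context.decode(batch)

    # With a one-token batch, its logits start at offset 0.
    logits = context.logits[0, n_vocab]
    candidates = LLaMACpp::TokenDataArray.new(
      Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
    )
    context.sample_token_greedy(candidates)
  end

Note that logits are only requested where sampling needs them, which is why the prompt batch above passes set_logits(..., false) for every token except the last.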