llama_cpp 0.17.9 → 0.18.0

This diff shows the changes between publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
data/examples/simple.rb DELETED
@@ -1,96 +0,0 @@
- #!/usr/bin/env ruby
- # frozen_string_literal: true
-
- # simple.rb is a simple text completion script.
- # It is created with reference to simple.cpp in llama.cpp examples:
- # https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp
-
- require 'llama_cpp'
- require 'thor'
- require 'etc'
-
- class Simple < Thor # rubocop:disable Style/Documentation
-   default_command :main
-   desc 'main', 'Simple completion'
-   option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
-   option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
-   option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
-   def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
-     n_len = 32
-     model_params = LLaMACpp::ModelParams.new
-     model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
-     context_params = LLaMACpp::ContextParams.new
-     context_params.seed = 1234
-     context_params.n_ctx = 2048
-     context_params.logits_all = true
-     context_params.n_threads = options[:n_threads]
-     context_params.n_threads_batch = options[:n_threads]
-     context = LLaMACpp::Context.new(model: model, params: context_params)
-
-     tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
-     n_ctx = context.n_ctx
-     n_kv_req = tokens_list.size + (n_len - tokens_list.size)
-     raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx
-
-     print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")
-
-     tokens_list.each { |token| print(context.model.token_to_piece(token)) }
-
-     batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
-     tokens_list.each_with_index do |token, id|
-       batch.set_token(batch.n_tokens, token)
-       batch.set_pos(batch.n_tokens, id)
-       batch.set_n_seq_id(batch.n_tokens, 1)
-       batch.set_seq_id(batch.n_tokens, 0, 0)
-       batch.set_logits(batch.n_tokens, false)
-       batch.n_tokens = batch.n_tokens + 1
-     end
-
-     batch.set_logits(batch.n_tokens - 1, true)
-
-     context.decode(batch)
-
-     n_cur = batch.n_tokens
-     n_decode = 0
-     n_vocab = context.model.n_vocab
-
-     t_start = Time.now
-
-     while n_cur <= n_len
-       logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]
-
-       base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
-       candidates = LLaMACpp::TokenDataArray.new(base_candidates)
-
-       new_token_id = context.sample_token_greedy(candidates)
-
-       if new_token_id == context.model.token_eos || n_cur == n_len
-         print("\n")
-         break
-       end
-
-       print(context.model.token_to_piece(new_token_id))
-
-       batch.n_tokens = 0
-
-       batch.set_token(batch.n_tokens, new_token_id)
-       batch.set_pos(batch.n_tokens, n_cur)
-       batch.set_n_seq_id(batch.n_tokens, 1)
-       batch.set_seq_id(batch.n_tokens, 0, 0)
-       batch.set_logits(batch.n_tokens, true)
-       batch.n_tokens = batch.n_tokens + 1
-
-       n_decode += 1
-       n_cur += 1
-       context.decode(batch)
-     end
-
-     t_end = Time.now
-
-     print("\nmain: decoded #{n_decode} tokens in #{(t_end - t_start).floor(2)} s, speed: #{n_decode.fdiv(t_end - t_start).floor(2)} t/s\n\n")
-
-     LLaMACpp.backend_free
-   end
- end
-
- Simple.start(ARGV)