llama_cpp 0.17.9 → 0.18.0

data/examples/simple.rb DELETED
@@ -1,96 +0,0 @@
-#!/usr/bin/env ruby
-# frozen_string_literal: true
-
-# simple.rb is a simple text completion script.
-# It is created with reference to simple.cpp in llama.cpp examples:
-# https://github.com/ggerganov/llama.cpp/blob/master/examples/simple/simple.cpp
-
-require 'llama_cpp'
-require 'thor'
-require 'etc'
-
-class Simple < Thor # rubocop:disable Style/Documentation
-  default_command :main
-  desc 'main', 'Simple completion'
-  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
-  option :prompt, type: :string, aliases: '-p', desc: 'prompt to start with', default: 'Hello my name is'
-  option :n_threads, type: :numeric, desc: 'number of threads', default: Etc.nprocessors
-  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
-    n_len = 32
-    model_params = LLaMACpp::ModelParams.new
-    model = LLaMACpp::Model.new(model_path: options[:model], params: model_params)
-    context_params = LLaMACpp::ContextParams.new
-    context_params.seed = 1234
-    context_params.n_ctx = 2048
-    context_params.logits_all = true
-    context_params.n_threads = options[:n_threads]
-    context_params.n_threads_batch = options[:n_threads]
-    context = LLaMACpp::Context.new(model: model, params: context_params)
-
-    tokens_list = context.model.tokenize(text: options[:prompt], add_bos: true)
-    n_ctx = context.n_ctx
-    n_kv_req = tokens_list.size + (n_len - tokens_list.size)
-    raise 'n_kv_req > n_ctx, the required KV cache size is not big enough' if n_kv_req > n_ctx
-
-    print("\nmain: n_len = #{n_len}, n_ctx = #{n_ctx}, n_kv_req = #{n_kv_req}\n\n")
-
-    tokens_list.each { |token| print(context.model.token_to_piece(token)) }
-
-    batch = LLaMACpp::Batch.new(max_n_token: 512, n_embd: 0, max_n_seq: 1)
-    tokens_list.each_with_index do |token, id|
-      batch.set_token(batch.n_tokens, token)
-      batch.set_pos(batch.n_tokens, id)
-      batch.set_n_seq_id(batch.n_tokens, 1)
-      batch.set_seq_id(batch.n_tokens, 0, 0)
-      batch.set_logits(batch.n_tokens, false)
-      batch.n_tokens = batch.n_tokens + 1
-    end
-
-    batch.set_logits(batch.n_tokens - 1, true)
-
-    context.decode(batch)
-
-    n_cur = batch.n_tokens
-    n_decode = 0
-    n_vocab = context.model.n_vocab
-
-    t_start = Time.now
-
-    while n_cur <= n_len
-      logits = context.logits[((batch.n_tokens - 1) * n_vocab)..]
-
-      base_candidates = Array.new(n_vocab) { |i| LLaMACpp::TokenData.new(id: i, logit: logits[i] || 0.0, p: 0.0) }
-      candidates = LLaMACpp::TokenDataArray.new(base_candidates)
-
-      new_token_id = context.sample_token_greedy(candidates)
-
-      if new_token_id == context.model.token_eos || n_cur == n_len
-        print("\n")
-        break
-      end
-
-      print(context.model.token_to_piece(new_token_id))
-
-      batch.n_tokens = 0
-
-      batch.set_token(batch.n_tokens, new_token_id)
-      batch.set_pos(batch.n_tokens, n_cur)
-      batch.set_n_seq_id(batch.n_tokens, 1)
-      batch.set_seq_id(batch.n_tokens, 0, 0)
-      batch.set_logits(batch.n_tokens, true)
-      batch.n_tokens = batch.n_tokens + 1
-
-      n_decode += 1
-      n_cur += 1
-      context.decode(batch)
-    end
-
-    t_end = Time.now
-
-    print("\nmain: decoded #{n_decode} tokens in #{(t_end - t_start).floor(2)} s, speed: #{n_decode.fdiv(t_end - t_start).floor(2)} t/s\n\n")
-
-    LLaMACpp.backend_free
-  end
-end
-
-Simple.start(ARGV)