llama_cpp 0.5.3 → 0.6.0

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: c45589a61587acfbe88add77ffb135a7949619ba2936178c59126c24c30e23cc
- data.tar.gz: 5866b5b5f8dab59432cc91beca290f927a8d1bc694f83c8ccbe366c6f636f47c
+ metadata.gz: 854493444a65cd1239649b991c8e6538c542c02a052932f6a69c56c984e28f58
+ data.tar.gz: 4e0b70de25eb2661b693af0d488efd25f570c3f62d4b9044fdd5c14fb5b9fac6
  SHA512:
- metadata.gz: 89714a2a920172c1ddc4fff56a11390ad97db62eb6bd4eefe3ba9376132bd6646eda7569acea49ff3a1ce87486cac0e623cac4fddfcb2b57629c30ee3457d38b
- data.tar.gz: 2c5494528f55b86c57fccb18658350058210e85f31e9ecb8b3587a4da68a1465a0987a98e86dbae16fa5fea9a9502aed96afec3679e5553a82805f4436ef3020
+ metadata.gz: b2524b8eb6e8568116f3c33eb57b764044083ee2ff2bbb7f15fc6301b024197ea8fca75968535b302a9e70449c9f9f28e0760cf4bfefb00ed8137c18e84137d5
+ data.tar.gz: faf26b552a8a862a97129b5bd25e05b3ae3edd2f8b118622b119634e4b004c05d200653c40085e4a28243c8994c517699baa35d3a8096ad8ac598fd637cf0565
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+ ## [[0.6.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.3...v0.6.0)] - 2023-09-30
+
+ **Breaking Changes**
+ - Bump bundled llama.cpp from b1266 to b1292.
+ - There are many API changes, so please refer to the commits.
+
+ It is becoming difficult to keep up with major changes in llama.cpp,
+ and I may give up on developing this gem in the future to prioritize my own life.
+
  ## [[0.5.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.2...v0.5.3)] - 2023-09-23

  - Bump bundled llama.cpp from b1 to b1266.
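
As a quick orientation before the file-by-file diff, here is a minimal before/after sketch of the 0.5.x → 0.6.0 migration, assembled from the README and example changes shown below. The model path is a placeholder, and anything not visible in this diff should be checked against the gem's own documentation.

```ruby
require 'llama_cpp'

# 0.5.x (removed): a single ContextParams object carried every setting
# and was passed to Model.new; Context.new took only the model.
#
#   params = LLaMACpp::ContextParams.new
#   params.seed = 42
#   model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: params)
#   context = LLaMACpp::Context.new(model: model)

# 0.6.0: model-level settings (e.g. n_gpu_layers) move to ModelParams,
# context-level settings (e.g. seed, embedding) stay on ContextParams,
# and Context.new now takes its own params argument.
model_params = LLaMACpp::ModelParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: model_params)

context_params = LLaMACpp::ContextParams.new
context_params.seed = 42
context = LLaMACpp::Context.new(model: model, params: context_params)

# generate no longer takes an n_threads keyword.
puts LLaMACpp.generate(context, 'Hello, World.')
```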
data/README.md CHANGED
@@ -59,13 +59,14 @@ An example of Ruby code that generates sentences with the quantization model is
  ```ruby
  require 'llama_cpp'

- params = LLaMACpp::ContextParams.new
- params.seed = 42
+ model_params = LLaMACpp::ModelParams.new
+ model = LLaMACpp::Model.new(model_path: '/home/user/llama.cpp/models/open_llama_7b/ggml-model-q4_0.bin', params: model_params)

- model = LLaMACpp::Model.new(model_path: '/home/user/llama.cpp/models/open_llama_7b/ggml-model-q4_0.bin', params: params)
- context = LLaMACpp::Context.new(model: model)
+ context_params = LLaMACpp::ContextParams.new
+ context_params.seed = 42
+ context = LLaMACpp::Context.new(model: model, params: context_params)

- puts LLaMACpp.generate(context, 'Hello, World.', n_threads: 4)
+ puts LLaMACpp.generate(context, 'Hello, World.')
  ```

  ## Examples
data/examples/chat.rb CHANGED
@@ -14,7 +14,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  default_command :main
  desc 'main', 'Start chat'
  option :seed, type: :numeric, aliases: '-s', desc: 'random seed', default: -1
- option :threads, type: :numeric, aliases: '-t', desc: 'number of threads', default: 2
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :file, type: :string, aliases: '-f', desc: 'prompt file to start generation'
  option :reverse_prompt, type: :string, aliases: '-r', desc: 'halt generation at PROMPT, return control in interactive mode'
@@ -32,16 +31,17 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  option :temp, type: :numeric, desc: 'temperature', default: 0.8
  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
  def main # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
- params = LLaMACpp::ContextParams.new
- params.seed = options[:seed] if options[:seed] != -1
- params.n_gpu_layers = options[:n_gpu_layers]
- model = LLaMACpp::Model.new(model_path: options[:model], params: params)
- context = LLaMACpp::Context.new(model: model)
+ mdl_params = LLaMACpp::ModelParams.new
+ mdl_params.n_gpu_layers = options[:n_gpu_layers]
+ model = LLaMACpp::Model.new(model_path: options[:model], params: mdl_params)
+ ctx_params = LLaMACpp::ContextParams.new
+ ctx_params.seed = options[:seed] if options[:seed] != -1
+ context = LLaMACpp::Context.new(model: model, params: ctx_params)

  antiprompt = options[:reverse_prompt] || 'User:'
  start_prompt = read_prompt(options[:file]) || default_prompt(antiprompt)

- embd_input = context.tokenize(text: start_prompt, add_bos: true)
+ embd_input = context.model.tokenize(text: start_prompt, add_bos: true)

  n_ctx = context.n_ctx
  raise ArgumentError, "prompt is too long #{embd_input.size} tokens, maximum is #{n_ctx - 4}" if embd_input.size > n_ctx - 4
@@ -58,7 +58,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  n_consumed = 0
  n_past = 0
  n_remain = options[:n_predict]
- n_vocab = context.n_vocab
+ n_vocab = context.model.n_vocab

  while interactive
  unless embd.empty?
@@ -70,7 +70,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation

  0.step(embd.size - 1, options[:batch_size]) do |i|
  n_eval = [options[:batch_size], embd.size - i].min
- context.eval(tokens: embd[i...i + n_eval], n_past: n_past, n_threads: options[:threads])
+ context.eval(tokens: embd[i...i + n_eval], n_past: n_past)
  n_past += n_eval
  end
  end
@@ -102,7 +102,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  if id == context.token_eos
  id = context.token_nl
  unless antiprompt.empty?
- first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
+ first_antiprompt = context.model.tokenize(text: antiprompt, add_bos: false)
  embd_input.concat(first_antiprompt)
  end
  end
@@ -122,7 +122,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation

  if input_echo
  output = []
- embd.each { |token| output << context.token_to_piece(token) }
+ embd.each { |token| output << context.model.token_to_piece(token) }
  output_str = output.join
  output_str.chomp!(antiprompt) if first_input
  print(output_str)
@@ -131,7 +131,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  if embd_input.size <= n_consumed
  if antiprompt.size.positive?
  last_output = []
- last_n_tokens.each { |token| last_output << context.token_to_piece(token) }
+ last_n_tokens.each { |token| last_output << context.model.token_to_piece(token) }
  last_output_str = last_output.join

  search_start_pos = last_output_str.size > antiprompt.size ? last_output_str.size - antiprompt.size : 0
@@ -150,7 +150,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  break interactive = false if buffer.nil?

  if buffer.size > 1
- line_input = context.tokenize(text: "#{buffer}\n", add_bos: false)
+ line_input = context.model.tokenize(text: "#{buffer}\n", add_bos: false)
  embd_input.concat(line_input)
  n_remain -= line_input.size
  end
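
The changes above (and in the embedding example that follows) repeat one pattern: vocabulary-level calls such as tokenize, token_to_piece, and n_vocab are now reached through the model (here via context.model), and Context#eval no longer accepts an n_threads keyword. A condensed sketch of that pattern, using a placeholder model path and prompt:

```ruby
require 'llama_cpp'

# Placeholder path and prompt for illustration only.
model = LLaMACpp::Model.new(model_path: '/path/to/model.bin', params: LLaMACpp::ModelParams.new)
context = LLaMACpp::Context.new(model: model, params: LLaMACpp::ContextParams.new)

# Tokenization moved from Context to Model.
tokens = context.model.tokenize(text: 'Hello, World.', add_bos: true)

# eval no longer takes n_threads.
context.eval(tokens: tokens, n_past: 0)

# Converting tokens back to text also goes through the model now.
puts tokens.map { |token| context.model.token_to_piece(token) }.join
```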
data/examples/embedding.rb CHANGED
@@ -12,23 +12,23 @@ class Embedding < Thor # rubocop:disable Style/Documentation
  default_command :main
  desc 'main', 'Extract embedding from prompt'
  option :seed, type: :numeric, aliases: '-s', desc: 'random seed', default: -1
- option :threads, type: :numeric, aliases: '-t', desc: 'number of threads', default: 2
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :prompt, type: :string, aliases: '-p', desc: 'prompt to generate embedding', required: true
  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
- params = LLaMACpp::ContextParams.new
- params.seed = options[:seed] if options[:seed] != -1
- params.n_gpu_layers = options[:n_gpu_layers]
- params.embedding = true
- model = LLaMACpp::Model.new(model_path: options[:model], params: params)
- context = LLaMACpp::Context.new(model: model)
+ mdl_params = LLaMACpp::ModelParams.new
+ mdl_params.n_gpu_layers = options[:n_gpu_layers]
+ model = LLaMACpp::Model.new(model_path: options[:model], params: mdl_params)
+ ctx_params = LLaMACpp::ContextParams.new
+ ctx_params.embedding = true
+ ctx_params.seed = options[:seed] if options[:seed] != -1
+ context = LLaMACpp::Context.new(model: model, params: ctx_params)

- embd_input = context.tokenize(text: options[:prompt], add_bos: true)
+ embd_input = context.model.tokenize(text: options[:prompt], add_bos: true)

  return unless embd_input.size.positive?

- context.eval(tokens: embd_input, n_past: 0, n_threads: options[:threads])
+ context.eval(tokens: embd_input, n_past: 0)

  context.embeddings.each { |val| print("#{val} ") }
  print("\n")