llama_cpp 0.5.3 → 0.6.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: c45589a61587acfbe88add77ffb135a7949619ba2936178c59126c24c30e23cc
- data.tar.gz: 5866b5b5f8dab59432cc91beca290f927a8d1bc694f83c8ccbe366c6f636f47c
+ metadata.gz: 854493444a65cd1239649b991c8e6538c542c02a052932f6a69c56c984e28f58
+ data.tar.gz: 4e0b70de25eb2661b693af0d488efd25f570c3f62d4b9044fdd5c14fb5b9fac6
  SHA512:
- metadata.gz: 89714a2a920172c1ddc4fff56a11390ad97db62eb6bd4eefe3ba9376132bd6646eda7569acea49ff3a1ce87486cac0e623cac4fddfcb2b57629c30ee3457d38b
- data.tar.gz: 2c5494528f55b86c57fccb18658350058210e85f31e9ecb8b3587a4da68a1465a0987a98e86dbae16fa5fea9a9502aed96afec3679e5553a82805f4436ef3020
+ metadata.gz: b2524b8eb6e8568116f3c33eb57b764044083ee2ff2bbb7f15fc6301b024197ea8fca75968535b302a9e70449c9f9f28e0760cf4bfefb00ed8137c18e84137d5
+ data.tar.gz: faf26b552a8a862a97129b5bd25e05b3ae3edd2f8b118622b119634e4b004c05d200653c40085e4a28243c8994c517699baa35d3a8096ad8ac598fd637cf0565
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
+ ## [[0.6.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.3...v0.6.0)] - 2023-09-30
+
+ **Breaking Changes**
+ - Bump bundled llama.cpp from b1266 to b1292.
+ - There are many API changes, so please refer to the commits.
+
+ It is becoming difficult to keep up with major changes in llama.cpp,
+ and I may give up on developing this gem in the future to prioritize my own life.
+
  ## [[0.5.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.5.2...v0.5.3)] - 2023-09-23

  - Bump bundled llama.cpp from b1 to b1266.
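
For readers upgrading, here is a minimal before/after sketch of the main API change noted above, assembled from the README and example diffs below. The model path is a placeholder, and the 0.5.3 calls are kept only as comments for comparison.

```ruby
require 'llama_cpp'

# 0.5.3: a single ContextParams object configured both the model and the context.
# params = LLaMACpp::ContextParams.new
# params.seed = 42
# model = LLaMACpp::Model.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
# context = LLaMACpp::Context.new(model: model)
# puts LLaMACpp.generate(context, 'Hello, World.', n_threads: 4)

# 0.6.0: model-level and context-level settings are split, Context.new takes
# its own params object, and generate no longer accepts an n_threads keyword.
model_params = LLaMACpp::ModelParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/ggml-model-q4_0.bin', params: model_params)

context_params = LLaMACpp::ContextParams.new
context_params.seed = 42
context = LLaMACpp::Context.new(model: model, params: context_params)

puts LLaMACpp.generate(context, 'Hello, World.')
```
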
data/README.md CHANGED
@@ -59,13 +59,14 @@ An example of Ruby code that generates sentences with the quantization model is
  ```ruby
  require 'llama_cpp'

- params = LLaMACpp::ContextParams.new
- params.seed = 42
+ model_params = LLaMACpp::ModelParams.new
+ model = LLaMACpp::Model.new(model_path: '/home/user/llama.cpp/models/open_llama_7b/ggml-model-q4_0.bin', params: model_params)

- model = LLaMACpp::Model.new(model_path: '/home/user/llama.cpp/models/open_llama_7b/ggml-model-q4_0.bin', params: params)
- context = LLaMACpp::Context.new(model: model)
+ context_params = LLaMACpp::ContextParams.new
+ context_params.seed = 42
+ context = LLaMACpp::Context.new(model: model, params: context_params)

- puts LLaMACpp.generate(context, 'Hello, World.', n_threads: 4)
+ puts LLaMACpp.generate(context, 'Hello, World.')
  ```

  ## Examples
data/examples/chat.rb CHANGED
@@ -14,7 +14,6 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  default_command :main
  desc 'main', 'Start chat'
  option :seed, type: :numeric, aliases: '-s', desc: 'random seed', default: -1
- option :threads, type: :numeric, aliases: '-t', desc: 'number of threads', default: 2
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :file, type: :string, aliases: '-f', desc: 'prompt file to start generation'
  option :reverse_prompt, type: :string, aliases: '-r', desc: 'halt generation at PROMPT, return control in interactive mode'
@@ -32,16 +31,17 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  option :temp, type: :numeric, desc: 'temperature', default: 0.8
  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
  def main # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
- params = LLaMACpp::ContextParams.new
- params.seed = options[:seed] if options[:seed] != -1
- params.n_gpu_layers = options[:n_gpu_layers]
- model = LLaMACpp::Model.new(model_path: options[:model], params: params)
- context = LLaMACpp::Context.new(model: model)
+ mdl_params = LLaMACpp::ModelParams.new
+ mdl_params.n_gpu_layers = options[:n_gpu_layers]
+ model = LLaMACpp::Model.new(model_path: options[:model], params: mdl_params)
+ ctx_params = LLaMACpp::ContextParams.new
+ ctx_params.seed = options[:seed] if options[:seed] != -1
+ context = LLaMACpp::Context.new(model: model, params: ctx_params)

  antiprompt = options[:reverse_prompt] || 'User:'
  start_prompt = read_prompt(options[:file]) || default_prompt(antiprompt)

- embd_input = context.tokenize(text: start_prompt, add_bos: true)
+ embd_input = context.model.tokenize(text: start_prompt, add_bos: true)

  n_ctx = context.n_ctx
  raise ArgumentError, "prompt is too long #{embd_input.size} tokens, maximum is #{n_ctx - 4}" if embd_input.size > n_ctx - 4
@@ -58,7 +58,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  n_consumed = 0
  n_past = 0
  n_remain = options[:n_predict]
- n_vocab = context.n_vocab
+ n_vocab = context.model.n_vocab

  while interactive
  unless embd.empty?
@@ -70,7 +70,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation

  0.step(embd.size - 1, options[:batch_size]) do |i|
  n_eval = [options[:batch_size], embd.size - i].min
- context.eval(tokens: embd[i...i + n_eval], n_past: n_past, n_threads: options[:threads])
+ context.eval(tokens: embd[i...i + n_eval], n_past: n_past)
  n_past += n_eval
  end
  end
@@ -102,7 +102,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  if id == context.token_eos
  id = context.token_nl
  unless antiprompt.empty?
- first_antiprompt = context.tokenize(text: antiprompt, add_bos: false)
+ first_antiprompt = context.model.tokenize(text: antiprompt, add_bos: false)
  embd_input.concat(first_antiprompt)
  end
  end
@@ -122,7 +122,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation

  if input_echo
  output = []
- embd.each { |token| output << context.token_to_piece(token) }
+ embd.each { |token| output << context.model.token_to_piece(token) }
  output_str = output.join
  output_str.chomp!(antiprompt) if first_input
  print(output_str)
@@ -131,7 +131,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  if embd_input.size <= n_consumed
  if antiprompt.size.positive?
  last_output = []
- last_n_tokens.each { |token| last_output << context.token_to_piece(token) }
+ last_n_tokens.each { |token| last_output << context.model.token_to_piece(token) }
  last_output_str = last_output.join

  search_start_pos = last_output_str.size > antiprompt.size ? last_output_str.size - antiprompt.size : 0
@@ -150,7 +150,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
  break interactive = false if buffer.nil?

  if buffer.size > 1
- line_input = context.tokenize(text: "#{buffer}\n", add_bos: false)
+ line_input = context.model.tokenize(text: "#{buffer}\n", add_bos: false)
  embd_input.concat(line_input)
  n_remain -= line_input.size
  end
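
The chat example above captures the other recurring change in this release: tokenization and vocabulary lookups are now reached through the Model held by the Context, and eval no longer accepts an n_threads keyword. A minimal sketch, assuming a context built as in the earlier snippet; the prompt string is a placeholder.

```ruby
# 0.5.3: tokens = context.tokenize(text: prompt, add_bos: true)
# 0.6.0: the tokenizer and vocabulary are reached through the model.
tokens = context.model.tokenize(text: 'Hello, World.', add_bos: true)
n_vocab = context.model.n_vocab # was context.n_vocab in 0.5.3

# eval drops the n_threads keyword in 0.6.0.
context.eval(tokens: tokens, n_past: 0)

# Converting token ids back to text also goes through the model now.
puts tokens.map { |t| context.model.token_to_piece(t) }.join
```
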
data/examples/embedding.rb CHANGED
@@ -12,23 +12,23 @@ class Embedding < Thor # rubocop:disable Style/Documentation
  default_command :main
  desc 'main', 'Extract embedding from prompt'
  option :seed, type: :numeric, aliases: '-s', desc: 'random seed', default: -1
- option :threads, type: :numeric, aliases: '-t', desc: 'number of threads', default: 2
  option :model, type: :string, aliases: '-m', desc: 'path to model file', required: true
  option :prompt, type: :string, aliases: '-p', desc: 'prompt to generate embedding', required: true
  option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
  def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
- params = LLaMACpp::ContextParams.new
- params.seed = options[:seed] if options[:seed] != -1
- params.n_gpu_layers = options[:n_gpu_layers]
- params.embedding = true
- model = LLaMACpp::Model.new(model_path: options[:model], params: params)
- context = LLaMACpp::Context.new(model: model)
+ mdl_params = LLaMACpp::ModelParams.new
+ mdl_params.n_gpu_layers = options[:n_gpu_layers]
+ model = LLaMACpp::Model.new(model_path: options[:model], params: mdl_params)
+ ctx_params = LLaMACpp::ContextParams.new
+ ctx_params.embedding = true
+ ctx_params.seed = options[:seed] if options[:seed] != -1
+ context = LLaMACpp::Context.new(model: model, params: ctx_params)

- embd_input = context.tokenize(text: options[:prompt], add_bos: true)
+ embd_input = context.model.tokenize(text: options[:prompt], add_bos: true)

  return unless embd_input.size.positive?

- context.eval(tokens: embd_input, n_past: 0, n_threads: options[:threads])
+ context.eval(tokens: embd_input, n_past: 0)

  context.embeddings.each { |val| print("#{val} ") }
  print("\n")
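
Putting the embedding changes together, here is a minimal sketch of the 0.6.0 embedding flow assembled from the hunk above; the model path and prompt are placeholders, and the calls mirror the example script rather than introducing anything new.

```ruby
require 'llama_cpp'

mdl_params = LLaMACpp::ModelParams.new
model = LLaMACpp::Model.new(model_path: '/path/to/ggml-model-q4_0.bin', params: mdl_params)

# Embedding extraction is switched on via the context params.
ctx_params = LLaMACpp::ContextParams.new
ctx_params.embedding = true
context = LLaMACpp::Context.new(model: model, params: ctx_params)

tokens = context.model.tokenize(text: 'Hello, World.', add_bos: true)
context.eval(tokens: tokens, n_past: 0)

# One value per embedding dimension.
puts context.embeddings.join(' ')
```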