llama_cpp 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/llama_cpp.rb CHANGED
@@ -5,9 +5,6 @@ require_relative 'llama_cpp/llama_cpp'
5
5
 
6
6
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
7
7
  module LLaMACpp
8
- # Class alias to match interface of whispercpp gem.
9
- Params = ContextParams
10
-
11
8
  module_function
12
9
 
13
10
  # Generates sentences following the given prompt for operation check.
@@ -15,7 +12,6 @@ module LLaMACpp
15
12
  # @param context [LLaMACpp::Context] The context to use.
16
13
  # @param prompt [String] The prompt to start generation with.
17
14
  # @param n_predict [Integer] The number of tokens to predict.
18
- # @param n_threads [Integer] The number of threads.
19
15
  # @param n_keep [Integer] The number of tokens to keep in the context.
20
16
  # @param n_batch [Integer] The number of tokens to process in a batch.
21
17
  # @param repeat_last_n [Integer] The number of tokens to consider for repetition penalty.
@@ -29,14 +25,14 @@ module LLaMACpp
29
25
  # @param temperature [Float] The temperature for temperature sampling.
30
26
  # @return [String]
31
27
  def generate(context, prompt, # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/ParameterLists, Metrics/PerceivedComplexity
32
- n_predict: 128, n_threads: 1, n_keep: 10, n_batch: 512, repeat_last_n: 64,
28
+ n_predict: 128, n_keep: 10, n_batch: 512, repeat_last_n: 64,
33
29
  repeat_penalty: 1.1, frequency: 0.0, presence: 0.0, top_k: 40,
34
30
  top_p: 0.95, tfs_z: 1.0, typical_p: 1.0, temperature: 0.8)
35
31
  raise ArgumentError, 'context must be an instance of LLaMACpp::Context' unless context.is_a?(LLaMACpp::Context)
36
32
  raise ArgumentError, 'prompt must be a String' unless prompt.is_a?(String)
37
33
 
38
34
  spaced_prompt = " #{prompt}"
39
- embd_input = context.tokenize(text: spaced_prompt, add_bos: true)
35
+ embd_input = context.model.tokenize(text: spaced_prompt, add_bos: true)
40
36
 
41
37
  n_ctx = context.n_ctx
42
38
  raise ArgumentError, "prompt is too long #{embd_input.size} tokens, maximum is #{n_ctx - 4}" if embd_input.size > n_ctx - 4
@@ -47,7 +43,7 @@ module LLaMACpp
47
43
  n_consumed = 0
48
44
  n_past = 0
49
45
  n_remain = n_predict
50
- n_vocab = context.n_vocab
46
+ n_vocab = context.model.n_vocab
51
47
  output = []
52
48
 
53
49
  while n_remain != 0
@@ -58,7 +54,7 @@ module LLaMACpp
58
54
  embd.insert(0, last_n_tokens[(n_ctx - (n_left / 2) - embd.size)...-embd.size])
59
55
  end
60
56
 
61
- context.eval(tokens: embd, n_past: n_past, n_threads: n_threads)
57
+ context.eval(tokens: embd, n_past: n_past)
62
58
  end
63
59
 
64
60
  n_past += embd.size
@@ -99,7 +95,7 @@ module LLaMACpp
99
95
  end
100
96
  end
101
97
 
102
- embd.each { |token| output << context.token_to_piece(token) }
98
+ embd.each { |token| output << context.model.token_to_piece(token) }
103
99
 
104
100
  break if !embd.empty? && embd[-1] == context.token_eos
105
101
  end
data/sig/llama_cpp.rbs CHANGED
@@ -67,14 +67,13 @@ module LLaMACpp
67
67
  class Model
68
68
  public
69
69
 
70
- def initialize: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
70
+ def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
71
71
  | () -> void
72
72
  def empty?: () -> bool
73
73
  def free: () -> void
74
- def load: (model_path: String, params: ::LLaMACpp::ContextParams) -> void
75
- def apply_lora_from_file: (lora_path: String, ?base_model_path: String, ?n_threads: Integer) -> void
74
+ def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
75
+ def apply_lora_from_file: (lora_path: String, ?scale: Float, ?base_model_path: String, ?n_threads: Integer) -> void
76
76
  def n_vocab: () -> Integer
77
- def n_ctx: () -> Integer
78
77
  def n_ctx_train: () -> Integer
79
78
  def n_embd: () -> Integer
80
79
  def token_to_piece: (Integer) -> String
@@ -98,10 +97,50 @@ module LLaMACpp
98
97
  def n_eval: () -> Integer
99
98
  end
100
99
 
100
+ class ModelParams
101
+ public
102
+
103
+ def n_gpu_layers: () -> Integer
104
+ def n_gpu_layers=: (Integer) -> Integer
105
+ def main_gpu: () -> Integer
106
+ def main_gpu=: (Integer) -> Integer
107
+ def tensor_split: () -> Array[Float]
108
+ def vocab_only: () -> bool
109
+ def vocab_only=: (bool) -> bool
110
+ def use_mmap: () -> bool
111
+ def use_mmap=: (bool) -> bool
112
+ def use_mlock: () -> bool
113
+ def use_mlock=: (bool) -> bool
114
+ end
115
+
116
+ class Batch
117
+ public
118
+
119
+ def initialize: (n_tokens: Integer, embd: Integer) -> void
120
+ def n_tokens=: (Integer) -> Integer
121
+ def n_tokens: () -> Integer
122
+ def all_pos_zero=: (Integer) -> Integer
123
+ def all_pos_zero: () -> Integer
124
+ def all_pos_one=: (Integer) -> Integer
125
+ def all_pos_one: () -> Integer
126
+ def all_seq_id=: (Integer) -> Integer
127
+ def all_seq_id: () -> Integer
128
+ def set_token: (Integer, Integer) -> Integer
129
+ def get_token: (Integer) -> Integer
130
+ def set_pos: (Integer, Integer) -> Integer
131
+ def get_pos: (Integer) -> Integer
132
+ def set_seq_id: (Integer, Integer) -> Integer
133
+ def get_seq_id: (Integer) -> Integer
134
+ def set_logit: (Integer, bool) -> bool
135
+ def get_logit: (Integer) -> bool
136
+ end
137
+
101
138
  class Context
102
139
  public
103
140
 
104
- def initialize: (model: ::LLaMACpp::Model) -> void
141
+ attr_reader model: ::LLaMACpp::Model
142
+
143
+ def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
105
144
  def embeddings: () -> Array[Float]
106
145
  def text: (Integer) -> String
107
146
  def score: (Integer) -> Float
@@ -109,20 +148,20 @@ module LLaMACpp
109
148
  def token_bos: () -> Integer
110
149
  def token_eos: () -> Integer
111
150
  def token_nl: () -> Integer
112
- def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer, ?n_threads: Integer) -> void
113
- def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer, ?n_threads: Integer) -> void
114
- def eval_export: (String) -> bool
151
+ def eval: (tokens: Array[Integer], n_past: Integer, ?n_tokens: Integer) -> void
152
+ def eval_embd: (tokens: Array[Float], n_past: Integer, ?n_tokens: Integer) -> void
153
+ def decode: (::LLaMACpp::Batch) -> void
115
154
  def logits: () -> Array[Float]
116
155
  def n_ctx: () -> Integer
117
- def n_ctx_train: () -> Integer
118
- def n_embd: () -> Integer
119
- def n_vocab: () -> Integer
120
156
  def timings: () -> ::LLaMACpp::Timings
121
157
  def print_timings: () -> void
122
158
  def reset_timings: () -> void
123
- def token_to_piece: (Integer) -> String
124
- def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
125
159
  def kv_cache_token_count: () -> Integer
160
+ def kv_cache_tokens_rm: (Integer, Integer) -> void
161
+ def kv_cache_seq_rm: (Integer, Integer, Integer) -> void
162
+ def kv_cache_seq_cp: (Integer, Integer, Integer, Integer) -> void
163
+ def kv_cache_seq_keep: (Integer) -> void
164
+ def kv_cache_seq_shift: (Integer, Integer, Integer, Integer) -> void
126
165
  def set_rng_seed: (Integer) -> void
127
166
  def load_session_file: (session_path: String) -> void
128
167
  def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
@@ -134,6 +173,7 @@ module LLaMACpp
134
173
  def sample_top_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
135
174
  def sample_tail_free: (::LLaMACpp::TokenDataArray, z: Float, ?min_keep: Integer) -> void
136
175
  def sample_typical: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
176
+ def sample_temp: (::LLaMACpp::TokenDataArray, temp: Float) -> void
137
177
  def sample_temperature: (::LLaMACpp::TokenDataArray, temperature: Float) -> void
138
178
  def sample_token_mirostat: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, m: Integer, mu: Float) -> [Integer, Float]
139
179
  def sample_token_mirostat_v2: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, mu: Float) -> [Integer, Float]
@@ -146,37 +186,28 @@ module LLaMACpp
146
186
  class ContextParams
147
187
  public
148
188
 
149
- def embedding: () -> bool
150
- def embedding=: (bool) -> bool
151
- def f16_kv: () -> bool
152
- def f16_kv=: (bool) -> bool
153
- def logits_all: () -> bool
154
- def logits_all=: (bool) -> bool
189
+ def seed: () -> Integer
190
+ def seed=: (Integer) -> Integer
155
191
  def n_ctx: () -> Integer
156
192
  def n_ctx=: (Integer) -> Integer
157
193
  def n_batch: () -> Integer
158
194
  def n_batch=: (Integer) -> Integer
159
- def n_gpu_layers: () -> Integer
160
- def n_gpu_layers=: (Integer) -> Integer
161
- def main_gpu: () -> Integer
162
- def main_gpu=: (Integer) -> Integer
163
- def tensor_split: () -> Array[Float]
195
+ def n_threads: () -> Integer
196
+ def n_threads=: (Integer) -> Integer
197
+ def n_threads_batch: () -> Integer
198
+ def n_threads_batch=: (Integer) -> Integer
164
199
  def rope_freq_base=: (Float) -> Float
165
200
  def rope_freq_base: () -> Float
166
201
  def rope_freq_scale=: (Float) -> Float
167
202
  def rope_freq_scale: () -> Float
168
- def low_vram: () -> bool
169
- def low_vram=: (bool) -> bool
170
203
  def mul_mat_q: () -> bool
171
204
  def mul_mat_q=: (bool) -> bool
172
- def seed: () -> Integer
173
- def seed=: (Integer) -> Integer
174
- def use_mlock: () -> bool
175
- def use_mlock=: (bool) -> bool
176
- def use_mmap: () -> bool
177
- def use_mmap=: (bool) -> bool
178
- def vocab_only: () -> bool
179
- def vocab_only=: (bool) -> bool
205
+ def f16_kv: () -> bool
206
+ def f16_kv=: (bool) -> bool
207
+ def logits_all: () -> bool
208
+ def logits_all=: (bool) -> bool
209
+ def embedding: () -> bool
210
+ def embedding=: (bool) -> bool
180
211
  end
181
212
 
182
213
  class ModelQuantizeParams
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-22 00:00:00.000000000 Z
11
+ date: 2023-09-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
14
14
  email:
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
75
  - !ruby/object:Gem::Version
76
76
  version: '0'
77
77
  requirements: []
78
- rubygems_version: 3.3.26
78
+ rubygems_version: 3.4.19
79
79
  signing_key:
80
80
  specification_version: 4
81
81
  summary: Ruby bindings for the llama.cpp.