llama_cpp 0.17.10 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/sig/llama_cpp.rbs DELETED
@@ -1,425 +0,0 @@
1
- module LLaMACpp
2
- VERSION: String
3
- LLAMA_CPP_VERSION: String
4
- LLAMA_DEFALUT_SEED: String
5
-
6
- LLAMA_FILE_MAGIC_GGLA: String
7
- LLAMA_FILE_MAGIC_GGSN: String
8
- LLAMA_FILE_MAGIC_GGSQ: String
9
- LLAMA_SESSION_MAGIC: String
10
- LLAMA_SESSION_VERSION: String
11
- LLAMA_STATE_SEQ_MAGIC: String
12
- LLAMA_STATE_SEQ_VERSION: String
13
-
14
- LLAMA_VOCAB_TYPE_NONE: Integer
15
- LLAMA_VOCAB_TYPE_SPM: Integer
16
- LLAMA_VOCAB_TYPE_BPE: Integer
17
- LLAMA_VOCAB_TYPE_WPM: Integer
18
- LLAMA_VOCAB_TYPE_UGM: Integer
19
- LLAMA_VOCAB_TYPE_RWKV: Integer
20
-
21
- LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
22
- LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
23
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: Integer
24
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: Integer
25
- LLAMA_VOCAB_PRE_TYPE_FALCON: Integer
26
- LLAMA_VOCAB_PRE_TYPE_MPT: Integer
27
- LLAMA_VOCAB_PRE_TYPE_STARCODER: Integer
28
- LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
29
- LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
30
- LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
31
- LLAMA_VOCAB_PRE_TYPE_STABLELM2: Integer
32
- LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
33
- LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
34
- LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
35
- LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
36
- LLAMA_VOCAB_PRE_TYPE_PORO: Integer
37
- LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
38
- LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
39
- LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
40
- LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
41
- LLAMA_VOCAB_PRE_TYPE_TEKKEN: Integer
42
- LLAMA_VOCAB_PRE_TYPE_SMOLLM: Integer
43
- LLAMA_VOCAB_PRE_TYPE_CODESHELL: Integer
44
- LLAMA_VOCAB_PRE_TYPE_BLOOM: Integer
45
- LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH: Integer
46
- LLAMA_VOCAB_PRE_TYPE_EXAONE: Integer
47
-
48
- LLAMA_TOKEN_ATTR_UNDEFINED: Integer
49
- LLAMA_TOKEN_ATTR_UNKNOWN: Integer
50
- LLAMA_TOKEN_ATTR_UNUSED: Integer
51
- LLAMA_TOKEN_ATTR_NORMAL: Integer
52
- LLAMA_TOKEN_ATTR_CONTROL: Integer
53
- LLAMA_TOKEN_ATTR_USER_DEFINED: Integer
54
- LLAMA_TOKEN_ATTR_BYTE: Integer
55
- LLAMA_TOKEN_ATTR_NORMALIZED: Integer
56
- LLAMA_TOKEN_ATTR_LSTRIP: Integer
57
- LLAMA_TOKEN_ATTR_RSTRIP: Integer
58
- LLAMA_TOKEN_ATTR_SINGLE_WORD: Integer
59
-
60
- LLAMA_FTYPE_ALL_F32: Integer
61
- LLAMA_FTYPE_MOSTLY_F16: Integer
62
- LLAMA_FTYPE_MOSTLY_Q4_0: Integer
63
- LLAMA_FTYPE_MOSTLY_Q4_1: Integer
64
- LLAMA_FTYPE_MOSTLY_Q8_0: Integer
65
- LLAMA_FTYPE_MOSTLY_Q5_0: Integer
66
- LLAMA_FTYPE_MOSTLY_Q5_1: Integer
67
- LLAMA_FTYPE_MOSTLY_Q2_K: Integer
68
- LLAMA_FTYPE_MOSTLY_Q3_K_S: Integer
69
- LLAMA_FTYPE_MOSTLY_Q3_K_M: Integer
70
- LLAMA_FTYPE_MOSTLY_Q3_K_L: Integer
71
- LLAMA_FTYPE_MOSTLY_Q4_K_S: Integer
72
- LLAMA_FTYPE_MOSTLY_Q4_K_M: Integer
73
- LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
74
- LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
75
- LLAMA_FTYPE_MOSTLY_Q6_K: Integer
76
- LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
77
- LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
78
- LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
79
- LLAMA_FTYPE_MOSTLY_IQ3_XS: Integer
80
- LLAMA_FTYPE_MOSTLY_IQ3_XXS: Integer
81
- LLAMA_FTYPE_MOSTLY_IQ1_S: Integer
82
- LLAMA_FTYPE_MOSTLY_IQ4_NL: Integer
83
- LLAMA_FTYPE_MOSTLY_IQ3_S: Integer
84
- LLAMA_FTYPE_MOSTLY_IQ3_M: Integer
85
- LLAMA_FTYPE_MOSTLY_IQ4_XS: Integer
86
- LLAMA_FTYPE_MOSTLY_IQ1_M: Integer
87
- LLAMA_FTYPE_MOSTLY_BF16: Integer
88
- LLAMA_FTYPE_MOSTLY_Q4_0_4_4: Integer
89
- LLAMA_FTYPE_MOSTLY_Q4_0_4_8: Integer
90
- LLAMA_FTYPE_MOSTLY_Q4_0_8_8: Integer
91
- LLAMA_FTYPE_MOSTLY_TQ1_0: Integer
92
- LLAMA_FTYPE_MOSTLY_TQ2_0: Integer
93
- LLAMA_FTYPE_GUESSED: Integer
94
-
95
- LLAMA_KV_OVERRIDE_TYPE_INT: Integer
96
- LLAMA_KV_OVERRIDE_TYPE_FLOAT: Integer
97
- LLAMA_KV_OVERRIDE_TYPE_BOOL: Integer
98
- LLAMA_KV_OVERRIDE_TYPE_STR: Integer
99
-
100
- LLAMA_GRETYPE_END: Integer
101
- LLAMA_GRETYPE_ALT: Integer
102
- LLAMA_GRETYPE_RULE_REF: Integer
103
- LLAMA_GRETYPE_CHAR: Integer
104
- LLAMA_GRETYPE_CHAR_NOT: Integer
105
- LLAMA_GRETYPE_CHAR_RNG_UPPER: Integer
106
- LLAMA_GRETYPE_CHAR_ALT: Integer
107
- LLAMA_GRETYPE_CHAR_ANY: Integer
108
-
109
- LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
110
- LLAMA_ROPE_SCALING_TYPE_NONE: Integer
111
- LLAMA_ROPE_SCALING_TYPE_LINEAR: Integer
112
- LLAMA_ROPE_SCALING_TYPE_YARN: Integer
113
- LLAMA_ROPE_SCALING_TYPE_MAX_VALUE: Integer
114
-
115
- LLAMA_POOLING_TYPE_UNSPECIFIED: Integer
116
- LLAMA_POOLING_TYPE_NONE: Integer
117
- LLAMA_POOLING_TYPE_MEAN: Integer
118
- LLAMA_POOLING_TYPE_CLS: Integer
119
- LLAMA_POOLING_TYPE_LAST: Integer
120
-
121
- LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
122
- LLAMA_ATTENTION_TYPE_CAUSAL: Integer
123
- LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
124
-
125
- LLAMA_SPLIT_MODE_NONE: Integer
126
- LLAMA_SPLIT_MODE_LAYER: Integer
127
- LLAMA_SPLIT_MODE_ROW: Integer
128
-
129
- def self?.backend_init: () -> void
130
- def self?.backend_free: () -> void
131
- def self?.numa_init: (Integer) -> void
132
- def self?.model_quantize: (input_path: String, output_path: String, params: ModelQuantizeParams) -> void
133
- def self?.generate: (::LLaMACpp::Context, String,
134
- ?n_predict: Integer, ?n_threads: Integer, ?n_keep: Integer, ?n_batch: Integer,
135
- ?repeat_last_n: Integer, ?repeat_penalty: Float, ?frequency: Float, ?presence: Float,
136
- ?top_k: Integer, ?top_p: Float, ?tfs_z: Float, ?typical_p: Float, ?temperature: Float) -> String
137
- def self?.print_system_info: () -> void
138
- def self?.time_us: () -> Integer
139
- def self?.max_devices: () -> Integer
140
- def self?.supports_mmap?: () -> bool
141
- def self?.supports_mlock?: () -> bool
142
- def self?.supports_gpu_offload?: () -> bool
143
-
144
- class TokenData
145
- public
146
-
147
- def initialize: (id: Integer, logit: Float, p: Float) -> void
148
- def id: () -> Integer
149
- def id=: (Integer) -> Integer
150
- def logit: () -> Float
151
- def logit=: (Float) -> Float
152
- def p: () -> Float
153
- def p=: (Float) -> Float
154
- end
155
-
156
- class TokenDataArray
157
- public
158
-
159
- def initialize: (Array[::LLaMACpp::TokenData], ?sorted: bool) -> void
160
- def size: () -> Integer
161
- def sorted: () -> bool
162
- end
163
-
164
- class Model
165
- public
166
-
167
- def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
168
- | () -> void
169
- def empty?: () -> bool
170
- def free: () -> void
171
- def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
172
- def vocab_type: () -> Integer
173
- def rope_type: () -> Integer
174
- def n_vocab: () -> Integer
175
- def n_ctx_train: () -> Integer
176
- def n_embd: () -> Integer
177
- def n_layer: () -> Integer
178
- def rope_freq_scale_train: () -> Float
179
- def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
180
- def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
181
- def desc: () -> String
182
- def size: () -> Integer
183
- def n_params: () -> Integer
184
- def text: (Integer) -> String
185
- def score: (Integer) -> Float
186
- def token_attr: (Integer) -> Integer
187
- def token_bos: () -> Integer
188
- def token_eos: () -> Integer
189
- def token_cls: () -> Integer
190
- def token_sep: () -> Integer
191
- def token_nl: () -> Integer
192
- def token_pad: () -> Integer
193
- def add_bos_token?: () -> bool
194
- def add_eos_token?: () -> bool
195
- def token_prefix: () -> Integer
196
- def token_middle: () -> Integer
197
- def token_suffix: () -> Integer
198
- def token_eot: () -> Integer
199
- def token_is_eog?: (Integer) -> bool
200
- def token_is_control?: (Integer) -> bool
201
- def has_encoder?: () -> bool
202
- def has_decoder?: () -> bool
203
- def decoder_start_token: () -> Integer
204
- def is_recurrent?: () -> bool
205
- def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
206
- end
207
-
208
- class Timings
209
- public
210
-
211
- def t_start_ms: () -> Float
212
- def t_end_ms: () -> Float
213
- def t_load_ms: () -> Float
214
- def t_sample_ms: () -> Float
215
- def t_p_eval_ms: () -> Float
216
- def t_eval_ms: () -> Float
217
- def n_sample: () -> Integer
218
- def n_p_eval: () -> Integer
219
- def n_eval: () -> Integer
220
- end
221
-
222
- class ModelKVOverride
223
- public
224
-
225
- def key: () -> String
226
- def tag: () -> Integer
227
- def val_i64: () -> Integer
228
- def val_f64: () -> Float
229
- def val_bool: () -> bool
230
- def val_str: () -> String
231
- end
232
-
233
- class ModelParams
234
- public
235
-
236
- def n_gpu_layers: () -> Integer
237
- def n_gpu_layers=: (Integer) -> Integer
238
- def split_mode: () -> Integer
239
- def split_mode=: (Integer) -> Integer
240
- def main_gpu: () -> Integer
241
- def main_gpu=: (Integer) -> Integer
242
- def tensor_split: () -> Array[Float]
243
- def vocab_only: () -> bool
244
- def vocab_only=: (bool) -> bool
245
- def use_mmap: () -> bool
246
- def use_mmap=: (bool) -> bool
247
- def use_mlock: () -> bool
248
- def use_mlock=: (bool) -> bool
249
- def check_tensors: () -> bool
250
- def check_tensors=: (bool) -> bool
251
- end
252
-
253
- class Batch
254
- public
255
-
256
- def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
257
- def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
258
- def n_tokens=: (Integer) -> Integer
259
- def n_tokens: () -> Integer
260
- def all_pos_zero=: (Integer) -> Integer
261
- def all_pos_zero: () -> Integer
262
- def all_pos_one=: (Integer) -> Integer
263
- def all_pos_one: () -> Integer
264
- def all_seq_id=: (Integer) -> Integer
265
- def all_seq_id: () -> Integer
266
- def set_token: (Integer, Integer) -> Integer
267
- def get_token: (Integer) -> Integer
268
- def set_pos: (Integer, Integer) -> Integer
269
- def get_pos: (Integer) -> Integer
270
- def set_n_seq_id: (Integer, Integer) -> Integer
271
- def get_n_seq_id: (Integer) -> Integer
272
- def set_seq_id: (Integer, Integer, Integer) -> Integer
273
- def get_seq_id: (Integer, Integer) -> Integer
274
- def set_logit: (Integer, bool) -> bool
275
- def get_logit: (Integer) -> bool
276
- end
277
-
278
- class Context
279
- public
280
-
281
- attr_reader model: ::LLaMACpp::Model
282
-
283
- def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
284
- def embeddings: () -> Array[Float]
285
- def embeddings_ith: (Integer) -> Array[Float]
286
- def embeddings_seq: (Integer) -> Array[Float]
287
- def encode: (::LLaMACpp::Batch) -> void
288
- def decode: (::LLaMACpp::Batch) -> void
289
- def logits: () -> Array[Float]
290
- def set_embeddings: (bool) -> void
291
- def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
292
- def n_ctx: () -> Integer
293
- def n_batch: () -> Integer
294
- def n_ubatch: () -> Integer
295
- def n_seq_max: () -> Integer
296
- def n_threads: () -> Integer
297
- def n_threads_batch: () -> Integer
298
- def timings: () -> ::LLaMACpp::Timings
299
- def print_timings: () -> void
300
- def reset_timings: () -> void
301
- def kv_cache_token_count: () -> Integer
302
- def kv_cache_clear: () -> void
303
- def kv_cache_seq_rm: (Integer, Integer,Integer) -> void
304
- def kv_cache_seq_cp: (Integer, Integer,Integer, Integer) -> void
305
- def kv_cache_seq_keep: (Integer) -> void
306
- def kv_cache_seq_add: (Integer, Integer, Integer, Integer) -> void
307
- def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
308
- def kv_cache_seq_pos_max: (Integer) -> Integer
309
- def kv_cache_defrag: () -> void
310
- def kv_cache_update: () -> void
311
- def set_rng_seed: (Integer) -> void
312
- def set_causal_attn: (bool) -> void
313
- def synchronize: () -> void
314
- def load_session_file: (session_path: String) -> void
315
- def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
316
- def sample_repetition_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], penalty_repeat: Float, penalty_freq: Float, penalty_present: Float) -> void
317
- def sample_apply_guidance: (logits: Array[Float], logits_guidance: Array[Float], scale: Float) -> void
318
- def sample_softmax: (::LLaMACpp::TokenDataArray) -> void
319
- def sample_top_k: (::LLaMACpp::TokenDataArray, k: Integer, ?min_keep: Integer) -> void
320
- def sample_top_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
321
- def sample_min_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
322
- def sample_tail_free: (::LLaMACpp::TokenDataArray, z: Float, ?min_keep: Integer) -> void
323
- def sample_typical: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
324
- def sample_temp: (::LLaMACpp::TokenDataArray, temp: Float) -> void
325
- def sample_entropy: (::LLaMACpp::TokenDataArray, min_temp: Float, max_temp: Float, exponent_val: Float) -> void
326
- def sample_token_mirostat: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, m: Integer, mu: Float) -> [Integer, Float]
327
- def sample_token_mirostat_v2: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, mu: Float) -> [Integer, Float]
328
- def sample_token_greedy: (::LLaMACpp::TokenDataArray) -> Integer
329
- def sample_token: (::LLaMACpp::TokenDataArray) -> Integer
330
- def sample_grammar: (::LLaMACpp::TokenDataArray, grammar: ::LLaMACpp::Grammar) -> void
331
- def grammar_accept_token: (grammar: ::LLaMACpp::Grammar, token: Integer) -> void
332
- def apply_control_vector: (data: Array[Float], n_embd: Integer, il_start: Integer, il_end: Integer) -> void
333
- def pooling_type: () -> Integer
334
- end
335
-
336
- class ContextParams
337
- public
338
-
339
- def seed: () -> Integer
340
- def seed=: (Integer) -> Integer
341
- def n_ctx: () -> Integer
342
- def n_ctx=: (Integer) -> Integer
343
- def n_batch: () -> Integer
344
- def n_batch=: (Integer) -> Integer
345
- def n_ubatch: () -> Integer
346
- def n_ubatch=: (Integer) -> Integer
347
- def n_seq_max: () -> Integer
348
- def n_seq_max=: (Integer) -> Integer
349
- def n_threads: () -> Integer
350
- def n_threads=: (Integer) -> Integer
351
- def n_threads_batch: () -> Integer
352
- def n_threads_batch=: (Integer) -> Integer
353
- def rope_scaling_type=: (Integer) -> Integer
354
- def rope_scaling_type: () -> Integer
355
- def pooling_type=: (Integer) -> Integer
356
- def pooling_type: () -> Integer
357
- def attention_type=: (Integer) -> Integer
358
- def attention_type: () -> Integer
359
- def rope_freq_base=: (Float) -> Float
360
- def rope_freq_base: () -> Float
361
- def rope_freq_scale=: (Float) -> Float
362
- def rope_freq_scale: () -> Float
363
- def yarn_ext_factor=: (Float) -> Float
364
- def yarn_ext_factor: () -> Float
365
- def yarn_attn_factor=: (Float) -> Float
366
- def yarn_attn_factor: () -> Float
367
- def yarn_beta_fast=: (Float) -> Float
368
- def yarn_beta_fast: () -> Float
369
- def yarn_beta_slow=: (Float) -> Float
370
- def yarn_beta_slow: () -> Float
371
- def yarn_orig_ctx=: (Integer) -> Integer
372
- def yarn_orig_ctx: () -> Integer
373
- def defrag_thold=: (Float) -> Float
374
- def defrag_thold: () -> Float
375
- def type_k=: (Integer) -> Integer
376
- def type_k: () -> Integer
377
- def type_v=: (Integer) -> Integer
378
- def type_v: () -> Integer
379
- def logits_all: () -> bool
380
- def logits_all=: (bool) -> bool
381
- def embeddings: () -> bool
382
- def embeddings=: (bool) -> bool
383
- def offload_kqv: () -> bool
384
- def offload_kqv=: (bool) -> bool
385
- def flash_attn: () -> bool
386
- def flash_attn=: (bool) -> bool
387
- end
388
-
389
- class ModelQuantizeParams
390
- public
391
-
392
- attr_reader params: ::LLaMACpp::ModelParams
393
-
394
- def n_thread: () -> Integer
395
- def n_thread=: (Integer) -> Integer
396
- def ftype: () -> Integer
397
- def ftype=: (Integer) -> Integer
398
- def allow_quantization: () -> bool
399
- def allow_quantization=: (bool) -> bool
400
- def quantize_output_tensor: () -> bool
401
- def quantize_output_tensor=: (bool) -> bool
402
- def only_copy: () -> bool
403
- def only_copy=: (bool) -> bool
404
- def pure: () -> bool
405
- def pure=: (bool) -> bool
406
- def keep_split: () -> bool
407
- def keep_split=: (bool) -> bool
408
- end
409
-
410
- class Params = ContextParams
411
-
412
- class GrammarElement
413
- public
414
-
415
- def initialize: (?type: Integer, ?value: Integer) -> void
416
- def type: () -> Integer
417
- def type=: (Integer) -> Integer
418
- def value: () -> Integer
419
- def value=: (Integer) -> Integer
420
- end
421
-
422
- class Grammar
423
- def initialize: (rules: Array[Array[LLaMACpp::GrammarElement]], start_rule_index: Integer) -> void
424
- end
425
- end