llama_cpp 0.17.10 → 0.18.1

Sign up to get free protection for your applications and access to all the features.
data/sig/llama_cpp.rbs DELETED
@@ -1,425 +0,0 @@
1
- module LLaMACpp
2
- VERSION: String
3
- LLAMA_CPP_VERSION: String
4
- LLAMA_DEFALUT_SEED: String
5
-
6
- LLAMA_FILE_MAGIC_GGLA: String
7
- LLAMA_FILE_MAGIC_GGSN: String
8
- LLAMA_FILE_MAGIC_GGSQ: String
9
- LLAMA_SESSION_MAGIC: String
10
- LLAMA_SESSION_VERSION: String
11
- LLAMA_STATE_SEQ_MAGIC: String
12
- LLAMA_STATE_SEQ_VERSION: String
13
-
14
- LLAMA_VOCAB_TYPE_NONE: Integer
15
- LLAMA_VOCAB_TYPE_SPM: Integer
16
- LLAMA_VOCAB_TYPE_BPE: Integer
17
- LLAMA_VOCAB_TYPE_WPM: Integer
18
- LLAMA_VOCAB_TYPE_UGM: Integer
19
- LLAMA_VOCAB_TYPE_RWKV: Integer
20
-
21
- LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
22
- LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
23
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: Integer
24
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: Integer
25
- LLAMA_VOCAB_PRE_TYPE_FALCON: Integer
26
- LLAMA_VOCAB_PRE_TYPE_MPT: Integer
27
- LLAMA_VOCAB_PRE_TYPE_STARCODER: Integer
28
- LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
29
- LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
30
- LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
31
- LLAMA_VOCAB_PRE_TYPE_STABLELM2: Integer
32
- LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
33
- LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
34
- LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
35
- LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
36
- LLAMA_VOCAB_PRE_TYPE_PORO: Integer
37
- LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
38
- LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
39
- LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
40
- LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
41
- LLAMA_VOCAB_PRE_TYPE_TEKKEN: Integer
42
- LLAMA_VOCAB_PRE_TYPE_SMOLLM: Integer
43
- LLAMA_VOCAB_PRE_TYPE_CODESHELL: Integer
44
- LLAMA_VOCAB_PRE_TYPE_BLOOM: Integer
45
- LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH: Integer
46
- LLAMA_VOCAB_PRE_TYPE_EXAONE: Integer
47
-
48
- LLAMA_TOKEN_ATTR_UNDEFINED: Integer
49
- LLAMA_TOKEN_ATTR_UNKNOWN: Integer
50
- LLAMA_TOKEN_ATTR_UNUSED: Integer
51
- LLAMA_TOKEN_ATTR_NORMAL: Integer
52
- LLAMA_TOKEN_ATTR_CONTROL: Integer
53
- LLAMA_TOKEN_ATTR_USER_DEFINED: Integer
54
- LLAMA_TOKEN_ATTR_BYTE: Integer
55
- LLAMA_TOKEN_ATTR_NORMALIZED: Integer
56
- LLAMA_TOKEN_ATTR_LSTRIP: Integer
57
- LLAMA_TOKEN_ATTR_RSTRIP: Integer
58
- LLAMA_TOKEN_ATTR_SINGLE_WORD: Integer
59
-
60
- LLAMA_FTYPE_ALL_F32: Integer
61
- LLAMA_FTYPE_MOSTLY_F16: Integer
62
- LLAMA_FTYPE_MOSTLY_Q4_0: Integer
63
- LLAMA_FTYPE_MOSTLY_Q4_1: Integer
64
- LLAMA_FTYPE_MOSTLY_Q8_0: Integer
65
- LLAMA_FTYPE_MOSTLY_Q5_0: Integer
66
- LLAMA_FTYPE_MOSTLY_Q5_1: Integer
67
- LLAMA_FTYPE_MOSTLY_Q2_K: Integer
68
- LLAMA_FTYPE_MOSTLY_Q3_K_S: Integer
69
- LLAMA_FTYPE_MOSTLY_Q3_K_M: Integer
70
- LLAMA_FTYPE_MOSTLY_Q3_K_L: Integer
71
- LLAMA_FTYPE_MOSTLY_Q4_K_S: Integer
72
- LLAMA_FTYPE_MOSTLY_Q4_K_M: Integer
73
- LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
74
- LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
75
- LLAMA_FTYPE_MOSTLY_Q6_K: Integer
76
- LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
77
- LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
78
- LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
79
- LLAMA_FTYPE_MOSTLY_IQ3_XS: Integer
80
- LLAMA_FTYPE_MOSTLY_IQ3_XXS: Integer
81
- LLAMA_FTYPE_MOSTLY_IQ1_S: Integer
82
- LLAMA_FTYPE_MOSTLY_IQ4_NL: Integer
83
- LLAMA_FTYPE_MOSTLY_IQ3_S: Integer
84
- LLAMA_FTYPE_MOSTLY_IQ3_M: Integer
85
- LLAMA_FTYPE_MOSTLY_IQ4_XS: Integer
86
- LLAMA_FTYPE_MOSTLY_IQ1_M: Integer
87
- LLAMA_FTYPE_MOSTLY_BF16: Integer
88
- LLAMA_FTYPE_MOSTLY_Q4_0_4_4: Integer
89
- LLAMA_FTYPE_MOSTLY_Q4_0_4_8: Integer
90
- LLAMA_FTYPE_MOSTLY_Q4_0_8_8: Integer
91
- LLAMA_FTYPE_MOSTLY_TQ1_0: Integer
92
- LLAMA_FTYPE_MOSTLY_TQ2_0: Integer
93
- LLAMA_FTYPE_GUESSED: Integer
94
-
95
- LLAMA_KV_OVERRIDE_TYPE_INT: Integer
96
- LLAMA_KV_OVERRIDE_TYPE_FLOAT: Integer
97
- LLAMA_KV_OVERRIDE_TYPE_BOOL: Integer
98
- LLAMA_KV_OVERRIDE_TYPE_STR: Integer
99
-
100
- LLAMA_GRETYPE_END: Integer
101
- LLAMA_GRETYPE_ALT: Integer
102
- LLAMA_GRETYPE_RULE_REF: Integer
103
- LLAMA_GRETYPE_CHAR: Integer
104
- LLAMA_GRETYPE_CHAR_NOT: Integer
105
- LLAMA_GRETYPE_CHAR_RNG_UPPER: Integer
106
- LLAMA_GRETYPE_CHAR_ALT: Integer
107
- LLAMA_GRETYPE_CHAR_ANY: Integer
108
-
109
- LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
110
- LLAMA_ROPE_SCALING_TYPE_NONE: Integer
111
- LLAMA_ROPE_SCALING_TYPE_LINEAR: Integer
112
- LLAMA_ROPE_SCALING_TYPE_YARN: Integer
113
- LLAMA_ROPE_SCALING_TYPE_MAX_VALUE: Integer
114
-
115
- LLAMA_POOLING_TYPE_UNSPECIFIED: Integer
116
- LLAMA_POOLING_TYPE_NONE: Integer
117
- LLAMA_POOLING_TYPE_MEAN: Integer
118
- LLAMA_POOLING_TYPE_CLS: Integer
119
- LLAMA_POOLING_TYPE_LAST: Integer
120
-
121
- LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
122
- LLAMA_ATTENTION_TYPE_CAUSAL: Integer
123
- LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
124
-
125
- LLAMA_SPLIT_MODE_NONE: Integer
126
- LLAMA_SPLIT_MODE_LAYER: Integer
127
- LLAMA_SPLIT_MODE_ROW: Integer
128
-
129
- def self?.backend_init: () -> void
130
- def self?.backend_free: () -> void
131
- def self?.numa_init: (Integer) -> void
132
- def self?.model_quantize: (input_path: String, output_path: String, params: ModelQuantizeParams) -> void
133
- def self?.generate: (::LLaMACpp::Context, String,
134
- ?n_predict: Integer, ?n_threads: Integer, ?n_keep: Integer, ?n_batch: Integer,
135
- ?repeat_last_n: Integer, ?repeat_penalty: Float, ?frequency: Float, ?presence: Float,
136
- ?top_k: Integer, ?top_p: Float, ?tfs_z: Float, ?typical_p: Float, ?temperature: Float) -> String
137
- def self?.print_system_info: () -> void
138
- def self?.time_us: () -> Integer
139
- def self?.max_devices: () -> Integer
140
- def self?.supports_mmap?: () -> bool
141
- def self?.supports_mlock?: () -> bool
142
- def self?.supports_gpu_offload?: () -> bool
143
-
144
- class TokenData
145
- public
146
-
147
- def initialize: (id: Integer, logit: Float, p: Float) -> void
148
- def id: () -> Integer
149
- def id=: (Integer) -> Integer
150
- def logit: () -> Float
151
- def logit=: (Float) -> Float
152
- def p: () -> Float
153
- def p=: (Float) -> Float
154
- end
155
-
156
- class TokenDataArray
157
- public
158
-
159
- def initialize: (Array[::LLaMACpp::TokenData], ?sorted: bool) -> void
160
- def size: () -> Integer
161
- def sorted: () -> bool
162
- end
163
-
164
- class Model
165
- public
166
-
167
- def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
168
- | () -> void
169
- def empty?: () -> bool
170
- def free: () -> void
171
- def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
172
- def vocab_type: () -> Integer
173
- def rope_type: () -> Integer
174
- def n_vocab: () -> Integer
175
- def n_ctx_train: () -> Integer
176
- def n_embd: () -> Integer
177
- def n_layer: () -> Integer
178
- def rope_freq_scale_train: () -> Float
179
- def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
180
- def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
181
- def desc: () -> String
182
- def size: () -> Integer
183
- def n_params: () -> Integer
184
- def text: (Integer) -> String
185
- def score: (Integer) -> Float
186
- def token_attr: (Integer) -> Integer
187
- def token_bos: () -> Integer
188
- def token_eos: () -> Integer
189
- def token_cls: () -> Integer
190
- def token_sep: () -> Integer
191
- def token_nl: () -> Integer
192
- def token_pad: () -> Integer
193
- def add_bos_token?: () -> bool
194
- def add_eos_token?: () -> bool
195
- def token_prefix: () -> Integer
196
- def token_middle: () -> Integer
197
- def token_suffix: () -> Integer
198
- def token_eot: () -> Integer
199
- def token_is_eog?: (Integer) -> bool
200
- def token_is_control?: (Integer) -> bool
201
- def has_encoder?: () -> bool
202
- def has_decoder?: () -> bool
203
- def decoder_start_token: () -> Integer
204
- def is_recurrent?: () -> bool
205
- def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
206
- end
207
-
208
- class Timings
209
- public
210
-
211
- def t_start_ms: () -> Float
212
- def t_end_ms: () -> Float
213
- def t_load_ms: () -> Float
214
- def t_sample_ms: () -> Float
215
- def t_p_eval_ms: () -> Float
216
- def t_eval_ms: () -> Float
217
- def n_sample: () -> Integer
218
- def n_p_eval: () -> Integer
219
- def n_eval: () -> Integer
220
- end
221
-
222
- class ModelKVOverride
223
- public
224
-
225
- def key: () -> String
226
- def tag: () -> Integer
227
- def val_i64: () -> Integer
228
- def val_f64: () -> Float
229
- def val_bool: () -> bool
230
- def val_str: () -> String
231
- end
232
-
233
- class ModelParams
234
- public
235
-
236
- def n_gpu_layers: () -> Integer
237
- def n_gpu_layers=: (Integer) -> Integer
238
- def split_mode: () -> Integer
239
- def split_mode=: (Integer) -> Integer
240
- def main_gpu: () -> Integer
241
- def main_gpu=: (Integer) -> Integer
242
- def tensor_split: () -> Array[Float]
243
- def vocab_only: () -> bool
244
- def vocab_only=: (bool) -> bool
245
- def use_mmap: () -> bool
246
- def use_mmap=: (bool) -> bool
247
- def use_mlock: () -> bool
248
- def use_mlock=: (bool) -> bool
249
- def check_tensors: () -> bool
250
- def check_tensors=: (bool) -> bool
251
- end
252
-
253
- class Batch
254
- public
255
-
256
- def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
257
- def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
258
- def n_tokens=: (Integer) -> Integer
259
- def n_tokens: () -> Integer
260
- def all_pos_zero=: (Integer) -> Integer
261
- def all_pos_zero: () -> Integer
262
- def all_pos_one=: (Integer) -> Integer
263
- def all_pos_one: () -> Integer
264
- def all_seq_id=: (Integer) -> Integer
265
- def all_seq_id: () -> Integer
266
- def set_token: (Integer, Integer) -> Integer
267
- def get_token: (Integer) -> Integer
268
- def set_pos: (Integer, Integer) -> Integer
269
- def get_pos: (Integer) -> Integer
270
- def set_n_seq_id: (Integer, Integer) -> Integer
271
- def get_n_seq_id: (Integer) -> Integer
272
- def set_seq_id: (Integer, Integer, Integer) -> Integer
273
- def get_seq_id: (Integer, Integer) -> Integer
274
- def set_logit: (Integer, bool) -> bool
275
- def get_logit: (Integer) -> bool
276
- end
277
-
278
- class Context
279
- public
280
-
281
- attr_reader model: ::LLaMACpp::Model
282
-
283
- def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
284
- def embeddings: () -> Array[Float]
285
- def embeddings_ith: (Integer) -> Array[Float]
286
- def embeddings_seq: (Integer) -> Array[Float]
287
- def encode: (::LLaMACpp::Batch) -> void
288
- def decode: (::LLaMACpp::Batch) -> void
289
- def logits: () -> Array[Float]
290
- def set_embeddings: (bool) -> void
291
- def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
292
- def n_ctx: () -> Integer
293
- def n_batch: () -> Integer
294
- def n_ubatch: () -> Integer
295
- def n_seq_max: () -> Integer
296
- def n_threads: () -> Integer
297
- def n_threads_batch: () -> Integer
298
- def timings: () -> ::LLaMACpp::Timings
299
- def print_timings: () -> void
300
- def reset_timings: () -> void
301
- def kv_cache_token_count: () -> Integer
302
- def kv_cache_clear: () -> void
303
- def kv_cache_seq_rm: (Integer, Integer,Integer) -> void
304
- def kv_cache_seq_cp: (Integer, Integer,Integer, Integer) -> void
305
- def kv_cache_seq_keep: (Integer) -> void
306
- def kv_cache_seq_add: (Integer, Integer, Integer, Integer) -> void
307
- def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
308
- def kv_cache_seq_pos_max: (Integer) -> Integer
309
- def kv_cache_defrag: () -> void
310
- def kv_cache_update: () -> void
311
- def set_rng_seed: (Integer) -> void
312
- def set_causal_attn: (bool) -> void
313
- def synchronize: () -> void
314
- def load_session_file: (session_path: String) -> void
315
- def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
316
- def sample_repetition_penalties: (::LLaMACpp::TokenDataArray, Array[Integer], penalty_repeat: Float, penalty_freq: Float, penalty_present: Float) -> void
317
- def sample_apply_guidance: (logits: Array[Float], logits_guidance: Array[Float], scale: Float) -> void
318
- def sample_softmax: (::LLaMACpp::TokenDataArray) -> void
319
- def sample_top_k: (::LLaMACpp::TokenDataArray, k: Integer, ?min_keep: Integer) -> void
320
- def sample_top_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
321
- def sample_min_p: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
322
- def sample_tail_free: (::LLaMACpp::TokenDataArray, z: Float, ?min_keep: Integer) -> void
323
- def sample_typical: (::LLaMACpp::TokenDataArray, prob: Float, ?min_keep: Integer) -> void
324
- def sample_temp: (::LLaMACpp::TokenDataArray, temp: Float) -> void
325
- def sample_entropy: (::LLaMACpp::TokenDataArray, min_temp: Float, max_temp: Float, exponent_val: Float) -> void
326
- def sample_token_mirostat: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, m: Integer, mu: Float) -> [Integer, Float]
327
- def sample_token_mirostat_v2: (::LLaMACpp::TokenDataArray, tau: Float, eta: Float, mu: Float) -> [Integer, Float]
328
- def sample_token_greedy: (::LLaMACpp::TokenDataArray) -> Integer
329
- def sample_token: (::LLaMACpp::TokenDataArray) -> Integer
330
- def sample_grammar: (::LLaMACpp::TokenDataArray, grammar: ::LLaMACpp::Grammar) -> void
331
- def grammar_accept_token: (grammar: ::LLaMACpp::Grammar, token: Integer) -> void
332
- def apply_control_vector: (data: Array[Float], n_embd: Integer, il_start: Integer, il_end: Integer) -> void
333
- def pooling_type: () -> Integer
334
- end
335
-
336
- class ContextParams
337
- public
338
-
339
- def seed: () -> Integer
340
- def seed=: (Integer) -> Integer
341
- def n_ctx: () -> Integer
342
- def n_ctx=: (Integer) -> Integer
343
- def n_batch: () -> Integer
344
- def n_batch=: (Integer) -> Integer
345
- def n_ubatch: () -> Integer
346
- def n_ubatch=: (Integer) -> Integer
347
- def n_seq_max: () -> Integer
348
- def n_seq_max=: (Integer) -> Integer
349
- def n_threads: () -> Integer
350
- def n_threads=: (Integer) -> Integer
351
- def n_threads_batch: () -> Integer
352
- def n_threads_batch=: (Integer) -> Integer
353
- def rope_scaling_type=: (Integer) -> Integer
354
- def rope_scaling_type: () -> Integer
355
- def pooling_type=: (Integer) -> Integer
356
- def pooling_type: () -> Integer
357
- def attention_type=: (Integer) -> Integer
358
- def attention_type: () -> Integer
359
- def rope_freq_base=: (Float) -> Float
360
- def rope_freq_base: () -> Float
361
- def rope_freq_scale=: (Float) -> Float
362
- def rope_freq_scale: () -> Float
363
- def yarn_ext_factor=: (Float) -> Float
364
- def yarn_ext_factor: () -> Float
365
- def yarn_attn_factor=: (Float) -> Float
366
- def yarn_attn_factor: () -> Float
367
- def yarn_beta_fast=: (Float) -> Float
368
- def yarn_beta_fast: () -> Float
369
- def yarn_beta_slow=: (Float) -> Float
370
- def yarn_beta_slow: () -> Float
371
- def yarn_orig_ctx=: (Integer) -> Integer
372
- def yarn_orig_ctx: () -> Integer
373
- def defrag_thold=: (Float) -> Float
374
- def defrag_thold: () -> Float
375
- def type_k=: (Integer) -> Integer
376
- def type_k: () -> Integer
377
- def type_v=: (Integer) -> Integer
378
- def type_v: () -> Integer
379
- def logits_all: () -> bool
380
- def logits_all=: (bool) -> bool
381
- def embeddings: () -> bool
382
- def embeddings=: (bool) -> bool
383
- def offload_kqv: () -> bool
384
- def offload_kqv=: (bool) -> bool
385
- def flash_attn: () -> bool
386
- def flash_attn=: (bool) -> bool
387
- end
388
-
389
- class ModelQuantizeParams
390
- public
391
-
392
- attr_reader params: ::LLaMACpp::ModelParams
393
-
394
- def n_thread: () -> Integer
395
- def n_thread=: (Integer) -> Integer
396
- def ftype: () -> Integer
397
- def ftype=: (Integer) -> Integer
398
- def allow_quantization: () -> bool
399
- def allow_quantization=: (bool) -> bool
400
- def quantize_output_tensor: () -> bool
401
- def quantize_output_tensor=: (bool) -> bool
402
- def only_copy: () -> bool
403
- def only_copy=: (bool) -> bool
404
- def pure: () -> bool
405
- def pure=: (bool) -> bool
406
- def keep_split: () -> bool
407
- def keep_split=: (bool) -> bool
408
- end
409
-
410
- class Params = ContextParams
411
-
412
- class GrammarElement
413
- public
414
-
415
- def initialize: (?type: Integer, ?value: Integer) -> void
416
- def type: () -> Integer
417
- def type=: (Integer) -> Integer
418
- def value: () -> Integer
419
- def value=: (Integer) -> Integer
420
- end
421
-
422
- class Grammar
423
- def initialize: (rules: Array[Array[LLaMACpp::GrammarElement]], start_rule_index: Integer) -> void
424
- end
425
- end