llama_cpp 0.18.0 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0a9263eee75a3d91907c711565799fd25820d8d2b0f0ae9818a24a0798b49bf4
4
- data.tar.gz: 9f05051e8972baea44c4bd33f63190d5da8a62156c419fc02ec6b96ea3545c31
3
+ metadata.gz: a043b8beda10d1eed3dfd6ab21932f567fa9ff69a82e14048f19993bbcff9c3a
4
+ data.tar.gz: '08bba39155267edd1d5a99ad4170e44b3075fe552b096d16bac38d438a9004e5'
5
5
  SHA512:
6
- metadata.gz: 2543f1022462d32694649f2226d097537672bd6921af4fe6948687e00ef007b2e0425110abc7a3240d5dceb25131a3641bc9614f0e1117f3a7a40dcc55b23190
7
- data.tar.gz: 0c54ea7e7617e99f52b0f005e8f871d5f69423e92d350bc93562ba56b48ff85a88ee6c3f01e1e6a71a0782a1bcf212f4900c7b88229a63935349cbdabee95cfc
6
+ metadata.gz: 404c50dedbd90e8c457113145d3029f130a2c6cf8cd9aee9be867c03ac7061c271907649cfb59dd23ff1331d9c64eaf234434a2354c8f3079265a4efe7cd9ea0
7
+ data.tar.gz: b2b75ed8a826c5eb5683010dcf7ca0ad9f99d3f0cfd5c2016acf1717489e17eef005ad9eaa9997fb2529c9c75f40bc301c4c3804e24b3334036244dec69f4b92
data/CHANGELOG.md CHANGED
@@ -1,6 +1,20 @@
1
+
2
+ ## [[0.18.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.1...v0.18.2)] - 2025-03-01
3
+
4
+ - Change supported llama.cpp version to b4793
5
+ - Add `llama_model_n_head_kv` module function.
6
+ - Add `LLAMA_VOCAB_PRE_TYPE_GPT4O` constant.
7
+
8
+ ## [[0.18.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.18.0...v0.18.1)] - 2025-02-15
9
+
10
+ - Change supported llama.cpp version to b4713
11
+ - Add `llama_sampler_init_top_n_sigma` module function.
12
+ - Remove old type declaration file.
13
+
1
14
  ## [[0.18.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.10...v0.18.0)] - 2025-02-02
2
15
 
3
16
  **Breaking Changes**
17
+
4
18
  All the native extensions code was rewritten in C. The high-level API has been removed and replaced with a simple bindings library.
5
19
  The fast update speed of llama.cpp makes it difficult to keep up with the creation of this binding library.
6
20
  [As previously noted](https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md#060---2023-09-30),
data/README.md CHANGED
@@ -6,8 +6,6 @@
6
6
 
7
7
  llama_cpp.rb provides Ruby bindings for the [llama.cpp](https://github.com/ggerganov/llama.cpp).
8
8
 
9
- This gem is still under development and may undergo many changes in the future.
10
-
11
9
  ## Installation
12
10
 
13
11
  Install the llama.cpp. If you use homebrew, install it by executing:
@@ -1452,6 +1452,20 @@ static VALUE rb_llama_model_n_head(VALUE self, VALUE model) {
1452
1452
  return INT2NUM(llama_model_n_head(model_wrapper->model));
1453
1453
  }
1454
1454
 
1455
+ /**
1456
+ * @overload llama_model_n_head_kv(model)
1457
+ * @param [LlamaModel] model
1458
+ * @return [Integer]
1459
+ */
1460
+ static VALUE rb_llama_model_n_head_kv(VALUE self, VALUE model) {
1461
+ if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
1462
+ rb_raise(rb_eArgError, "model must be a LlamaModel");
1463
+ return Qnil;
1464
+ }
1465
+ llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
1466
+ return INT2NUM(llama_model_n_head_kv(model_wrapper->model));
1467
+ }
1468
+
1455
1469
  /**
1456
1470
  * @overload llama_model_rope_freq_scale_train(model)
1457
1471
  * @param [LlamaModel] model
@@ -3367,6 +3381,20 @@ static VALUE rb_llama_sampler_init_xtc(VALUE self, VALUE p, VALUE t, VALUE min_k
3367
3381
  return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
3368
3382
  }
3369
3383
 
3384
+ /**
3385
+ * @overload llama_sampler_init_top_n_sigma(n)
3386
+ * @param [Float] n
3387
+ * @return [LlamaSampler]
3388
+ */
3389
+ static VALUE rb_llama_sampler_init_top_n_sigma(VALUE self, VALUE n) {
3390
+ if (!RB_FLOAT_TYPE_P(n)) {
3391
+ rb_raise(rb_eArgError, "n must be a Float");
3392
+ return Qnil;
3393
+ }
3394
+ struct llama_sampler* sampler = llama_sampler_init_top_n_sigma(NUM2DBL(n));
3395
+ return TypedData_Wrap_Struct(rb_cLlamaSampler, &llama_sampler_data_type, sampler);
3396
+ }
3397
+
3370
3398
  /**
3371
3399
  * @overload llama_sampler_init_mirostat(n_vocab, seed, tau, eta, m)
3372
3400
  * @param [Integer] n_vocab
@@ -3881,6 +3909,7 @@ void Init_llama_cpp(void) {
3881
3909
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_CHAMELEON", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHAMELEON));
3882
3910
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_MINERVA", INT2NUM(LLAMA_VOCAB_PRE_TYPE_MINERVA));
3883
3911
  rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM));
3912
+ rb_define_const(rb_mLlamaCpp, "LLAMA_VOCAB_PRE_TYPE_GPT4O", INT2NUM(LLAMA_VOCAB_PRE_TYPE_GPT4O));
3884
3913
  /* llama_rope_type */
3885
3914
  /* Document-const: LlamaCpp::LLAMA_ROPE_TYPE_NONE */
3886
3915
  rb_define_const(rb_mLlamaCpp, "LLAMA_ROPE_TYPE_NONE", INT2NUM(LLAMA_ROPE_TYPE_NONE));
@@ -4700,6 +4729,9 @@ void Init_llama_cpp(void) {
4700
4729
  /* llama_model_n_head */
4701
4730
  rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head", rb_llama_model_n_head, 1);
4702
4731
 
4732
+ /* llama_model_n_head_kv */
4733
+ rb_define_module_function(rb_mLlamaCpp, "llama_model_n_head_kv", rb_llama_model_n_head_kv, 1);
4734
+
4703
4735
  /* llama_model_rope_freq_scale_train */
4704
4736
  rb_define_module_function(rb_mLlamaCpp, "llama_model_rope_freq_scale_train", rb_llama_model_rope_freq_scale_train, 1);
4705
4737
 
@@ -5047,6 +5079,9 @@ void Init_llama_cpp(void) {
5047
5079
  /* llama_sampler_init_xtc */
5048
5080
  rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_xtc", rb_llama_sampler_init_xtc, 4);
5049
5081
 
5082
+ /* llama_sampler_init_top_n_sigma */
5083
+ rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_top_n_sigma", rb_llama_sampler_init_top_n_sigma, 1);
5084
+
5050
5085
  /* llama_sampler_init_mirostat */
5051
5086
  rb_define_module_function(rb_mLlamaCpp, "llama_sampler_init_mirostat", rb_llama_sampler_init_mirostat, 5);
5052
5087
 
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LlamaCpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.18.0'
6
+ VERSION = '0.18.2'
7
7
 
8
8
  # The supported version of llama.cpp.
9
- LLAMA_CPP_VERSION = 'b4611'
9
+ LLAMA_CPP_VERSION = 'b4793'
10
10
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.0
4
+ version: 0.18.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-02-02 00:00:00.000000000 Z
10
+ date: 2025-03-01 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
13
13
  email:
@@ -26,7 +26,6 @@ files:
26
26
  - ext/llama_cpp/llama_cpp.h
27
27
  - lib/llama_cpp.rb
28
28
  - lib/llama_cpp/version.rb
29
- - sig/llama_cpp.rbs
30
29
  homepage: https://github.com/yoshoku/llama_cpp.rb
31
30
  licenses:
32
31
  - MIT
data/sig/llama_cpp.rbs DELETED
@@ -1,366 +0,0 @@
1
- module LLaMACpp
2
- VERSION: String
3
- LLAMA_CPP_VERSION: String
4
- LLAMA_DEFALUT_SEED: String
5
-
6
- LLAMA_FILE_MAGIC_GGLA: String
7
- LLAMA_FILE_MAGIC_GGSN: String
8
- LLAMA_FILE_MAGIC_GGSQ: String
9
- LLAMA_SESSION_MAGIC: String
10
- LLAMA_SESSION_VERSION: String
11
- LLAMA_STATE_SEQ_MAGIC: String
12
- LLAMA_STATE_SEQ_VERSION: String
13
-
14
- LLAMA_VOCAB_TYPE_NONE: Integer
15
- LLAMA_VOCAB_TYPE_SPM: Integer
16
- LLAMA_VOCAB_TYPE_BPE: Integer
17
- LLAMA_VOCAB_TYPE_WPM: Integer
18
- LLAMA_VOCAB_TYPE_UGM: Integer
19
- LLAMA_VOCAB_TYPE_RWKV: Integer
20
-
21
- LLAMA_VOCAB_PRE_TYPE_DEFAULT: Integer
22
- LLAMA_VOCAB_PRE_TYPE_LLAMA3: Integer
23
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: Integer
24
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: Integer
25
- LLAMA_VOCAB_PRE_TYPE_FALCON: Integer
26
- LLAMA_VOCAB_PRE_TYPE_MPT: Integer
27
- LLAMA_VOCAB_PRE_TYPE_STARCODER: Integer
28
- LLAMA_VOCAB_PRE_TYPE_GPT2: Integer
29
- LLAMA_VOCAB_PRE_TYPE_REFACT: Integer
30
- LLAMA_VOCAB_PRE_TYPE_COMMAND_R: Integer
31
- LLAMA_VOCAB_PRE_TYPE_STABLELM2: Integer
32
- LLAMA_VOCAB_PRE_TYPE_QWEN2: Integer
33
- LLAMA_VOCAB_PRE_TYPE_OLMO: Integer
34
- LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
35
- LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
36
- LLAMA_VOCAB_PRE_TYPE_PORO: Integer
37
- LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
38
- LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
39
- LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
40
- LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
41
- LLAMA_VOCAB_PRE_TYPE_TEKKEN: Integer
42
- LLAMA_VOCAB_PRE_TYPE_SMOLLM: Integer
43
- LLAMA_VOCAB_PRE_TYPE_CODESHELL: Integer
44
- LLAMA_VOCAB_PRE_TYPE_BLOOM: Integer
45
- LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH: Integer
46
- LLAMA_VOCAB_PRE_TYPE_EXAONE: Integer
47
-
48
- LLAMA_TOKEN_ATTR_UNDEFINED: Integer
49
- LLAMA_TOKEN_ATTR_UNKNOWN: Integer
50
- LLAMA_TOKEN_ATTR_UNUSED: Integer
51
- LLAMA_TOKEN_ATTR_NORMAL: Integer
52
- LLAMA_TOKEN_ATTR_CONTROL: Integer
53
- LLAMA_TOKEN_ATTR_USER_DEFINED: Integer
54
- LLAMA_TOKEN_ATTR_BYTE: Integer
55
- LLAMA_TOKEN_ATTR_NORMALIZED: Integer
56
- LLAMA_TOKEN_ATTR_LSTRIP: Integer
57
- LLAMA_TOKEN_ATTR_RSTRIP: Integer
58
- LLAMA_TOKEN_ATTR_SINGLE_WORD: Integer
59
-
60
- LLAMA_FTYPE_ALL_F32: Integer
61
- LLAMA_FTYPE_MOSTLY_F16: Integer
62
- LLAMA_FTYPE_MOSTLY_Q4_0: Integer
63
- LLAMA_FTYPE_MOSTLY_Q4_1: Integer
64
- LLAMA_FTYPE_MOSTLY_Q8_0: Integer
65
- LLAMA_FTYPE_MOSTLY_Q5_0: Integer
66
- LLAMA_FTYPE_MOSTLY_Q5_1: Integer
67
- LLAMA_FTYPE_MOSTLY_Q2_K: Integer
68
- LLAMA_FTYPE_MOSTLY_Q3_K_S: Integer
69
- LLAMA_FTYPE_MOSTLY_Q3_K_M: Integer
70
- LLAMA_FTYPE_MOSTLY_Q3_K_L: Integer
71
- LLAMA_FTYPE_MOSTLY_Q4_K_S: Integer
72
- LLAMA_FTYPE_MOSTLY_Q4_K_M: Integer
73
- LLAMA_FTYPE_MOSTLY_Q5_K_S: Integer
74
- LLAMA_FTYPE_MOSTLY_Q5_K_M: Integer
75
- LLAMA_FTYPE_MOSTLY_Q6_K: Integer
76
- LLAMA_FTYPE_MOSTLY_IQ2_XXS: Integer
77
- LLAMA_FTYPE_MOSTLY_IQ2_XS: Integer
78
- LLAMA_FTYPE_MOSTLY_Q2_K_S: Integer
79
- LLAMA_FTYPE_MOSTLY_IQ3_XS: Integer
80
- LLAMA_FTYPE_MOSTLY_IQ3_XXS: Integer
81
- LLAMA_FTYPE_MOSTLY_IQ1_S: Integer
82
- LLAMA_FTYPE_MOSTLY_IQ4_NL: Integer
83
- LLAMA_FTYPE_MOSTLY_IQ3_S: Integer
84
- LLAMA_FTYPE_MOSTLY_IQ3_M: Integer
85
- LLAMA_FTYPE_MOSTLY_IQ4_XS: Integer
86
- LLAMA_FTYPE_MOSTLY_IQ1_M: Integer
87
- LLAMA_FTYPE_MOSTLY_BF16: Integer
88
- LLAMA_FTYPE_MOSTLY_Q4_0_4_4: Integer
89
- LLAMA_FTYPE_MOSTLY_Q4_0_4_8: Integer
90
- LLAMA_FTYPE_MOSTLY_Q4_0_8_8: Integer
91
- LLAMA_FTYPE_MOSTLY_TQ1_0: Integer
92
- LLAMA_FTYPE_MOSTLY_TQ2_0: Integer
93
- LLAMA_FTYPE_GUESSED: Integer
94
-
95
- LLAMA_KV_OVERRIDE_TYPE_INT: Integer
96
- LLAMA_KV_OVERRIDE_TYPE_FLOAT: Integer
97
- LLAMA_KV_OVERRIDE_TYPE_BOOL: Integer
98
- LLAMA_KV_OVERRIDE_TYPE_STR: Integer
99
-
100
- LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED: Integer
101
- LLAMA_ROPE_SCALING_TYPE_NONE: Integer
102
- LLAMA_ROPE_SCALING_TYPE_LINEAR: Integer
103
- LLAMA_ROPE_SCALING_TYPE_YARN: Integer
104
- LLAMA_ROPE_SCALING_TYPE_MAX_VALUE: Integer
105
-
106
- LLAMA_POOLING_TYPE_UNSPECIFIED: Integer
107
- LLAMA_POOLING_TYPE_NONE: Integer
108
- LLAMA_POOLING_TYPE_MEAN: Integer
109
- LLAMA_POOLING_TYPE_CLS: Integer
110
- LLAMA_POOLING_TYPE_LAST: Integer
111
-
112
- LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
113
- LLAMA_ATTENTION_TYPE_CAUSAL: Integer
114
- LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
115
-
116
- LLAMA_SPLIT_MODE_NONE: Integer
117
- LLAMA_SPLIT_MODE_LAYER: Integer
118
- LLAMA_SPLIT_MODE_ROW: Integer
119
-
120
- def self?.backend_init: () -> void
121
- def self?.backend_free: () -> void
122
- def self?.numa_init: (Integer) -> void
123
- def self?.model_quantize: (input_path: String, output_path: String, params: ModelQuantizeParams) -> void
124
- def self?.generate: (::LLaMACpp::Context, String,
125
- ?n_predict: Integer, ?n_threads: Integer, ?n_keep: Integer, ?n_batch: Integer,
126
- ?repeat_last_n: Integer, ?repeat_penalty: Float, ?frequency: Float, ?presence: Float,
127
- ?top_k: Integer, ?top_p: Float, ?tfs_z: Float, ?typical_p: Float, ?temperature: Float) -> String
128
- def self?.print_system_info: () -> void
129
- def self?.time_us: () -> Integer
130
- def self?.max_devices: () -> Integer
131
- def self?.supports_mmap?: () -> bool
132
- def self?.supports_mlock?: () -> bool
133
- def self?.supports_gpu_offload?: () -> bool
134
-
135
- class TokenData
136
- public
137
-
138
- def initialize: (id: Integer, logit: Float, p: Float) -> void
139
- def id: () -> Integer
140
- def id=: (Integer) -> Integer
141
- def logit: () -> Float
142
- def logit=: (Float) -> Float
143
- def p: () -> Float
144
- def p=: (Float) -> Float
145
- end
146
-
147
- class TokenDataArray
148
- public
149
-
150
- def initialize: (Array[::LLaMACpp::TokenData], ?sorted: bool) -> void
151
- def size: () -> Integer
152
- def sorted: () -> bool
153
- end
154
-
155
- class Model
156
- public
157
-
158
- def initialize: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
159
- | () -> void
160
- def empty?: () -> bool
161
- def free: () -> void
162
- def load: (model_path: String, params: ::LLaMACpp::ModelParams) -> void
163
- def vocab_type: () -> Integer
164
- def rope_type: () -> Integer
165
- def n_vocab: () -> Integer
166
- def n_ctx_train: () -> Integer
167
- def n_embd: () -> Integer
168
- def n_layer: () -> Integer
169
- def rope_freq_scale_train: () -> Float
170
- def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
171
- def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
172
- def desc: () -> String
173
- def size: () -> Integer
174
- def n_params: () -> Integer
175
- def text: (Integer) -> String
176
- def score: (Integer) -> Float
177
- def token_attr: (Integer) -> Integer
178
- def token_bos: () -> Integer
179
- def token_eos: () -> Integer
180
- def token_cls: () -> Integer
181
- def token_sep: () -> Integer
182
- def token_nl: () -> Integer
183
- def token_pad: () -> Integer
184
- def add_bos_token?: () -> bool
185
- def add_eos_token?: () -> bool
186
- def token_prefix: () -> Integer
187
- def token_middle: () -> Integer
188
- def token_suffix: () -> Integer
189
- def token_eot: () -> Integer
190
- def token_is_eog?: (Integer) -> bool
191
- def token_is_control?: (Integer) -> bool
192
- def has_encoder?: () -> bool
193
- def has_decoder?: () -> bool
194
- def decoder_start_token: () -> Integer
195
- def is_recurrent?: () -> bool
196
- def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
197
- end
198
-
199
- class ModelKVOverride
200
- public
201
-
202
- def key: () -> String
203
- def tag: () -> Integer
204
- def val_i64: () -> Integer
205
- def val_f64: () -> Float
206
- def val_bool: () -> bool
207
- def val_str: () -> String
208
- end
209
-
210
- class ModelParams
211
- public
212
-
213
- def n_gpu_layers: () -> Integer
214
- def n_gpu_layers=: (Integer) -> Integer
215
- def split_mode: () -> Integer
216
- def split_mode=: (Integer) -> Integer
217
- def main_gpu: () -> Integer
218
- def main_gpu=: (Integer) -> Integer
219
- def tensor_split: () -> Array[Float]
220
- def vocab_only: () -> bool
221
- def vocab_only=: (bool) -> bool
222
- def use_mmap: () -> bool
223
- def use_mmap=: (bool) -> bool
224
- def use_mlock: () -> bool
225
- def use_mlock=: (bool) -> bool
226
- def check_tensors: () -> bool
227
- def check_tensors=: (bool) -> bool
228
- end
229
-
230
- class Batch
231
- public
232
-
233
- def self.get_one: (tokens: Array[Integer], n_tokens: Integer, pos_zero: Integer, seq_id: Integer) -> ::LLaMACpp::Batch
234
- def initialize: (max_n_token: Integer, n_embd: Integer, max_n_seq: Integer) -> void
235
- def n_tokens=: (Integer) -> Integer
236
- def n_tokens: () -> Integer
237
- def all_pos_zero=: (Integer) -> Integer
238
- def all_pos_zero: () -> Integer
239
- def all_pos_one=: (Integer) -> Integer
240
- def all_pos_one: () -> Integer
241
- def all_seq_id=: (Integer) -> Integer
242
- def all_seq_id: () -> Integer
243
- def set_token: (Integer, Integer) -> Integer
244
- def get_token: (Integer) -> Integer
245
- def set_pos: (Integer, Integer) -> Integer
246
- def get_pos: (Integer) -> Integer
247
- def set_n_seq_id: (Integer, Integer) -> Integer
248
- def get_n_seq_id: (Integer) -> Integer
249
- def set_seq_id: (Integer, Integer, Integer) -> Integer
250
- def get_seq_id: (Integer, Integer) -> Integer
251
- def set_logit: (Integer, bool) -> bool
252
- def get_logit: (Integer) -> bool
253
- end
254
-
255
- class Context
256
- public
257
-
258
- attr_reader model: ::LLaMACpp::Model
259
-
260
- def initialize: (model: ::LLaMACpp::Model, params: ::LLaMACpp::ContextParams) -> void
261
- def embeddings: () -> Array[Float]
262
- def embeddings_ith: (Integer) -> Array[Float]
263
- def embeddings_seq: (Integer) -> Array[Float]
264
- def encode: (::LLaMACpp::Batch) -> void
265
- def decode: (::LLaMACpp::Batch) -> void
266
- def logits: () -> Array[Float]
267
- def set_embeddings: (bool) -> void
268
- def set_n_threads: (n_threads: Integer, n_threads_batch: Integer) -> void
269
- def n_ctx: () -> Integer
270
- def n_batch: () -> Integer
271
- def n_ubatch: () -> Integer
272
- def n_seq_max: () -> Integer
273
- def n_threads: () -> Integer
274
- def n_threads_batch: () -> Integer
275
- def kv_cache_token_count: () -> Integer
276
- def kv_cache_clear: () -> void
277
- def kv_cache_seq_rm: (Integer, Integer,Integer) -> void
278
- def kv_cache_seq_cp: (Integer, Integer,Integer, Integer) -> void
279
- def kv_cache_seq_keep: (Integer) -> void
280
- def kv_cache_seq_add: (Integer, Integer, Integer, Integer) -> void
281
- def kv_cache_seq_div: (Integer, Integer, Integer, Integer) -> void
282
- def kv_cache_seq_pos_max: (Integer) -> Integer
283
- def kv_cache_defrag: () -> void
284
- def kv_cache_update: () -> void
285
- def set_causal_attn: (bool) -> void
286
- def synchronize: () -> void
287
- def load_session_file: (session_path: String) -> void
288
- def save_session_file: (session_path: String, session_tokens: Array[Integer]) -> void
289
- def apply_control_vector: (data: Array[Float], n_embd: Integer, il_start: Integer, il_end: Integer) -> void
290
- def pooling_type: () -> Integer
291
- end
292
-
293
- class ContextParams
294
- public
295
-
296
- def n_ctx: () -> Integer
297
- def n_ctx=: (Integer) -> Integer
298
- def n_batch: () -> Integer
299
- def n_batch=: (Integer) -> Integer
300
- def n_ubatch: () -> Integer
301
- def n_ubatch=: (Integer) -> Integer
302
- def n_seq_max: () -> Integer
303
- def n_seq_max=: (Integer) -> Integer
304
- def n_threads: () -> Integer
305
- def n_threads=: (Integer) -> Integer
306
- def n_threads_batch: () -> Integer
307
- def n_threads_batch=: (Integer) -> Integer
308
- def rope_scaling_type=: (Integer) -> Integer
309
- def rope_scaling_type: () -> Integer
310
- def pooling_type=: (Integer) -> Integer
311
- def pooling_type: () -> Integer
312
- def attention_type=: (Integer) -> Integer
313
- def attention_type: () -> Integer
314
- def rope_freq_base=: (Float) -> Float
315
- def rope_freq_base: () -> Float
316
- def rope_freq_scale=: (Float) -> Float
317
- def rope_freq_scale: () -> Float
318
- def yarn_ext_factor=: (Float) -> Float
319
- def yarn_ext_factor: () -> Float
320
- def yarn_attn_factor=: (Float) -> Float
321
- def yarn_attn_factor: () -> Float
322
- def yarn_beta_fast=: (Float) -> Float
323
- def yarn_beta_fast: () -> Float
324
- def yarn_beta_slow=: (Float) -> Float
325
- def yarn_beta_slow: () -> Float
326
- def yarn_orig_ctx=: (Integer) -> Integer
327
- def yarn_orig_ctx: () -> Integer
328
- def defrag_thold=: (Float) -> Float
329
- def defrag_thold: () -> Float
330
- def type_k=: (Integer) -> Integer
331
- def type_k: () -> Integer
332
- def type_v=: (Integer) -> Integer
333
- def type_v: () -> Integer
334
- def logits_all: () -> bool
335
- def logits_all=: (bool) -> bool
336
- def embeddings: () -> bool
337
- def embeddings=: (bool) -> bool
338
- def offload_kqv: () -> bool
339
- def offload_kqv=: (bool) -> bool
340
- def flash_attn: () -> bool
341
- def flash_attn=: (bool) -> bool
342
- end
343
-
344
- class ModelQuantizeParams
345
- public
346
-
347
- attr_reader params: ::LLaMACpp::ModelParams
348
-
349
- def n_thread: () -> Integer
350
- def n_thread=: (Integer) -> Integer
351
- def ftype: () -> Integer
352
- def ftype=: (Integer) -> Integer
353
- def allow_quantization: () -> bool
354
- def allow_quantization=: (bool) -> bool
355
- def quantize_output_tensor: () -> bool
356
- def quantize_output_tensor=: (bool) -> bool
357
- def only_copy: () -> bool
358
- def only_copy=: (bool) -> bool
359
- def pure: () -> bool
360
- def pure=: (bool) -> bool
361
- def keep_split: () -> bool
362
- def keep_split=: (bool) -> bool
363
- end
364
-
365
- class Params = ContextParams
366
- end