llama_cpp 0.21.1 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -1
- data/ext/llama_cpp/llama_cpp.c +59 -353
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44ea7eb462df4d37cd20d802e5f305d4ff05b605e3d442ca966458714a57881b
|
4
|
+
data.tar.gz: f3e73839dda61b43efd71c0d325384b78c6b47fc8535ab62d25a54c66aa6d09f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2060e02b8bb154b5d34c242b54f331b840cc5e3f46bce6bcc307b52de4e6eb2613a5e725154b51c9e272576c00d004f72827acb035f62c280862fae94a9f5a5
|
7
|
+
data.tar.gz: 43a110ce026562b17b6f7da9b9bd013be7671bf457c3ddb84a3de362333a7b7f665031577755c7de984e0e84da39f1c474f96dc830040fa08185b5b405169697
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,36 @@
|
|
1
|
+
## [[0.22.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.2...v0.22.0)] - 2025-08-23
|
2
|
+
|
3
|
+
- Change supported llama.cpp version to b6240.
|
4
|
+
- Add `llama_state_seq_get_size_ext` module function.
|
5
|
+
- Add `LLAMA_STATE_SEQ_FLAGS_SWA_ONLY` constant.
|
6
|
+
- Remove `LlamaKvCache` class.
|
7
|
+
- Remove `llama_get_kv_self` module function.
|
8
|
+
- Remove `llama_kv_self_clear` module function.
|
9
|
+
- Remove `llama_kv_self_seq_rm` module function.
|
10
|
+
- Remove `llama_kv_self_seq_cp` module function.
|
11
|
+
- Remove `llama_kv_self_seq_keep` module function.
|
12
|
+
- Remove `llama_kv_self_seq_add` module function.
|
13
|
+
- Remove `llama_kv_self_seq_div` module function.
|
14
|
+
- Remove `llama_kv_self_seq_pos_min` module function.
|
15
|
+
- Remove `llama_kv_self_seq_pos_max` module function.
|
16
|
+
- Remove `llama_kv_self_defrag` module function.
|
17
|
+
- Remove `llama_kv_self_can_shift?` module function.
|
18
|
+
- Remove `llama_kv_self_update` module function.
|
19
|
+
|
20
|
+
## [[0.21.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.1...v0.21.2)] - 2025-08-09
|
21
|
+
|
22
|
+
- Change supported llama.cpp version to b6100.
|
23
|
+
- Add `LLAMA_FTYPE_MOSTLY_MXFP4_MOE` constant.
|
24
|
+
- Add `use_extra_bufts` accessor to `LlamaModelParams`.
|
25
|
+
- Add `llama_model_is_diffusion?` module function.
|
26
|
+
|
1
27
|
## [[0.21.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.21.0...v0.21.1)] - 2025-07-19
|
2
28
|
|
3
29
|
- Change supported llama.cpp version to b5930.
|
4
30
|
- Add `n_reused` reader to `LlamaPerfContextData`.
|
5
31
|
- Add `llama_vocab_mask` module function.
|
6
32
|
- Add `kv_unified` accessor to `LlamaContextParams`.
|
7
|
-
- Add `LLAMA_VOCAB_TYPE_PLAMO2`
|
33
|
+
- Add `LLAMA_VOCAB_TYPE_PLAMO2` constant.
|
8
34
|
|
9
35
|
## [[0.21.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.20.4...v0.21.0)] - 2025-07-12
|
10
36
|
|
data/ext/llama_cpp/llama_cpp.c
CHANGED
@@ -11,7 +11,6 @@ VALUE rb_cLlamaModelQuantizeParams;
|
|
11
11
|
VALUE rb_cLlamaLogitBias;
|
12
12
|
VALUE rb_cLlamaAdapterLora;
|
13
13
|
VALUE rb_cLlamaMemoryT;
|
14
|
-
VALUE rb_cLlamaKvCache;
|
15
14
|
VALUE rb_cLlamaTokenDataArray;
|
16
15
|
VALUE rb_cLlamaBatch;
|
17
16
|
VALUE rb_cLlamaSampler;
|
@@ -530,6 +529,17 @@ static VALUE llama_model_params_set_check_tensors(VALUE self, VALUE check_tensor
|
|
530
529
|
return check_tensors;
|
531
530
|
}
|
532
531
|
|
532
|
+
static VALUE llama_model_params_get_use_extra_bufts(VALUE self) {
|
533
|
+
struct llama_model_params* data = get_llama_model_params(self);
|
534
|
+
return data->use_extra_bufts ? Qtrue : Qfalse;
|
535
|
+
}
|
536
|
+
|
537
|
+
static VALUE llama_model_params_set_use_extra_bufts(VALUE self, VALUE use_extra_bufts) {
|
538
|
+
struct llama_model_params* data = get_llama_model_params(self);
|
539
|
+
data->use_extra_bufts = RTEST(use_extra_bufts) ? true : false;
|
540
|
+
return use_extra_bufts;
|
541
|
+
}
|
542
|
+
|
533
543
|
/* struct llama_context_params */
|
534
544
|
static void llama_context_params_free(void *ptr) {
|
535
545
|
if (ptr) {
|
@@ -1774,6 +1784,20 @@ static VALUE rb_llama_model_is_recurrent(VALUE self, VALUE model) {
|
|
1774
1784
|
return llama_model_is_recurrent(model_wrapper->model) ? Qtrue : Qfalse;
|
1775
1785
|
}
|
1776
1786
|
|
1787
|
+
/**
|
1788
|
+
* @overload llama_model_is_diffusion?(model)
|
1789
|
+
* @param [LlamaModel] model
|
1790
|
+
* @return [Boolean]
|
1791
|
+
*/
|
1792
|
+
static VALUE rb_llama_model_is_diffusion(VALUE self, VALUE model) {
|
1793
|
+
if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
|
1794
|
+
rb_raise(rb_eArgError, "model must be a LlamaModel");
|
1795
|
+
return Qnil;
|
1796
|
+
}
|
1797
|
+
llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
|
1798
|
+
return llama_model_is_diffusion(model_wrapper->model) ? Qtrue : Qfalse;
|
1799
|
+
}
|
1800
|
+
|
1777
1801
|
/**
|
1778
1802
|
* @overload llama_model_quantize(fname_inp, fname_out, params)
|
1779
1803
|
* @param [String] fname_inp
|
@@ -2147,264 +2171,29 @@ static VALUE rb_llama_get_memory(VALUE self, VALUE ctx) {
|
|
2147
2171
|
return TypedData_Wrap_Struct(rb_cLlamaMemoryT, &llama_memory_t_wrapper_data_type, memory_wrapper);
|
2148
2172
|
}
|
2149
2173
|
|
2150
|
-
/* llama_kv_cache wrapper */
|
2151
|
-
typedef struct {
|
2152
|
-
struct llama_kv_cache* kv_cache;
|
2153
|
-
} llama_kv_cache_wrapper;
|
2154
|
-
|
2155
|
-
static void llama_kv_cache_wrapper_free(void *ptr) {
|
2156
|
-
if (ptr) {
|
2157
|
-
ruby_xfree(ptr);
|
2158
|
-
}
|
2159
|
-
}
|
2160
|
-
|
2161
|
-
static size_t llama_kv_cache_wrapper_size(const void *ptr) {
|
2162
|
-
return sizeof(*((llama_kv_cache_wrapper*)ptr));
|
2163
|
-
}
|
2164
|
-
|
2165
|
-
static rb_data_type_t llama_kv_cache_wrapper_data_type = {
|
2166
|
-
"LlamaKvCache",
|
2167
|
-
{ NULL,
|
2168
|
-
llama_kv_cache_wrapper_free,
|
2169
|
-
llama_kv_cache_wrapper_size },
|
2170
|
-
NULL,
|
2171
|
-
NULL,
|
2172
|
-
RUBY_TYPED_FREE_IMMEDIATELY
|
2173
|
-
};
|
2174
|
-
|
2175
|
-
static VALUE llama_kv_cache_wrapper_alloc(VALUE self) {
|
2176
|
-
llama_kv_cache_wrapper* data = (llama_kv_cache_wrapper*)ruby_xmalloc(sizeof(llama_kv_cache_wrapper));
|
2177
|
-
data->kv_cache = NULL;
|
2178
|
-
return TypedData_Wrap_Struct(self, &llama_kv_cache_wrapper_data_type, data);
|
2179
|
-
}
|
2180
|
-
|
2181
|
-
// static llama_kv_cache_wrapper* get_llama_kv_cache_wrapper(VALUE self) {
|
2182
|
-
// llama_kv_cache_wrapper* data = NULL;
|
2183
|
-
// TypedData_Get_Struct(self, llama_kv_cache_wrapper, &llama_kv_cache_wrapper_data_type, data);
|
2184
|
-
// return data;
|
2185
|
-
// }
|
2186
|
-
|
2187
|
-
/**
|
2188
|
-
* @overload llama_get_kv_self(context)
|
2189
|
-
* @param [LlamaContext] context
|
2190
|
-
* @return [LlamaKvCache]
|
2191
|
-
*/
|
2192
|
-
static VALUE rb_llama_get_kv_self(VALUE self, VALUE ctx) {
|
2193
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2194
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2195
|
-
return Qnil;
|
2196
|
-
}
|
2197
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2198
|
-
llama_kv_cache_wrapper* kv_cache_wrapper = (llama_kv_cache_wrapper*)ruby_xmalloc(sizeof(llama_kv_cache_wrapper));
|
2199
|
-
kv_cache_wrapper->kv_cache = llama_get_kv_self(context_wrapper->context);
|
2200
|
-
RB_GC_GUARD(ctx);
|
2201
|
-
return TypedData_Wrap_Struct(rb_cLlamaKvCache, &llama_kv_cache_wrapper_data_type, kv_cache_wrapper);
|
2202
|
-
}
|
2203
|
-
|
2204
|
-
/**
|
2205
|
-
* @overload llama_kv_self_clear(context)
|
2206
|
-
* @param [LlamaContext] context
|
2207
|
-
* @return [NilClass]
|
2208
|
-
*/
|
2209
|
-
static VALUE rb_llama_kv_self_clear(VALUE self, VALUE ctx) {
|
2210
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2211
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2212
|
-
return Qnil;
|
2213
|
-
}
|
2214
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2215
|
-
llama_kv_self_clear(context_wrapper->context);
|
2216
|
-
RB_GC_GUARD(ctx);
|
2217
|
-
return Qnil;
|
2218
|
-
}
|
2219
|
-
|
2220
|
-
/**
|
2221
|
-
* @overload llama_kv_self_seq_rm(context, seq_id, p0, p1)
|
2222
|
-
* @param [LlamaContext] context
|
2223
|
-
* @param [Integer] seq_id
|
2224
|
-
* @param [Integer] p0
|
2225
|
-
* @param [Integer] p1
|
2226
|
-
* @return [Boolean]
|
2227
|
-
*/
|
2228
|
-
static VALUE rb_llama_kv_self_seq_rm(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1) {
|
2229
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2230
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2231
|
-
return Qnil;
|
2232
|
-
}
|
2233
|
-
if (!RB_INTEGER_TYPE_P(seq_id)) {
|
2234
|
-
rb_raise(rb_eArgError, "seq_id must be an Integer");
|
2235
|
-
return Qnil;
|
2236
|
-
}
|
2237
|
-
if (!RB_INTEGER_TYPE_P(p0)) {
|
2238
|
-
rb_raise(rb_eArgError, "p0 must be an Integer");
|
2239
|
-
return Qnil;
|
2240
|
-
}
|
2241
|
-
if (!RB_INTEGER_TYPE_P(p1)) {
|
2242
|
-
rb_raise(rb_eArgError, "p1 must be an Integer");
|
2243
|
-
return Qnil;
|
2244
|
-
}
|
2245
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2246
|
-
const bool res = llama_kv_self_seq_rm(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1));
|
2247
|
-
RB_GC_GUARD(ctx);
|
2248
|
-
return res ? Qtrue : Qfalse;
|
2249
|
-
}
|
2250
|
-
|
2251
|
-
/**
|
2252
|
-
* @overload llama_kv_self_seq_cp(context, seq_id_src, seq_id_dst, p0, p1)
|
2253
|
-
* @param [LlamaContext] context
|
2254
|
-
* @param [Integer] seq_id_src
|
2255
|
-
* @param [Integer] seq_id_dst
|
2256
|
-
* @param [Integer] p0
|
2257
|
-
* @param [Integer] p1
|
2258
|
-
* @return [NilClass]
|
2259
|
-
*/
|
2260
|
-
static VALUE rb_llama_kv_self_seq_cp(VALUE self, VALUE ctx, VALUE seq_id_src, VALUE seq_id_dst, VALUE p0, VALUE p1) {
|
2261
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2262
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2263
|
-
return Qnil;
|
2264
|
-
}
|
2265
|
-
if (!RB_INTEGER_TYPE_P(seq_id_src)) {
|
2266
|
-
rb_raise(rb_eArgError, "seq_id_src must be an Integer");
|
2267
|
-
return Qnil;
|
2268
|
-
}
|
2269
|
-
if (!RB_INTEGER_TYPE_P(seq_id_dst)) {
|
2270
|
-
rb_raise(rb_eArgError, "seq_id_dst must be an Integer");
|
2271
|
-
return Qnil;
|
2272
|
-
}
|
2273
|
-
if (!RB_INTEGER_TYPE_P(p0)) {
|
2274
|
-
rb_raise(rb_eArgError, "p0 must be an Integer");
|
2275
|
-
return Qnil;
|
2276
|
-
}
|
2277
|
-
if (!RB_INTEGER_TYPE_P(p1)) {
|
2278
|
-
rb_raise(rb_eArgError, "p1 must be an Integer");
|
2279
|
-
return Qnil;
|
2280
|
-
}
|
2281
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2282
|
-
llama_kv_self_seq_cp(context_wrapper->context, NUM2INT(seq_id_src), NUM2INT(seq_id_dst), NUM2INT(p0), NUM2INT(p1));
|
2283
|
-
RB_GC_GUARD(ctx);
|
2284
|
-
return Qnil;
|
2285
|
-
}
|
2286
|
-
|
2287
|
-
/**
|
2288
|
-
* @overload llama_kv_self_seq_keep(context, seq_id)
|
2289
|
-
* @param [LlamaContext] context
|
2290
|
-
* @param [Integer] seq_id
|
2291
|
-
* @return [NilClass]
|
2292
|
-
*/
|
2293
|
-
static VALUE rb_llama_kv_self_seq_keep(VALUE self, VALUE ctx, VALUE seq_id) {
|
2294
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2295
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2296
|
-
return Qnil;
|
2297
|
-
}
|
2298
|
-
if (!RB_INTEGER_TYPE_P(seq_id)) {
|
2299
|
-
rb_raise(rb_eArgError, "seq_id must be an Integer");
|
2300
|
-
return Qnil;
|
2301
|
-
}
|
2302
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2303
|
-
llama_kv_self_seq_keep(context_wrapper->context, NUM2INT(seq_id));
|
2304
|
-
RB_GC_GUARD(ctx);
|
2305
|
-
return Qnil;
|
2306
|
-
}
|
2307
|
-
|
2308
|
-
/**
|
2309
|
-
* @overload llama_kv_self_seq_add(context, seq_id, p0, p1, delta)
|
2310
|
-
* @param [LlamaContext] context
|
2311
|
-
* @param [Integer] seq_id
|
2312
|
-
* @param [Integer] p0
|
2313
|
-
* @param [Integer] p1
|
2314
|
-
* @param [Integer] delta
|
2315
|
-
* @return [NilClass]
|
2316
|
-
*/
|
2317
|
-
static VALUE rb_llama_kv_self_seq_add(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE delta) {
|
2318
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2319
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2320
|
-
return Qnil;
|
2321
|
-
}
|
2322
|
-
if (!RB_INTEGER_TYPE_P(seq_id)) {
|
2323
|
-
rb_raise(rb_eArgError, "seq_id must be an Integer");
|
2324
|
-
return Qnil;
|
2325
|
-
}
|
2326
|
-
if (!RB_INTEGER_TYPE_P(p0)) {
|
2327
|
-
rb_raise(rb_eArgError, "p0 must be an Integer");
|
2328
|
-
return Qnil;
|
2329
|
-
}
|
2330
|
-
if (!RB_INTEGER_TYPE_P(p1)) {
|
2331
|
-
rb_raise(rb_eArgError, "p1 must be an Integer");
|
2332
|
-
return Qnil;
|
2333
|
-
}
|
2334
|
-
if (!RB_INTEGER_TYPE_P(delta)) {
|
2335
|
-
rb_raise(rb_eArgError, "delta must be an Integer");
|
2336
|
-
return Qnil;
|
2337
|
-
}
|
2338
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2339
|
-
llama_kv_self_seq_add(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(delta));
|
2340
|
-
RB_GC_GUARD(ctx);
|
2341
|
-
return Qnil;
|
2342
|
-
}
|
2343
|
-
|
2344
|
-
/**
|
2345
|
-
* @overload llama_kv_self_seq_div(context, seq_id, p0, p1, d)
|
2346
|
-
* @param [LlamaContext] context
|
2347
|
-
* @param [Integer] seq_id
|
2348
|
-
* @param [Integer] p0
|
2349
|
-
* @param [Integer] p1
|
2350
|
-
* @param [Integer] d
|
2351
|
-
* @return [NilClass]
|
2352
|
-
*/
|
2353
|
-
static VALUE rb_llama_kv_self_seq_div(VALUE self, VALUE ctx, VALUE seq_id, VALUE p0, VALUE p1, VALUE d) {
|
2354
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2355
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2356
|
-
return Qnil;
|
2357
|
-
}
|
2358
|
-
if (!RB_INTEGER_TYPE_P(seq_id)) {
|
2359
|
-
rb_raise(rb_eArgError, "seq_id must be an Integer");
|
2360
|
-
return Qnil;
|
2361
|
-
}
|
2362
|
-
if (!RB_INTEGER_TYPE_P(p0)) {
|
2363
|
-
rb_raise(rb_eArgError, "p0 must be an Integer");
|
2364
|
-
return Qnil;
|
2365
|
-
}
|
2366
|
-
if (!RB_INTEGER_TYPE_P(p1)) {
|
2367
|
-
rb_raise(rb_eArgError, "p1 must be an Integer");
|
2368
|
-
return Qnil;
|
2369
|
-
}
|
2370
|
-
if (!RB_INTEGER_TYPE_P(d)) {
|
2371
|
-
rb_raise(rb_eArgError, "d must be an Integer");
|
2372
|
-
return Qnil;
|
2373
|
-
}
|
2374
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2375
|
-
llama_kv_self_seq_div(context_wrapper->context, NUM2INT(seq_id), NUM2INT(p0), NUM2INT(p1), NUM2INT(d));
|
2376
|
-
RB_GC_GUARD(ctx);
|
2377
|
-
return Qnil;
|
2378
|
-
}
|
2379
|
-
|
2380
2174
|
/**
|
2381
|
-
* @overload
|
2175
|
+
* @overload llama_state_get_size(context)
|
2382
2176
|
* @param [LlamaContext] context
|
2383
|
-
* @param [Integer] seq_id
|
2384
2177
|
* @return [Integer]
|
2385
2178
|
*/
|
2386
|
-
static VALUE
|
2179
|
+
static VALUE rb_llama_state_get_size(VALUE self, VALUE ctx) {
|
2387
2180
|
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2388
2181
|
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2389
2182
|
return Qnil;
|
2390
2183
|
}
|
2391
|
-
if (!RB_INTEGER_TYPE_P(seq_id)) {
|
2392
|
-
rb_raise(rb_eArgError, "seq_id must be an Integer");
|
2393
|
-
return Qnil;
|
2394
|
-
}
|
2395
2184
|
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2396
|
-
const
|
2185
|
+
const size_t size = llama_state_get_size(context_wrapper->context);
|
2397
2186
|
RB_GC_GUARD(ctx);
|
2398
|
-
return
|
2187
|
+
return SIZET2NUM(size);
|
2399
2188
|
}
|
2400
2189
|
|
2401
2190
|
/**
|
2402
|
-
* @overload
|
2191
|
+
* @overload llama_state_seq_get_size(context, seq_id)
|
2403
2192
|
* @param [LlamaContext] context
|
2404
2193
|
* @param [Integer] seq_id
|
2405
2194
|
* @return [Integer]
|
2406
2195
|
*/
|
2407
|
-
static VALUE
|
2196
|
+
static VALUE rb_llama_state_seq_get_size(VALUE self, VALUE ctx, VALUE seq_id) {
|
2408
2197
|
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2409
2198
|
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2410
2199
|
return Qnil;
|
@@ -2414,82 +2203,19 @@ static VALUE rb_llama_kv_self_seq_pos_max(VALUE self, VALUE ctx, VALUE seq_id) {
|
|
2414
2203
|
return Qnil;
|
2415
2204
|
}
|
2416
2205
|
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2417
|
-
const
|
2418
|
-
RB_GC_GUARD(ctx);
|
2419
|
-
return INT2NUM(pos_max);
|
2420
|
-
}
|
2421
|
-
|
2422
|
-
/**
|
2423
|
-
* @overload llama_kv_self_defrag(context)
|
2424
|
-
* @param [LlamaContext] context
|
2425
|
-
* @return [NilClass]
|
2426
|
-
*/
|
2427
|
-
static VALUE rb_llama_kv_self_defrag(VALUE self, VALUE ctx) {
|
2428
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2429
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2430
|
-
return Qnil;
|
2431
|
-
}
|
2432
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2433
|
-
llama_kv_self_defrag(context_wrapper->context);
|
2434
|
-
RB_GC_GUARD(ctx);
|
2435
|
-
return Qnil;
|
2436
|
-
}
|
2437
|
-
|
2438
|
-
/**
|
2439
|
-
* @overload llama_kv_self_update(context)
|
2440
|
-
* @param [LlamaContext] context
|
2441
|
-
* @return [NilClass]
|
2442
|
-
*/
|
2443
|
-
static VALUE rb_llama_kv_self_update(VALUE self, VALUE ctx) {
|
2444
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2445
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2446
|
-
return Qnil;
|
2447
|
-
}
|
2448
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2449
|
-
llama_kv_self_update(context_wrapper->context);
|
2450
|
-
RB_GC_GUARD(ctx);
|
2451
|
-
return Qnil;
|
2452
|
-
}
|
2453
|
-
|
2454
|
-
/**
|
2455
|
-
* @overload llama_kv_self_can_shift?(context)
|
2456
|
-
* @param [LlamaContext] context
|
2457
|
-
* @return [Boolean]
|
2458
|
-
*/
|
2459
|
-
static VALUE rb_llama_kv_self_can_shift(VALUE self, VALUE ctx) {
|
2460
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2461
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2462
|
-
return Qnil;
|
2463
|
-
}
|
2464
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2465
|
-
const bool res = llama_kv_self_can_shift(context_wrapper->context);
|
2466
|
-
RB_GC_GUARD(ctx);
|
2467
|
-
return res ? Qtrue : Qfalse;
|
2468
|
-
}
|
2469
|
-
|
2470
|
-
/**
|
2471
|
-
* @overload llama_state_get_size(context)
|
2472
|
-
* @param [LlamaContext] context
|
2473
|
-
* @return [Integer]
|
2474
|
-
*/
|
2475
|
-
static VALUE rb_llama_state_get_size(VALUE self, VALUE ctx) {
|
2476
|
-
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2477
|
-
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2478
|
-
return Qnil;
|
2479
|
-
}
|
2480
|
-
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2481
|
-
const size_t size = llama_state_get_size(context_wrapper->context);
|
2206
|
+
const size_t size = llama_state_seq_get_size(context_wrapper->context, NUM2INT(seq_id));
|
2482
2207
|
RB_GC_GUARD(ctx);
|
2483
2208
|
return SIZET2NUM(size);
|
2484
2209
|
}
|
2485
2210
|
|
2486
2211
|
/**
|
2487
|
-
* @overload
|
2212
|
+
* @overload llama_state_seq_get_size_ext(context, seq_id, flags)
|
2488
2213
|
* @param [LlamaContext] context
|
2489
2214
|
* @param [Integer] seq_id
|
2215
|
+
* @param [Integer] flags
|
2490
2216
|
* @return [Integer]
|
2491
2217
|
*/
|
2492
|
-
static VALUE
|
2218
|
+
static VALUE rb_llama_state_seq_get_size_ext(VALUE self, VALUE ctx, VALUE seq_id, VALUE flags) {
|
2493
2219
|
if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
|
2494
2220
|
rb_raise(rb_eArgError, "ctx must be a LlamaContext");
|
2495
2221
|
return Qnil;
|
@@ -2498,8 +2224,12 @@ static VALUE rb_llama_state_seq_get_size(VALUE self, VALUE ctx, VALUE seq_id) {
|
|
2498
2224
|
rb_raise(rb_eArgError, "seq_id must be an Integer");
|
2499
2225
|
return Qnil;
|
2500
2226
|
}
|
2227
|
+
if (!RB_INTEGER_TYPE_P(flags)) {
|
2228
|
+
rb_raise(rb_eArgError, "flags must be an Integer");
|
2229
|
+
return Qnil;
|
2230
|
+
}
|
2501
2231
|
llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
|
2502
|
-
const size_t size =
|
2232
|
+
const size_t size = llama_state_seq_get_size_ext(context_wrapper->context, NUM2INT(seq_id), (uint32_t)NUM2UINT(flags));
|
2503
2233
|
RB_GC_GUARD(ctx);
|
2504
2234
|
return SIZET2NUM(size);
|
2505
2235
|
}
|
@@ -4162,6 +3892,7 @@ void Init_llama_cpp(void) {
|
|
4162
3892
|
rb_define_const(rb_mLlamaCpp, "LLAMA_DEFAULT_SEED", rb_str_new2(tmp));
|
4163
3893
|
|
4164
3894
|
rb_define_const(rb_mLlamaCpp, "LLAMA_TOKEN_NULL", INT2NUM(LLAMA_TOKEN_NULL));
|
3895
|
+
rb_define_const(rb_mLlamaCpp, "LLAMA_STATE_SEQ_FLAGS_SWA_ONLY", INT2NUM(LLAMA_STATE_SEQ_FLAGS_SWA_ONLY));
|
4165
3896
|
|
4166
3897
|
sprintf(tmp, "0x%x", LLAMA_FILE_MAGIC_GGLA);
|
4167
3898
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FILE_MAGIC_GGLA", rb_str_new2(tmp));
|
@@ -4251,6 +3982,7 @@ void Init_llama_cpp(void) {
|
|
4251
3982
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_BF16", INT2NUM(LLAMA_FTYPE_MOSTLY_BF16));
|
4252
3983
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ1_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ1_0));
|
4253
3984
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_TQ2_0", INT2NUM(LLAMA_FTYPE_MOSTLY_TQ2_0));
|
3985
|
+
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_MOSTLY_MXFP4_MOE", INT2NUM(LLAMA_FTYPE_MOSTLY_MXFP4_MOE));
|
4254
3986
|
rb_define_const(rb_mLlamaCpp, "LLAMA_FTYPE_GUESSED", INT2NUM(LLAMA_FTYPE_GUESSED));
|
4255
3987
|
/* llama_rope_scaling_type */
|
4256
3988
|
/* Document-const: LlamaCpp::LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED */
|
@@ -4496,6 +4228,17 @@ void Init_llama_cpp(void) {
|
|
4496
4228
|
* @return [Boolean]
|
4497
4229
|
*/
|
4498
4230
|
rb_define_method(rb_cLlamaModelParams, "check_tensors=", RUBY_METHOD_FUNC(llama_model_params_set_check_tensors), 1);
|
4231
|
+
/**
|
4232
|
+
* Document-method: use_extra_bufts
|
4233
|
+
* @return [Boolean]
|
4234
|
+
*/
|
4235
|
+
rb_define_method(rb_cLlamaModelParams, "use_extra_bufts", RUBY_METHOD_FUNC(llama_model_params_get_use_extra_bufts), 0);
|
4236
|
+
/**
|
4237
|
+
* Document-method: use_extra_bufts=
|
4238
|
+
* @param [Boolean] use_extra_bufts
|
4239
|
+
* @return [Boolean]
|
4240
|
+
*/
|
4241
|
+
rb_define_method(rb_cLlamaModelParams, "use_extra_bufts=", RUBY_METHOD_FUNC(llama_model_params_set_use_extra_bufts), 1);
|
4499
4242
|
|
4500
4243
|
/**
|
4501
4244
|
* Document-class: LlamaCpp::LlamaContextParams
|
@@ -5029,9 +4772,6 @@ void Init_llama_cpp(void) {
|
|
5029
4772
|
/* TODO: llama_get_model */
|
5030
4773
|
rb_define_module_function(rb_mLlamaCpp, "llama_get_model", rb_llama_get_model, 1);
|
5031
4774
|
|
5032
|
-
/* llama_get_kv_self */
|
5033
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_get_kv_self", rb_llama_get_kv_self, 1);
|
5034
|
-
|
5035
4775
|
/* llama_get_memory */
|
5036
4776
|
rb_define_module_function(rb_mLlamaCpp, "llama_get_memory", rb_llama_get_memory, 1);
|
5037
4777
|
|
@@ -5106,6 +4846,9 @@ void Init_llama_cpp(void) {
|
|
5106
4846
|
/* llama_model_is_recurrent */
|
5107
4847
|
rb_define_module_function(rb_mLlamaCpp, "llama_model_is_recurrent?", rb_llama_model_is_recurrent, 1);
|
5108
4848
|
|
4849
|
+
/* llama_model_is_diffusion */
|
4850
|
+
rb_define_module_function(rb_mLlamaCpp, "llama_model_is_diffusion?", rb_llama_model_is_diffusion, 1);
|
4851
|
+
|
5109
4852
|
/* llama_model_quantize */
|
5110
4853
|
rb_define_module_function(rb_mLlamaCpp, "llama_model_quantize", rb_llama_model_quantize, 3);
|
5111
4854
|
|
@@ -5160,46 +4903,6 @@ void Init_llama_cpp(void) {
|
|
5160
4903
|
/* llama_memory_can_shift */
|
5161
4904
|
rb_define_module_function(rb_mLlamaCpp, "llama_memory_can_shift?", rb_llama_memory_can_shift, 1);
|
5162
4905
|
|
5163
|
-
/**
|
5164
|
-
* Document-class: LlamaCpp::LlamaKvCache
|
5165
|
-
* "struct llama_kv_cache" wrapper class
|
5166
|
-
*/
|
5167
|
-
rb_cLlamaKvCache = rb_define_class_under(rb_mLlamaCpp, "LlamaKvCache", rb_cObject);
|
5168
|
-
rb_define_alloc_func(rb_cLlamaKvCache, llama_kv_cache_wrapper_alloc);
|
5169
|
-
|
5170
|
-
/* llama_kv_self_clear */
|
5171
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_clear", rb_llama_kv_self_clear, 1);
|
5172
|
-
|
5173
|
-
/* llama_kv_self_seq_rm */
|
5174
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_rm", rb_llama_kv_self_seq_rm, 4);
|
5175
|
-
|
5176
|
-
/* llama_kv_self_seq_cp */
|
5177
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_cp", rb_llama_kv_self_seq_cp, 5);
|
5178
|
-
|
5179
|
-
/* llama_kv_self_seq_keep */
|
5180
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_keep", rb_llama_kv_self_seq_keep, 2);
|
5181
|
-
|
5182
|
-
/* llama_kv_self_seq_add */
|
5183
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_add", rb_llama_kv_self_seq_add, 5);
|
5184
|
-
|
5185
|
-
/* llama_kv_self_seq_div */
|
5186
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_div", rb_llama_kv_self_seq_div, 5);
|
5187
|
-
|
5188
|
-
/* llama_kv_self_seq_pos_min */
|
5189
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_pos_min", rb_llama_kv_self_seq_pos_min, 2);
|
5190
|
-
|
5191
|
-
/* llama_kv_self_seq_pos_max */
|
5192
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_seq_pos_max", rb_llama_kv_self_seq_pos_max, 2);
|
5193
|
-
|
5194
|
-
/* llama_kv_self_defrag */
|
5195
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_defrag", rb_llama_kv_self_defrag, 1);
|
5196
|
-
|
5197
|
-
/* llama_kv_self_update */
|
5198
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_update", rb_llama_kv_self_update, 1);
|
5199
|
-
|
5200
|
-
/* llama_kv_self_can_shift */
|
5201
|
-
rb_define_module_function(rb_mLlamaCpp, "llama_kv_self_can_shift?", rb_llama_kv_self_can_shift, 1);
|
5202
|
-
|
5203
4906
|
/* llama_state_get_size */
|
5204
4907
|
rb_define_module_function(rb_mLlamaCpp, "llama_state_get_size", rb_llama_state_get_size, 1);
|
5205
4908
|
|
@@ -5215,6 +4918,9 @@ void Init_llama_cpp(void) {
|
|
5215
4918
|
/* TODO: llama_state_seq_set_data */
|
5216
4919
|
/* TODO: llama_state_seq_save_file */
|
5217
4920
|
/* TODO: llama_state_seq_load_file */
|
4921
|
+
rb_define_module_function(rb_mLlamaCpp, "llama_state_seq_get_size_ext", rb_llama_state_seq_get_size_ext, 3);
|
4922
|
+
/* TODO: llama_state_seq_get_data_ext */
|
4923
|
+
/* TODO: llama_state_seq_set_data_ext */
|
5218
4924
|
|
5219
4925
|
/* llama_batch_get_one */
|
5220
4926
|
rb_define_module_function(rb_mLlamaCpp, "llama_batch_get_one", rb_llama_batch_get_one, 1);
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LlamaCpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.
|
6
|
+
VERSION = '0.22.0'
|
7
7
|
|
8
8
|
# The supported version of llama.cpp.
|
9
|
-
LLAMA_CPP_VERSION = '
|
9
|
+
LLAMA_CPP_VERSION = 'b6240'
|
10
10
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.22.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: '0'
|
51
51
|
requirements: []
|
52
|
-
rubygems_version: 3.
|
52
|
+
rubygems_version: 3.7.0
|
53
53
|
specification_version: 4
|
54
54
|
summary: Ruby bindings for the llama.cpp.
|
55
55
|
test_files: []
|