llama_cpp 0.25.2 → 0.25.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c2a769491201813ec3b65cc286f283342ca492cf8a893b24ba46a4536b7669d
4
- data.tar.gz: 95799ab5fa55ccdd691d65f81e5c48cd0702163c4eff2f589c2384e43d64af45
3
+ metadata.gz: 7918f80b77eb32fa218c3cd9845594eb6f882e066e600caa2af31a979f916610
4
+ data.tar.gz: 2a631621308ba8206d5c27c8dad8d01b415865d99c1f632e197c4bea6ae43941
5
5
  SHA512:
6
- metadata.gz: a58b1879d867586b9b60a44b468babb69996359fb91bf8cf27f02a3c1b8a30538311f973a762f172d62e47dbeef8e2652c44d0b88cbbed345bba5532a5210d77
7
- data.tar.gz: 5ca17aa628b3c1d2a95bb1e21101c0531bbd7216efffe07fc3ac184cf7512fef54192e85762f74d363c090d1e91a2d3497e13f030beaf89d3f162f2dee15ac2b
6
+ metadata.gz: 7429e8a0d7255b8d000fa6d2be3d346573743f8238d7fe3d29c4f6a0cdafeb17ef7f5cd6faf1dbad9272e402af1a85e924ebaec1992e2a1077cb5796b2e8521f
7
+ data.tar.gz: d3eaeb4ac110955a9475aca37ac16c660e5e7780275c6861450ced9e1463478c9f7c30a8f6cc61aa22bca2cfff8a4bae1e10fd3c93e3170bb7d5e2ea1206c210
data/CHANGELOG.md CHANGED
@@ -1,12 +1,30 @@
1
+ ## [[0.25.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.25.3...v0.25.4)] - 2026-06-13
2
+
3
+ - Change supported llama.cpp version to b9610.
4
+ - Add `n_outputs_max` accessor to `LlamaContextParams`.
5
+
6
+ ## [[0.25.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.25.2...v0.25.3)] - 2026-05-24
7
+
8
+ - Add `llama_model_chat_template` module function to `LlamaCpp`.
9
+ - Add `llama_model_meta_val_str` module function to `LlamaCpp`.
10
+ - Add `llama_model_meta_key_by_index` module function to `LlamaCpp`.
11
+ - Add `llama_model_meta_val_str_by_index` module function to `LlamaCpp`.
12
+ - Change supported llama.cpp version to b9290.
13
+ - Add `LLAMA_CONTEXT_TYPE_DEFAULT` constant value.
14
+ - Add `LLAMA_CONTEXT_TYPE_MTP` constant value.
15
+ - Add `ctx_type` accessor to `LlamaContextParams`.
16
+ - Add `n_rs_seq` accessor to `LlamaContextParams`.
17
+ - Add `n_rs_seq` module function to `LlamaCpp`.
18
+
1
19
  ## [[0.25.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.25.1...v0.25.2)] - 2026-05-16
2
20
 
3
21
  - Change supported llama.cpp version to b9150.
4
- - ADD `LLAMA_STATE_SEQ_FLAGS_NONE` constant value.
22
+ - Add `LLAMA_STATE_SEQ_FLAGS_NONE` constant value.
5
23
 
6
24
  ## [[0.25.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.25.0...v0.25.1)] - 2026-05-09
7
25
 
8
26
  - Change supported llama.cpp version to b9070.
9
- - ADD `LLAMA_STATE_SEQ_FLAGS_ON_DEVICE` constant value.
27
+ - Add `LLAMA_STATE_SEQ_FLAGS_ON_DEVICE` constant value.
10
28
 
11
29
  ## [[0.25.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.24.3...v0.25.0)] - 2026-04-25
12
30
 
@@ -652,6 +652,28 @@ static VALUE llama_context_params_set_n_seq_max(VALUE self, VALUE n_seq_max) {
652
652
  return n_seq_max;
653
653
  }
654
654
 
655
+ static VALUE llama_context_params_get_n_rs_seq(VALUE self) {
656
+ struct llama_context_params* data = get_llama_context_params(self);
657
+ return UINT2NUM(data->n_rs_seq);
658
+ }
659
+
660
+ static VALUE llama_context_params_set_n_rs_seq(VALUE self, VALUE n_rs_seq) {
661
+ struct llama_context_params* data = get_llama_context_params(self);
662
+ data->n_rs_seq = NUM2UINT(n_rs_seq);
663
+ return n_rs_seq;
664
+ }
665
+
666
+ static VALUE llama_context_params_get_n_outputs_max(VALUE self) {
667
+ struct llama_context_params* data = get_llama_context_params(self);
668
+ return UINT2NUM(data->n_outputs_max);
669
+ }
670
+
671
+ static VALUE llama_context_params_set_n_outputs_max(VALUE self, VALUE n_outputs_max) {
672
+ struct llama_context_params* data = get_llama_context_params(self);
673
+ data->n_outputs_max = NUM2UINT(n_outputs_max);
674
+ return n_outputs_max;
675
+ }
676
+
655
677
  static VALUE llama_context_params_get_n_threads(VALUE self) {
656
678
  struct llama_context_params* data = get_llama_context_params(self);
657
679
  return INT2NUM(data->n_threads);
@@ -674,6 +696,17 @@ static VALUE llama_context_params_set_n_threads_batch(VALUE self, VALUE n_thread
674
696
  return n_threads_batch;
675
697
  }
676
698
 
699
+ static VALUE llama_context_params_get_ctx_type(VALUE self) {
700
+ struct llama_context_params* data = get_llama_context_params(self);
701
+ return INT2NUM(data->ctx_type);
702
+ }
703
+
704
+ static VALUE llama_context_params_set_ctx_type(VALUE self, VALUE ctx_type) {
705
+ struct llama_context_params* data = get_llama_context_params(self);
706
+ data->ctx_type = (enum llama_context_type)NUM2INT(ctx_type);
707
+ return ctx_type;
708
+ }
709
+
677
710
  static VALUE llama_context_params_get_rope_scaling_type(VALUE self) {
678
711
  struct llama_context_params* data = get_llama_context_params(self);
679
712
  return INT2NUM(data->rope_scaling_type);
@@ -1602,6 +1635,20 @@ static VALUE rb_llama_n_seq_max(VALUE self, VALUE ctx) {
1602
1635
  return UINT2NUM(llama_n_seq_max(context_wrapper->context));
1603
1636
  }
1604
1637
 
1638
+ /**
1639
+ * @overload llama_n_rs_seq(context)
1640
+ * @param [LlamaContext] context
1641
+ * @return [Integer]
1642
+ */
1643
+ static VALUE rb_llama_n_rs_seq(VALUE self, VALUE ctx) {
1644
+ if (!rb_obj_is_kind_of(ctx, rb_cLlamaContext)) {
1645
+ rb_raise(rb_eArgError, "ctx must be a LlamaContext");
1646
+ return Qnil;
1647
+ }
1648
+ llama_context_wrapper* context_wrapper = get_llama_context_wrapper(ctx);
1649
+ return UINT2NUM(llama_n_rs_seq(context_wrapper->context));
1650
+ }
1651
+
1605
1652
  /**
1606
1653
  * @overload llama_get_model(context)
1607
1654
  * @param [LlamaContext] context
@@ -1887,6 +1934,116 @@ static VALUE rb_llama_model_meta_key_str(VALUE self, VALUE key) {
1887
1934
  return rb_utf8_str_new_cstr(key_str);
1888
1935
  }
1889
1936
 
1937
+ /**
1938
+ * @overload llama_model_meta_val_str(model, key)
1939
+ * @param [LlamaModel] model
1940
+ * @param [String] key
1941
+ * @return [String, nil] nil if the key is not found
1942
+ */
1943
+ static VALUE rb_llama_model_meta_val_str(VALUE self, VALUE model, VALUE key) {
1944
+ if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
1945
+ rb_raise(rb_eArgError, "model must be a LlamaModel");
1946
+ return Qnil;
1947
+ }
1948
+ if (!RB_TYPE_P(key, T_STRING)) {
1949
+ rb_raise(rb_eArgError, "key must be a String");
1950
+ return Qnil;
1951
+ }
1952
+ llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
1953
+ const char* key_ = StringValueCStr(key);
1954
+ char stack_buf[1024];
1955
+ int32_t n = llama_model_meta_val_str(model_wrapper->model, key_, stack_buf, sizeof(stack_buf));
1956
+ if (n < 0) {
1957
+ RB_GC_GUARD(model);
1958
+ RB_GC_GUARD(key);
1959
+ return Qnil;
1960
+ }
1961
+ VALUE result;
1962
+ if ((size_t)n < sizeof(stack_buf)) {
1963
+ result = rb_utf8_str_new(stack_buf, n);
1964
+ } else {
1965
+ char* heap_buf = (char*)ruby_xmalloc((size_t)n + 1);
1966
+ llama_model_meta_val_str(model_wrapper->model, key_, heap_buf, (size_t)n + 1);
1967
+ result = rb_utf8_str_new(heap_buf, n);
1968
+ ruby_xfree(heap_buf);
1969
+ }
1970
+ RB_GC_GUARD(model);
1971
+ RB_GC_GUARD(key);
1972
+ return result;
1973
+ }
1974
+
1975
+ /**
1976
+ * @overload llama_model_meta_key_by_index(model, idx)
1977
+ * @param [LlamaModel] model
1978
+ * @param [Integer] idx
1979
+ * @return [String, nil] nil if the index is out of range
1980
+ */
1981
+ static VALUE rb_llama_model_meta_key_by_index(VALUE self, VALUE model, VALUE idx) {
1982
+ if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
1983
+ rb_raise(rb_eArgError, "model must be a LlamaModel");
1984
+ return Qnil;
1985
+ }
1986
+ if (!RB_INTEGER_TYPE_P(idx)) {
1987
+ rb_raise(rb_eArgError, "i must be an Integer");
1988
+ return Qnil;
1989
+ }
1990
+ llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
1991
+ int32_t idx_ = NUM2INT(idx);
1992
+ char stack_buf[1024];
1993
+ int32_t n = llama_model_meta_key_by_index(model_wrapper->model, idx_, stack_buf, sizeof(stack_buf));
1994
+ if (n < 0) {
1995
+ RB_GC_GUARD(model);
1996
+ return Qnil;
1997
+ }
1998
+ VALUE result;
1999
+ if ((size_t)n < sizeof(stack_buf)) {
2000
+ result = rb_utf8_str_new(stack_buf, n);
2001
+ } else {
2002
+ char* heap_buf = (char*)ruby_xmalloc((size_t)n + 1);
2003
+ llama_model_meta_key_by_index(model_wrapper->model, idx_, heap_buf, (size_t)n + 1);
2004
+ result = rb_utf8_str_new(heap_buf, n);
2005
+ ruby_xfree(heap_buf);
2006
+ }
2007
+ RB_GC_GUARD(model);
2008
+ return result;
2009
+ }
2010
+
2011
+ /**
2012
+ * @overload llama_model_meta_val_str_by_index(model, idx)
2013
+ * @param [LlamaModel] model
2014
+ * @param [Integer] idx
2015
+ * @return [String, nil] nil if the index is out of range
2016
+ */
2017
+ static VALUE rb_llama_model_meta_val_str_by_index(VALUE self, VALUE model, VALUE idx) {
2018
+ if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
2019
+ rb_raise(rb_eArgError, "model must be a LlamaModel");
2020
+ return Qnil;
2021
+ }
2022
+ if (!RB_INTEGER_TYPE_P(idx)) {
2023
+ rb_raise(rb_eArgError, "i must be an Integer");
2024
+ return Qnil;
2025
+ }
2026
+ llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
2027
+ int32_t idx_ = NUM2INT(idx);
2028
+ char stack_buf[1024];
2029
+ int32_t n = llama_model_meta_val_str_by_index(model_wrapper->model, idx_, stack_buf, sizeof(stack_buf));
2030
+ if (n < 0) {
2031
+ RB_GC_GUARD(model);
2032
+ return Qnil;
2033
+ }
2034
+ VALUE result;
2035
+ if ((size_t)n < sizeof(stack_buf)) {
2036
+ result = rb_utf8_str_new(stack_buf, n);
2037
+ } else {
2038
+ char* heap_buf = (char*)ruby_xmalloc((size_t)n + 1);
2039
+ llama_model_meta_val_str_by_index(model_wrapper->model, idx_, heap_buf, (size_t)n + 1);
2040
+ result = rb_utf8_str_new(heap_buf, n);
2041
+ ruby_xfree(heap_buf);
2042
+ }
2043
+ RB_GC_GUARD(model);
2044
+ return result;
2045
+ }
2046
+
1890
2047
  /**
1891
2048
  * @overload llama_model_desc(model)
1892
2049
  * @param [LlamaModel] model
@@ -1918,19 +2075,28 @@ static VALUE rb_llama_model_size(VALUE self, VALUE model) {
1918
2075
  return ULONG2NUM(llama_model_size(model_wrapper->model));
1919
2076
  }
1920
2077
 
1921
- /* llama_model_chat_template */
1922
- /*
1923
- static VALUE rb_llama_model_chat_template(VALUE self, VALUE model) {
2078
+ /**
2079
+ * @overload llama_model_chat_template(model, name)
2080
+ * @param [LlamaModel] model
2081
+ * @param [String, nil] name pass nil to get the default chat template
2082
+ * @return [String, nil] nil if no chat template is available
2083
+ */
2084
+ static VALUE rb_llama_model_chat_template(VALUE self, VALUE model, VALUE name) {
1924
2085
  if (!rb_obj_is_kind_of(model, rb_cLlamaModel)) {
1925
2086
  rb_raise(rb_eArgError, "model must be a LlamaModel");
1926
2087
  return Qnil;
1927
2088
  }
2089
+ if (!NIL_P(name) && !RB_TYPE_P(name, T_STRING)) {
2090
+ rb_raise(rb_eArgError, "name must be a String or nil");
2091
+ return Qnil;
2092
+ }
1928
2093
  llama_model_wrapper* model_wrapper = get_llama_model_wrapper(model);
1929
- const char* templ = llama_model_chat_template(model_wrapper->model)
2094
+ const char* name_ = NIL_P(name) ? NULL : StringValueCStr(name);
2095
+ const char* templ = llama_model_chat_template(model_wrapper->model, name_);
1930
2096
  RB_GC_GUARD(model);
1931
- return rb_utf8_str_new_cstr(templ);
2097
+ RB_GC_GUARD(name);
2098
+ return templ == NULL ? Qnil : rb_utf8_str_new_cstr(templ);
1932
2099
  }
1933
- */
1934
2100
 
1935
2101
  /**
1936
2102
  * @overload llama_model_n_params(model)
@@ -4312,6 +4478,10 @@ void Init_llama_cpp(void) {
4312
4478
  rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
4313
4479
  rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
4314
4480
  rb_define_const(rb_mLlamaCpp, "LLAMA_SPLIT_MODE_TENSOR", INT2NUM(LLAMA_SPLIT_MODE_TENSOR));
4481
+ /* llama_context_type */
4482
+ /* Document-const: LlamaCpp::LLAMA_CONTEXT_TYPE_DEFAULT */
4483
+ rb_define_const(rb_mLlamaCpp, "LLAMA_CONTEXT_TYPE_DEFAULT", INT2NUM(LLAMA_CONTEXT_TYPE_DEFAULT));
4484
+ rb_define_const(rb_mLlamaCpp, "LLAMA_CONTEXT_TYPE_MTP", INT2NUM(LLAMA_CONTEXT_TYPE_MTP));
4315
4485
 
4316
4486
  rb_define_module_function(rb_mLlamaCpp, "llama_flash_attn_type_name", rb_llama_flash_attn_type_name, 1);
4317
4487
 
@@ -4644,6 +4814,28 @@ void Init_llama_cpp(void) {
4644
4814
  * @return [Integer]
4645
4815
  */
4646
4816
  rb_define_method(rb_cLlamaContextParams, "n_seq_max=", RUBY_METHOD_FUNC(llama_context_params_set_n_seq_max), 1);
4817
+ /**
4818
+ * Document-method: n_rs_seq
4819
+ * @return [Integer]
4820
+ */
4821
+ rb_define_method(rb_cLlamaContextParams, "n_rs_seq", RUBY_METHOD_FUNC(llama_context_params_get_n_rs_seq), 0);
4822
+ /**
4823
+ * Document-method: n_rs_seq=
4824
+ * @param [Integer] n_rs_seq
4825
+ * @return [Integer]
4826
+ */
4827
+ rb_define_method(rb_cLlamaContextParams, "n_rs_seq=", RUBY_METHOD_FUNC(llama_context_params_set_n_rs_seq), 1);
4828
+ /**
4829
+ * Document-method: n_outputs_max
4830
+ * @return [Integer]
4831
+ */
4832
+ rb_define_method(rb_cLlamaContextParams, "n_outputs_max", RUBY_METHOD_FUNC(llama_context_params_get_n_outputs_max), 0);
4833
+ /**
4834
+ * Document-method: n_outputs_max=
4835
+ * @param [Integer] n_outputs_max
4836
+ * @return [Integer]
4837
+ */
4838
+ rb_define_method(rb_cLlamaContextParams, "n_outputs_max=", RUBY_METHOD_FUNC(llama_context_params_set_n_outputs_max), 1);
4647
4839
  /**
4648
4840
  * Document-method: n_threads
4649
4841
  * @return [Integer]
@@ -4666,6 +4858,17 @@ void Init_llama_cpp(void) {
4666
4858
  * @return [Integer]
4667
4859
  */
4668
4860
  rb_define_method(rb_cLlamaContextParams, "n_threads_batch=", RUBY_METHOD_FUNC(llama_context_params_set_n_threads_batch), 1);
4861
+ /**
4862
+ * Document-method: ctx_type
4863
+ * @return [Integer]
4864
+ */
4865
+ rb_define_method(rb_cLlamaContextParams, "ctx_type", RUBY_METHOD_FUNC(llama_context_params_get_ctx_type), 0);
4866
+ /**
4867
+ * Document-method: ctx_type=
4868
+ * @param [Integer] ctx_type
4869
+ * @return [Integer]
4870
+ */
4871
+ rb_define_method(rb_cLlamaContextParams, "ctx_type=", RUBY_METHOD_FUNC(llama_context_params_set_ctx_type), 1);
4669
4872
  /**
4670
4873
  * Document-method: rope_scaling_type
4671
4874
  * @return [Integer]
@@ -4885,6 +5088,7 @@ void Init_llama_cpp(void) {
4885
5088
 
4886
5089
  /* TODO: struct llama_sampler_seq_config * samplers */
4887
5090
  /* TODO: size_t n_samplers */
5091
+ /* TODO: struct llama_context * ctx_other */
4888
5092
 
4889
5093
  /**
4890
5094
  * Document-method: kv_unified=
@@ -5186,6 +5390,9 @@ void Init_llama_cpp(void) {
5186
5390
  /* llama_n_seq_max */
5187
5391
  rb_define_module_function(rb_mLlamaCpp, "llama_n_seq_max", rb_llama_n_seq_max, 1);
5188
5392
 
5393
+ /* llama_n_rs_seq */
5394
+ rb_define_module_function(rb_mLlamaCpp, "llama_n_rs_seq", rb_llama_n_rs_seq, 1);
5395
+
5189
5396
  /* TODO: llama_get_model */
5190
5397
  rb_define_module_function(rb_mLlamaCpp, "llama_get_model", rb_llama_get_model, 1);
5191
5398
 
@@ -5244,9 +5451,12 @@ void Init_llama_cpp(void) {
5244
5451
  rb_define_module_function(rb_mLlamaCpp, "llama_model_meta_count", rb_llama_model_meta_count, 1);
5245
5452
  /* llama_model_meta_key_str */
5246
5453
  rb_define_module_function(rb_mLlamaCpp, "llama_model_meta_key_str", rb_llama_model_meta_key_str, 1);
5247
- /* TODO: llama_model_meta_val_str */
5248
- /* TODO: llama_model_meta_key_by_index */
5249
- /* TODO: llama_model_meta_val_str_by_index */
5454
+ /* llama_model_meta_val_str */
5455
+ rb_define_module_function(rb_mLlamaCpp, "llama_model_meta_val_str", rb_llama_model_meta_val_str, 2);
5456
+ /* llama_model_meta_key_by_index */
5457
+ rb_define_module_function(rb_mLlamaCpp, "llama_model_meta_key_by_index", rb_llama_model_meta_key_by_index, 2);
5458
+ /* llama_model_meta_val_str_by_index */
5459
+ rb_define_module_function(rb_mLlamaCpp, "llama_model_meta_val_str_by_index", rb_llama_model_meta_val_str_by_index, 2);
5250
5460
 
5251
5461
  /* llama_model_desc */
5252
5462
  rb_define_module_function(rb_mLlamaCpp, "llama_model_desc", rb_llama_model_desc, 1);
@@ -5254,8 +5464,8 @@ void Init_llama_cpp(void) {
5254
5464
  /* llama_model_size */
5255
5465
  rb_define_module_function(rb_mLlamaCpp, "llama_model_size", rb_llama_model_size, 1);
5256
5466
 
5257
- /* TODO: llama_model_chat_template */
5258
- /* rb_define_module_function(rb_mLlamaCpp, "llama_model_chat_template", rb_llama_model_chat_template, 1); */
5467
+ /* llama_model_chat_template */
5468
+ rb_define_module_function(rb_mLlamaCpp, "llama_model_chat_template", rb_llama_model_chat_template, 2);
5259
5469
 
5260
5470
  /* llama_model_n_params */
5261
5471
  rb_define_module_function(rb_mLlamaCpp, "llama_model_n_params", rb_llama_model_n_params, 1);
@@ -3,8 +3,8 @@
3
3
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
4
4
  module LlamaCpp
5
5
  # The version of llama_cpp.rb you install.
6
- VERSION = '0.25.2'
6
+ VERSION = '0.25.4'
7
7
 
8
8
  # The supported version of llama.cpp.
9
- LLAMA_CPP_VERSION = 'b9150'
9
+ LLAMA_CPP_VERSION = 'b9610'
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llama_cpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.25.2
4
+ version: 0.25.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
@@ -33,7 +33,7 @@ metadata:
33
33
  homepage_uri: https://github.com/yoshoku/llama_cpp.rb
34
34
  source_code_uri: https://github.com/yoshoku/llama_cpp.rb
35
35
  changelog_uri: https://github.com/yoshoku/llama_cpp.rb/blob/main/CHANGELOG.md
36
- documentation_uri: https://gemdocs.org/gems/llama_cpp/0.25.2/
36
+ documentation_uri: https://gemdocs.org/gems/llama_cpp/0.25.4/
37
37
  rubygems_mfa_required: 'true'
38
38
  rdoc_options: []
39
39
  require_paths:
@@ -49,7 +49,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
49
49
  - !ruby/object:Gem::Version
50
50
  version: '0'
51
51
  requirements: []
52
- rubygems_version: 4.0.10
52
+ rubygems_version: 4.0.13
53
53
  specification_version: 4
54
54
  summary: Ruby bindings for the llama.cpp.
55
55
  test_files: []