llama_cpp 0.17.1 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 2b2fec35458bc9b745aa4e2526b2c50ca52201e8f29f608d84993b1eddff5a2f
- data.tar.gz: 00f8d95bec17dcb422eb833623d5eca5028598e2a212dd71a248ad5f63434165
+ metadata.gz: b3da0d7b5c81ad7e21d2761f4d78fd8f892abea918a05c4e37a1a0e7d84f65a5
+ data.tar.gz: 5bc2d81ecf2c722084ee6cb44aab2a851283962780b5a963004c4ff4e4a85051
  SHA512:
- metadata.gz: fd7e98833df714d4c355820995e79964b74f31e0a4dc516360191a9c8c290108a5bf3d90b1ae704f1920ebb3db0152c2de17e2a8ec955fdc3ae1e979abae66ae
- data.tar.gz: c1e32582670b1069187a1c2f8277296b0878c5dc613dca3c733378689086dc10fca2e1ee7d8e6aec6d9db95ebdc2bdfbb5b5c108808b5e489735d1ba19c52cd4
+ metadata.gz: 119188683fdb32b0dce2664038b1fe05a7e4e75df64f2316e50dc19706ee300ac90b59e3a5cce33995fc5d8511f5b3bb8bdf918ce9da51d0d0a81fbde2f6bc58
+ data.tar.gz: c2699dbcefaf135ee8e0520014bba3c6f671569c617c7ff69bb3aff057d16aeff330fd58750c8c9815c7def686bd6e2876c760944f2ae8045042f203056e5cdb
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ## [[0.17.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.1...v0.17.2)] - 2024-07-14
+
+ - Change supported llama.cpp version to b3358.
+ - Add vocabulary pre-tokenization type constants.
+ - Add attention type constants.
+ - Add `attention_type` accessor to `ContextParams`.
+ - Add `lstrip` and `special` keyword arguments to `token_to_piece` method in `Model`.
+ - Add `has_encoder?`, `decoder_start_token`, and `detokenize` methods to `Model`.
+ - Add `encode` method to `Context`.
+
  ## [[0.17.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.0...v0.17.1)] - 2024-07-06

  - Update usage section on README.
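For orientation, here is a minimal sketch of how the Model-level additions in 0.17.2 might be exercised from Ruby. The model path and prompt are placeholders, and the calls follow the signatures declared in data/sig/llama_cpp.rbs further below; this sketch is illustrative only and is not part of the packaged gem.

require 'llama_cpp'

# Placeholder path: any locally available GGUF model file.
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: LLaMACpp::ModelParams.new)

tokens = model.tokenize(text: 'Hello, world.', add_bos: true)

# New in 0.17.2: `lstrip` and `special` keyword arguments on token_to_piece.
pieces = tokens.map { |t| model.token_to_piece(t, lstrip: 0, special: false) }
puts pieces.join

# New in 0.17.2: round-trip token ids back into a string.
puts model.detokenize(tokens, remove_special: false, unparse_special: false)

# New in 0.17.2: encoder/decoder introspection for encoder-decoder models.
puts model.decoder_start_token if model.has_encoder?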
@@ -978,6 +978,8 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
  rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
+ rb_define_method(rb_cLLaMAContextParams, "attention_type=", RUBY_METHOD_FUNC(_llama_context_params_set_attention_type), 1);
+ rb_define_method(rb_cLLaMAContextParams, "attention_type", RUBY_METHOD_FUNC(_llama_context_params_get_attention_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
@@ -1129,6 +1131,18 @@ private:
  return INT2NUM(ptr->params.pooling_type);
  }

+ // attention_type
+ static VALUE _llama_context_params_set_attention_type(VALUE self, VALUE scaling_type) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.attention_type = static_cast<enum llama_attention_type>(NUM2INT(scaling_type));
+ return INT2NUM(ptr->params.attention_type);
+ }
+
+ static VALUE _llama_context_params_get_attention_type(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return INT2NUM(ptr->params.attention_type);
+ }
+
  // rope_freq_base
  static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1516,7 +1530,7 @@ public:
  rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
  rb_define_method(rb_cLLaMAModel, "n_layer", RUBY_METHOD_FUNC(_llama_model_get_model_n_layer), 0);
  rb_define_method(rb_cLLaMAModel, "rope_freq_scale_train", RUBY_METHOD_FUNC(_llama_model_rope_freq_scale_train), 0);
- rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), 1);
+ rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), -1);
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize), -1);
  rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
  rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
@@ -1538,6 +1552,9 @@ public:
  rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
  rb_define_method(rb_cLLaMAModel, "token_is_eog?", RUBY_METHOD_FUNC(_llama_model_token_is_eog), 1);
  rb_define_method(rb_cLLaMAModel, "token_is_control?", RUBY_METHOD_FUNC(_llama_model_token_is_control), 1);
+ rb_define_method(rb_cLLaMAModel, "has_encoder?", RUBY_METHOD_FUNC(_llama_model_has_encoder), 0);
+ rb_define_method(rb_cLLaMAModel, "decoder_start_token", RUBY_METHOD_FUNC(_llama_model_decoder_start_token), 0);
+ rb_define_method(rb_cLLaMAModel, "detokenize", RUBY_METHOD_FUNC(_llama_model_detokenize), -1);
  }

  private:
@@ -1677,18 +1694,33 @@ private:
  return DBL2NUM(llama_rope_freq_scale_train(ptr->model));
  }

- static VALUE _llama_model_token_to_piece(VALUE self, VALUE token_) {
+ static VALUE _llama_model_token_to_piece(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("lstrip"), rb_intern("special") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ VALUE token_ = Qnil;
+ rb_scan_args(argc, argv, "1:", &token_, &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
  if (!RB_INTEGER_TYPE_P(token_)) {
  rb_raise(rb_eArgError, "token must be an integer");
  return Qnil;
  }
+ if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+ rb_raise(rb_eArgError, "lstrip must be an integer");
+ return Qnil;
+ }
+
  const llama_token token = NUM2INT(token_);
+ const int32_t lstrip = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 0;
+ const bool special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
  LLaMAModelWrapper* ptr = get_llama_model(self);
  std::vector<char> result(8, 0);
- const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), false);
+ const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
  if (n_tokens < 0) {
  result.resize(-n_tokens);
- const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), false);
+ const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
  if (check != -n_tokens) {
  rb_raise(rb_eRuntimeError, "failed to convert");
  return Qnil;
@@ -1865,6 +1897,58 @@ private:
  LLaMAModelWrapper* ptr = get_llama_model(self);
  return llama_token_is_control(ptr->model, token) ? Qtrue : Qfalse;
  }
+
+ static VALUE _llama_model_has_encoder(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return llama_model_has_encoder(ptr->model) ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_model_decoder_start_token(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_model_decoder_start_token(ptr->model));
+ }
+
+ static VALUE _llama_model_detokenize(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("remove_special"), rb_intern("unparse_special") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ VALUE tokens_ = Qnil;
+ rb_scan_args(argc, argv, "1:", &tokens_, &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+ if (!RB_TYPE_P(tokens_, T_ARRAY)) {
+ rb_raise(rb_eArgError, "tokens must be an array");
+ return Qnil;
+ }
+
+ const int32_t n_tokens = RARRAY_LEN(tokens_);
+ llama_token* tokens = ALLOCA_N(llama_token, n_tokens);
+ for (int32_t i = 0; i < n_tokens; i++) {
+ tokens[i] = NUM2INT(rb_ary_entry(tokens_, i));
+ }
+
+ std::string text;
+ text.resize(std::max(text.capacity(), static_cast<unsigned long>(n_tokens)));
+ const int32_t text_len_max = text.size();
+
+ bool remove_special = kw_values[0] != Qundef ? RTEST(kw_values[0]) : false;
+ bool unparse_special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ std::string result;
+ int32_t n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+ if (n_chars < 0) {
+ text.resize(-n_chars);
+ n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+ if (n_chars <= text.size()) {
+ rb_raise(rb_eRuntimeError, "Failed to detokenize");
+ return Qnil;
+ }
+ }
+
+ text.resize(n_chars);
+ return rb_utf8_str_new_cstr(text.c_str());
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -2134,6 +2218,7 @@ public:
  rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
  rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
+ rb_define_method(rb_cLLaMAContext, "encode", RUBY_METHOD_FUNC(_llama_context_encode), 1);
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
@@ -2228,6 +2313,24 @@ private:
  return Qnil;
  }

+ static VALUE _llama_context_encode(VALUE self, VALUE batch) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ if (!rb_obj_is_kind_of(batch, rb_cLLaMABatch)) {
+ rb_raise(rb_eArgError, "batch must be a Batch");
+ return Qnil;
+ }
+ LLaMABatchWrapper* batch_ptr = RbLLaMABatch::get_llama_batch(batch);
+ if (llama_encode(ptr->ctx, batch_ptr->batch) < 0) {
+ rb_raise(rb_eRuntimeError, "Failed to encode");
+ return Qnil;
+ }
+ return Qnil;
+ }
+
  static VALUE _llama_context_decode(VALUE self, VALUE batch) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
@@ -2774,7 +2877,7 @@ private:
  ID kw_table[3] = { rb_intern("logits"), rb_intern("logits_guidance"), rb_intern("scale") };
  VALUE kw_values[3] = { Qundef, Qundef, Qundef };
  rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 0, 3, kw_values);
+ rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);

  if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
  rb_raise(rb_eArgError, "logits must be an Array");
@@ -3513,6 +3616,8 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM3));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM4", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM4));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_JAIS", INT2NUM(LLAMA_VOCAB_PRE_TYPE_JAIS));
@@ -3594,6 +3699,10 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
@@ -1,6 +1,7 @@
  #ifndef LLAMA_CPP_RB_H
  #define LLAMA_CPP_RB_H 1

+ #include <algorithm>
  #include <sstream>
  #include <string>
  #include <vector>
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.17.1'
+ VERSION = '0.17.2'

  # The supported version of llama.cpp.
- LLAMA_CPP_VERSION = 'b3291'
+ LLAMA_CPP_VERSION = 'b3358'
  end
data/sig/llama_cpp.rbs CHANGED
@@ -33,6 +33,8 @@ module LLaMACpp
  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
  LLAMA_VOCAB_PRE_TYPE_PORO: Integer
+ LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
+ LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
  LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
  LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
@@ -104,6 +106,10 @@ module LLaMACpp
  LLAMA_POOLING_TYPE_CLS: Integer
  LLAMA_POOLING_TYPE_LAST: Integer

+ LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
+ LLAMA_ATTENTION_TYPE_CAUSAL: Integer
+ LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
+
  LLAMA_SPLIT_MODE_NONE: Integer
  LLAMA_SPLIT_MODE_LAYER: Integer
  LLAMA_SPLIT_MODE_ROW: Integer
@@ -158,7 +164,7 @@ module LLaMACpp
  def n_embd: () -> Integer
  def n_layer: () -> Integer
  def rope_freq_scale_train: () -> Float
- def token_to_piece: (Integer) -> String
+ def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
  def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
  def desc: () -> String
  def size: () -> Integer
@@ -180,6 +186,9 @@ module LLaMACpp
  def token_eot: () -> Integer
  def token_is_eog?: (Integer) -> bool
  def token_is_control?: (Integer) -> bool
+ def has_encoder?: () -> bool
+ def decoder_start_token: () -> Integer
+ def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
  end

  class Timings
@@ -261,6 +270,7 @@ module LLaMACpp
  def embeddings: () -> Array[Float]
  def embeddings_ith: (Integer) -> Array[Float]
  def embeddings_seq: (Integer) -> Array[Float]
+ def encode: (::LLaMACpp::Batch) -> void
  def decode: (::LLaMACpp::Batch) -> void
  def logits: () -> Array[Float]
  def set_embeddings: (bool) -> void
@@ -330,6 +340,8 @@ module LLaMACpp
  def rope_scaling_type: () -> Integer
  def pooling_type=: (Integer) -> Integer
  def pooling_type: () -> Integer
+ def attention_type=: (Integer) -> Integer
+ def attention_type: () -> Integer
  def rope_freq_base=: (Float) -> Float
  def rope_freq_base: () -> Float
  def rope_freq_scale=: (Float) -> Float
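A similarly hedged sketch of the context-level additions typed above: the `attention_type` accessor on ContextParams and the `encode` method on Context. The `model` variable is assumed from the earlier sketch, and construction of the LLaMACpp::Batch passed to `encode` is omitted because the Batch API is unchanged in this release.

# Assumes `model` from the earlier sketch.
params = LLaMACpp::ContextParams.new
# New in 0.17.2: choose causal vs. non-causal attention explicitly.
params.attention_type = LLaMACpp::LLAMA_ATTENTION_TYPE_NON_CAUSAL
context = LLaMACpp::Context.new(model: model, params: params)

# New in 0.17.2: Context#encode takes a LLaMACpp::Batch (prepared the same
# way as for Context#decode) and runs the encoder side of an
# encoder-decoder model, e.g.:
#   context.encode(batch) if model.has_encoder?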
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.17.1
+ version: 0.17.2
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-07-06 00:00:00.000000000 Z
+ date: 2024-07-14 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email: