llama_cpp 0.17.1 → 0.17.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 2b2fec35458bc9b745aa4e2526b2c50ca52201e8f29f608d84993b1eddff5a2f
- data.tar.gz: 00f8d95bec17dcb422eb833623d5eca5028598e2a212dd71a248ad5f63434165
+ metadata.gz: b3da0d7b5c81ad7e21d2761f4d78fd8f892abea918a05c4e37a1a0e7d84f65a5
+ data.tar.gz: 5bc2d81ecf2c722084ee6cb44aab2a851283962780b5a963004c4ff4e4a85051
  SHA512:
- metadata.gz: fd7e98833df714d4c355820995e79964b74f31e0a4dc516360191a9c8c290108a5bf3d90b1ae704f1920ebb3db0152c2de17e2a8ec955fdc3ae1e979abae66ae
- data.tar.gz: c1e32582670b1069187a1c2f8277296b0878c5dc613dca3c733378689086dc10fca2e1ee7d8e6aec6d9db95ebdc2bdfbb5b5c108808b5e489735d1ba19c52cd4
+ metadata.gz: 119188683fdb32b0dce2664038b1fe05a7e4e75df64f2316e50dc19706ee300ac90b59e3a5cce33995fc5d8511f5b3bb8bdf918ce9da51d0d0a81fbde2f6bc58
+ data.tar.gz: c2699dbcefaf135ee8e0520014bba3c6f671569c617c7ff69bb3aff057d16aeff330fd58750c8c9815c7def686bd6e2876c760944f2ae8045042f203056e5cdb
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ## [[0.17.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.1...v0.17.2)] - 2024-07-14
+
+ - Change supported llama.cpp version to b3358.
+ - Add vocabulary pre-tokenization type constants.
+ - Add attention type constants.
+ - Add `attention_type` accessor to `ContextParams`.
+ - Add `lstrip` and `special` keyword arguments to `token_to_piece` method in `Model`.
+ - Add `has_encoder?`, `decoder_start_token`, and `detokenize` methods to `Model`.
+ - Add `encode` method to `Context`.
+
  ## [[0.17.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.0...v0.17.1)] - 2024-07-06

  - Update usage section on README.
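
The release bumps both the gem version and the supported llama.cpp build (see the version and metadata diffs further below). A minimal post-upgrade sanity check, assuming the gem is installed:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION            # expected: "0.17.2"
puts LLaMACpp::LLAMA_CPP_VERSION  # expected: "b3358"
```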
@@ -978,6 +978,8 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
  rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
+ rb_define_method(rb_cLLaMAContextParams, "attention_type=", RUBY_METHOD_FUNC(_llama_context_params_set_attention_type), 1);
+ rb_define_method(rb_cLLaMAContextParams, "attention_type", RUBY_METHOD_FUNC(_llama_context_params_get_attention_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
@@ -1129,6 +1131,18 @@ private:
  return INT2NUM(ptr->params.pooling_type);
  }

+ // attention_type
+ static VALUE _llama_context_params_set_attention_type(VALUE self, VALUE scaling_type) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.attention_type = static_cast<enum llama_attention_type>(NUM2INT(scaling_type));
+ return INT2NUM(ptr->params.attention_type);
+ }
+
+ static VALUE _llama_context_params_get_attention_type(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return INT2NUM(ptr->params.attention_type);
+ }
+
  // rope_freq_base
  static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
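
A short usage sketch of the new `ContextParams#attention_type` accessor, together with the attention type constants registered later in `Init_llama_cpp`. `ContextParams.new` follows the gem's existing API; the rest is illustrative:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
# Request non-causal attention, e.g. for embedding-style workloads.
params.attention_type = LLaMACpp::LLAMA_ATTENTION_TYPE_NON_CAUSAL
params.attention_type == LLaMACpp::LLAMA_ATTENTION_TYPE_NON_CAUSAL # => true
```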
@@ -1516,7 +1530,7 @@ public:
  rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
  rb_define_method(rb_cLLaMAModel, "n_layer", RUBY_METHOD_FUNC(_llama_model_get_model_n_layer), 0);
  rb_define_method(rb_cLLaMAModel, "rope_freq_scale_train", RUBY_METHOD_FUNC(_llama_model_rope_freq_scale_train), 0);
- rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), 1);
+ rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), -1);
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize), -1);
  rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
  rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
@@ -1538,6 +1552,9 @@ public:
  rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
  rb_define_method(rb_cLLaMAModel, "token_is_eog?", RUBY_METHOD_FUNC(_llama_model_token_is_eog), 1);
  rb_define_method(rb_cLLaMAModel, "token_is_control?", RUBY_METHOD_FUNC(_llama_model_token_is_control), 1);
+ rb_define_method(rb_cLLaMAModel, "has_encoder?", RUBY_METHOD_FUNC(_llama_model_has_encoder), 0);
+ rb_define_method(rb_cLLaMAModel, "decoder_start_token", RUBY_METHOD_FUNC(_llama_model_decoder_start_token), 0);
+ rb_define_method(rb_cLLaMAModel, "detokenize", RUBY_METHOD_FUNC(_llama_model_detokenize), -1);
  }

  private:
@@ -1677,18 +1694,33 @@ private:
  return DBL2NUM(llama_rope_freq_scale_train(ptr->model));
  }

- static VALUE _llama_model_token_to_piece(VALUE self, VALUE token_) {
+ static VALUE _llama_model_token_to_piece(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("lstrip"), rb_intern("special") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ VALUE token_ = Qnil;
+ rb_scan_args(argc, argv, "1:", &token_, &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
  if (!RB_INTEGER_TYPE_P(token_)) {
  rb_raise(rb_eArgError, "token must be an integer");
  return Qnil;
  }
+ if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+ rb_raise(rb_eArgError, "lstrip must be an integer");
+ return Qnil;
+ }
+
  const llama_token token = NUM2INT(token_);
+ const int32_t lstrip = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 0;
+ const bool special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
  LLaMAModelWrapper* ptr = get_llama_model(self);
  std::vector<char> result(8, 0);
- const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), false);
+ const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
  if (n_tokens < 0) {
  result.resize(-n_tokens);
- const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), false);
+ const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
  if (check != -n_tokens) {
  rb_raise(rb_eRuntimeError, "failed to convert");
  return Qnil;
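
With the arity change above (`-1` registers a variadic C method), `Model#token_to_piece` keeps its single positional token id and additionally accepts optional `lstrip` and `special` keyword arguments. A hedged sketch; the model path is a placeholder, `Model.new(model_path:, params:)` and `token_bos` are assumed from the gem's existing API:

```ruby
require 'llama_cpp'

model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: LLaMACpp::ModelParams.new)
token_id = model.token_bos

model.token_to_piece(token_id)                            # same behavior as 0.17.1
model.token_to_piece(token_id, lstrip: 0, special: true)  # also render special tokens such as BOS
```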
@@ -1865,6 +1897,58 @@ private:
  LLaMAModelWrapper* ptr = get_llama_model(self);
  return llama_token_is_control(ptr->model, token) ? Qtrue : Qfalse;
  }
+
+ static VALUE _llama_model_has_encoder(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return llama_model_has_encoder(ptr->model) ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_model_decoder_start_token(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_model_decoder_start_token(ptr->model));
+ }
+
+ static VALUE _llama_model_detokenize(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("remove_special"), rb_intern("unparse_special") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ VALUE tokens_ = Qnil;
+ rb_scan_args(argc, argv, "1:", &tokens_, &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+ if (!RB_TYPE_P(tokens_, T_ARRAY)) {
+ rb_raise(rb_eArgError, "tokens must be an array");
+ return Qnil;
+ }
+
+ const int32_t n_tokens = RARRAY_LEN(tokens_);
+ llama_token* tokens = ALLOCA_N(llama_token, n_tokens);
+ for (int32_t i = 0; i < n_tokens; i++) {
+ tokens[i] = NUM2INT(rb_ary_entry(tokens_, i));
+ }
+
+ std::string text;
+ text.resize(std::max(text.capacity(), static_cast<unsigned long>(n_tokens)));
+ const int32_t text_len_max = text.size();
+
+ bool remove_special = kw_values[0] != Qundef ? RTEST(kw_values[0]) : false;
+ bool unparse_special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ std::string result;
+ int32_t n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+ if (n_chars < 0) {
+ text.resize(-n_chars);
+ n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+ if (n_chars <= text.size()) {
+ rb_raise(rb_eRuntimeError, "Failed to detokenize");
+ return Qnil;
+ }
+ }
+
+ text.resize(n_chars);
+ return rb_utf8_str_new_cstr(text.c_str());
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
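
The three new `Model` methods above pair naturally with the existing `tokenize` (its signature appears in the RBS diff further below). A hedged round-trip sketch, reusing the `model` object from the previous example:

```ruby
tokens = model.tokenize(text: 'Hello, world!', add_bos: true)
text   = model.detokenize(tokens, remove_special: true, unparse_special: false)
puts text # roughly the original string, minus special tokens

# Encoder-decoder models (e.g. T5-style architectures) can be detected and primed:
if model.has_encoder?
  decoder_start = model.decoder_start_token # token id to seed the decoder with
end
```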
@@ -2134,6 +2218,7 @@ public:
  rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
  rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
+ rb_define_method(rb_cLLaMAContext, "encode", RUBY_METHOD_FUNC(_llama_context_encode), 1);
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
@@ -2228,6 +2313,24 @@ private:
  return Qnil;
  }

+ static VALUE _llama_context_encode(VALUE self, VALUE batch) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ if (!rb_obj_is_kind_of(batch, rb_cLLaMABatch)) {
+ rb_raise(rb_eArgError, "batch must be a Batch");
+ return Qnil;
+ }
+ LLaMABatchWrapper* batch_ptr = RbLLaMABatch::get_llama_batch(batch);
+ if (llama_encode(ptr->ctx, batch_ptr->batch) < 0) {
+ rb_raise(rb_eRuntimeError, "Failed to encode");
+ return Qnil;
+ }
+ return Qnil;
+ }
+
  static VALUE _llama_context_decode(VALUE self, VALUE batch) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
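
`Context#encode` mirrors the existing `decode`: it takes a `LLaMACpp::Batch`, runs the encoder pass via `llama_encode`, raises a `RuntimeError` ("Failed to encode") on failure, and returns `nil` otherwise. A minimal sketch; how the batch of prompt tokens is built is unchanged from `decode` and omitted here:

```ruby
# `context` is a LLaMACpp::Context; `batch` is a LLaMACpp::Batch of prompt tokens,
# built exactly as it would be for Context#decode.
context.encode(batch) if context.model.has_encoder?  # encoder pass before decoding
```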
@@ -2774,7 +2877,7 @@ private:
  ID kw_table[3] = { rb_intern("logits"), rb_intern("logits_guidance"), rb_intern("scale") };
  VALUE kw_values[3] = { Qundef, Qundef, Qundef };
  rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 0, 3, kw_values);
+ rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);

  if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
  rb_raise(rb_eArgError, "logits must be an Array");
@@ -3513,6 +3616,8 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM3));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM4", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM4));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_JAIS", INT2NUM(LLAMA_VOCAB_PRE_TYPE_JAIS));

@@ -3594,6 +3699,10 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
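
The newly registered constants are plain integers on the `LLaMACpp` module, mirroring the underlying llama.cpp enums, so they can be referenced directly:

```ruby
require 'llama_cpp'

LLaMACpp::LLAMA_VOCAB_PRE_TYPE_CHATGLM4     # new pre-tokenization type id
LLaMACpp::LLAMA_ATTENTION_TYPE_NON_CAUSAL   # used with ContextParams#attention_type=
```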
@@ -1,6 +1,7 @@
  #ifndef LLAMA_CPP_RB_H
  #define LLAMA_CPP_RB_H 1

+ #include <algorithm>
  #include <sstream>
  #include <string>
  #include <vector>
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.17.1'
+ VERSION = '0.17.2'

  # The supported version of llama.cpp.
- LLAMA_CPP_VERSION = 'b3291'
+ LLAMA_CPP_VERSION = 'b3358'
  end
data/sig/llama_cpp.rbs CHANGED
@@ -33,6 +33,8 @@ module LLaMACpp
  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
  LLAMA_VOCAB_PRE_TYPE_PORO: Integer
+ LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
+ LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
  LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
  LLAMA_VOCAB_PRE_TYPE_JAIS: Integer

@@ -104,6 +106,10 @@ module LLaMACpp
  LLAMA_POOLING_TYPE_CLS: Integer
  LLAMA_POOLING_TYPE_LAST: Integer

+ LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
+ LLAMA_ATTENTION_TYPE_CAUSAL: Integer
+ LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
+
  LLAMA_SPLIT_MODE_NONE: Integer
  LLAMA_SPLIT_MODE_LAYER: Integer
  LLAMA_SPLIT_MODE_ROW: Integer
@@ -158,7 +164,7 @@ module LLaMACpp
  def n_embd: () -> Integer
  def n_layer: () -> Integer
  def rope_freq_scale_train: () -> Float
- def token_to_piece: (Integer) -> String
+ def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
  def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
  def desc: () -> String
  def size: () -> Integer
@@ -180,6 +186,9 @@ module LLaMACpp
  def token_eot: () -> Integer
  def token_is_eog?: (Integer) -> bool
  def token_is_control?: (Integer) -> bool
+ def has_encoder?: () -> bool
+ def decoder_start_token: () -> Integer
+ def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
  end

  class Timings
@@ -261,6 +270,7 @@ module LLaMACpp
  def embeddings: () -> Array[Float]
  def embeddings_ith: (Integer) -> Array[Float]
  def embeddings_seq: (Integer) -> Array[Float]
+ def encode: (::LLaMACpp::Batch) -> void
  def decode: (::LLaMACpp::Batch) -> void
  def logits: () -> Array[Float]
  def set_embeddings: (bool) -> void
@@ -330,6 +340,8 @@ module LLaMACpp
  def rope_scaling_type: () -> Integer
  def pooling_type=: (Integer) -> Integer
  def pooling_type: () -> Integer
+ def attention_type=: (Integer) -> Integer
+ def attention_type: () -> Integer
  def rope_freq_base=: (Float) -> Float
  def rope_freq_base: () -> Float
  def rope_freq_scale=: (Float) -> Float
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.17.1
+ version: 0.17.2
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-07-06 00:00:00.000000000 Z
+ date: 2024-07-14 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email: