llama_cpp 0.17.1 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 2b2fec35458bc9b745aa4e2526b2c50ca52201e8f29f608d84993b1eddff5a2f
- data.tar.gz: 00f8d95bec17dcb422eb833623d5eca5028598e2a212dd71a248ad5f63434165
+ metadata.gz: b3da0d7b5c81ad7e21d2761f4d78fd8f892abea918a05c4e37a1a0e7d84f65a5
+ data.tar.gz: 5bc2d81ecf2c722084ee6cb44aab2a851283962780b5a963004c4ff4e4a85051
  SHA512:
- metadata.gz: fd7e98833df714d4c355820995e79964b74f31e0a4dc516360191a9c8c290108a5bf3d90b1ae704f1920ebb3db0152c2de17e2a8ec955fdc3ae1e979abae66ae
- data.tar.gz: c1e32582670b1069187a1c2f8277296b0878c5dc613dca3c733378689086dc10fca2e1ee7d8e6aec6d9db95ebdc2bdfbb5b5c108808b5e489735d1ba19c52cd4
+ metadata.gz: 119188683fdb32b0dce2664038b1fe05a7e4e75df64f2316e50dc19706ee300ac90b59e3a5cce33995fc5d8511f5b3bb8bdf918ce9da51d0d0a81fbde2f6bc58
+ data.tar.gz: c2699dbcefaf135ee8e0520014bba3c6f671569c617c7ff69bb3aff057d16aeff330fd58750c8c9815c7def686bd6e2876c760944f2ae8045042f203056e5cdb
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ## [[0.17.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.1...v0.17.2)] - 2024-07-14
+
+ - Change supported llama.cpp version to b3358.
+ - Add vocabulary pre-tokenization type constants.
+ - Add attention type constants.
+ - Add `attention_type` accessor to `ContextParams`.
+ - Add `lstrip` and `special` keyword arguments to `token_to_piece` method in `Model`.
+ - Add `has_encoder?`, `decoder_start_token`, and `detokenize` methods to `Model`.
+ - Add `encode` method to `Context`.
+
  ## [[0.17.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.17.0...v0.17.1)] - 2024-07-06

  - Update usage section on README.
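For orientation, here is a minimal sketch of how the Model-level additions in 0.17.2 might be exercised from Ruby. The model path and prompt are placeholders, and the calls follow the signatures declared in data/sig/llama_cpp.rbs further below; this sketch is illustrative only and is not part of the packaged gem.

require 'llama_cpp'

# Placeholder path: any locally available GGUF model file.
model = LLaMACpp::Model.new(model_path: '/path/to/model.gguf', params: LLaMACpp::ModelParams.new)

tokens = model.tokenize(text: 'Hello, world.', add_bos: true)

# New in 0.17.2: `lstrip` and `special` keyword arguments on token_to_piece.
pieces = tokens.map { |t| model.token_to_piece(t, lstrip: 0, special: false) }
puts pieces.join

# New in 0.17.2: round-trip token ids back into a string.
puts model.detokenize(tokens, remove_special: false, unparse_special: false)

# New in 0.17.2: encoder/decoder introspection for encoder-decoder models.
puts model.decoder_start_token if model.has_encoder?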
@@ -978,6 +978,8 @@ public:
  rb_define_method(rb_cLLaMAContextParams, "rope_scaling_type", RUBY_METHOD_FUNC(_llama_context_params_get_rope_scaling_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "pooling_type=", RUBY_METHOD_FUNC(_llama_context_params_set_pooling_type), 1);
  rb_define_method(rb_cLLaMAContextParams, "pooling_type", RUBY_METHOD_FUNC(_llama_context_params_get_pooling_type), 0);
+ rb_define_method(rb_cLLaMAContextParams, "attention_type=", RUBY_METHOD_FUNC(_llama_context_params_set_attention_type), 1);
+ rb_define_method(rb_cLLaMAContextParams, "attention_type", RUBY_METHOD_FUNC(_llama_context_params_get_attention_type), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
  rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
@@ -1129,6 +1131,18 @@ private:
  return INT2NUM(ptr->params.pooling_type);
  }

+ // attention_type
+ static VALUE _llama_context_params_set_attention_type(VALUE self, VALUE scaling_type) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ ptr->params.attention_type = static_cast<enum llama_attention_type>(NUM2INT(scaling_type));
+ return INT2NUM(ptr->params.attention_type);
+ }
+
+ static VALUE _llama_context_params_get_attention_type(VALUE self) {
+ LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+ return INT2NUM(ptr->params.attention_type);
+ }
+
  // rope_freq_base
  static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
  LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
@@ -1516,7 +1530,7 @@ public:
  rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_model_n_embd), 0);
  rb_define_method(rb_cLLaMAModel, "n_layer", RUBY_METHOD_FUNC(_llama_model_get_model_n_layer), 0);
  rb_define_method(rb_cLLaMAModel, "rope_freq_scale_train", RUBY_METHOD_FUNC(_llama_model_rope_freq_scale_train), 0);
- rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), 1);
+ rb_define_method(rb_cLLaMAModel, "token_to_piece", RUBY_METHOD_FUNC(_llama_model_token_to_piece), -1);
  rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize), -1);
  rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
  rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
@@ -1538,6 +1552,9 @@ public:
  rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
  rb_define_method(rb_cLLaMAModel, "token_is_eog?", RUBY_METHOD_FUNC(_llama_model_token_is_eog), 1);
  rb_define_method(rb_cLLaMAModel, "token_is_control?", RUBY_METHOD_FUNC(_llama_model_token_is_control), 1);
+ rb_define_method(rb_cLLaMAModel, "has_encoder?", RUBY_METHOD_FUNC(_llama_model_has_encoder), 0);
+ rb_define_method(rb_cLLaMAModel, "decoder_start_token", RUBY_METHOD_FUNC(_llama_model_decoder_start_token), 0);
+ rb_define_method(rb_cLLaMAModel, "detokenize", RUBY_METHOD_FUNC(_llama_model_detokenize), -1);
  }

  private:
@@ -1677,18 +1694,33 @@ private:
  return DBL2NUM(llama_rope_freq_scale_train(ptr->model));
  }

- static VALUE _llama_model_token_to_piece(VALUE self, VALUE token_) {
+ static VALUE _llama_model_token_to_piece(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("lstrip"), rb_intern("special") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ VALUE token_ = Qnil;
+ rb_scan_args(argc, argv, "1:", &token_, &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
  if (!RB_INTEGER_TYPE_P(token_)) {
  rb_raise(rb_eArgError, "token must be an integer");
  return Qnil;
  }
+ if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+ rb_raise(rb_eArgError, "lstrip must be an integer");
+ return Qnil;
+ }
+
  const llama_token token = NUM2INT(token_);
+ const int32_t lstrip = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 0;
+ const bool special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
  LLaMAModelWrapper* ptr = get_llama_model(self);
  std::vector<char> result(8, 0);
- const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), false);
+ const int n_tokens = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
  if (n_tokens < 0) {
  result.resize(-n_tokens);
- const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), false);
+ const int check = llama_token_to_piece(ptr->model, token, result.data(), result.size(), lstrip, special);
  if (check != -n_tokens) {
  rb_raise(rb_eRuntimeError, "failed to convert");
  return Qnil;
@@ -1865,6 +1897,58 @@ private:
  LLaMAModelWrapper* ptr = get_llama_model(self);
  return llama_token_is_control(ptr->model, token) ? Qtrue : Qfalse;
  }
+
+ static VALUE _llama_model_has_encoder(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return llama_model_has_encoder(ptr->model) ? Qtrue : Qfalse;
+ }
+
+ static VALUE _llama_model_decoder_start_token(VALUE self) {
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ return INT2NUM(llama_model_decoder_start_token(ptr->model));
+ }
+
+ static VALUE _llama_model_detokenize(int argc, VALUE* argv, VALUE self) {
+ VALUE kw_args = Qnil;
+ ID kw_table[2] = { rb_intern("remove_special"), rb_intern("unparse_special") };
+ VALUE kw_values[2] = { Qundef, Qundef };
+ VALUE tokens_ = Qnil;
+ rb_scan_args(argc, argv, "1:", &tokens_, &kw_args);
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+ if (!RB_TYPE_P(tokens_, T_ARRAY)) {
+ rb_raise(rb_eArgError, "tokens must be an array");
+ return Qnil;
+ }
+
+ const int32_t n_tokens = RARRAY_LEN(tokens_);
+ llama_token* tokens = ALLOCA_N(llama_token, n_tokens);
+ for (int32_t i = 0; i < n_tokens; i++) {
+ tokens[i] = NUM2INT(rb_ary_entry(tokens_, i));
+ }
+
+ std::string text;
+ text.resize(std::max(text.capacity(), static_cast<unsigned long>(n_tokens)));
+ const int32_t text_len_max = text.size();
+
+ bool remove_special = kw_values[0] != Qundef ? RTEST(kw_values[0]) : false;
+ bool unparse_special = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+ LLaMAModelWrapper* ptr = get_llama_model(self);
+ std::string result;
+ int32_t n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+ if (n_chars < 0) {
+ text.resize(-n_chars);
+ n_chars = llama_detokenize(ptr->model, tokens, n_tokens, &text[0], text_len_max, remove_special, unparse_special);
+ if (n_chars <= text.size()) {
+ rb_raise(rb_eRuntimeError, "Failed to detokenize");
+ return Qnil;
+ }
+ }
+
+ text.resize(n_chars);
+ return rb_utf8_str_new_cstr(text.c_str());
+ }
  };

  const rb_data_type_t RbLLaMAModel::llama_model_type = {
@@ -2134,6 +2218,7 @@ public:
  rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
  rb_define_attr(rb_cLLaMAContext, "model", 1, 0);
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
+ rb_define_method(rb_cLLaMAContext, "encode", RUBY_METHOD_FUNC(_llama_context_encode), 1);
  rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
  rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
@@ -2228,6 +2313,24 @@ private:
  return Qnil;
  }

+ static VALUE _llama_context_encode(VALUE self, VALUE batch) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ if (!rb_obj_is_kind_of(batch, rb_cLLaMABatch)) {
+ rb_raise(rb_eArgError, "batch must be a Batch");
+ return Qnil;
+ }
+ LLaMABatchWrapper* batch_ptr = RbLLaMABatch::get_llama_batch(batch);
+ if (llama_encode(ptr->ctx, batch_ptr->batch) < 0) {
+ rb_raise(rb_eRuntimeError, "Failed to encode");
+ return Qnil;
+ }
+ return Qnil;
+ }
+
  static VALUE _llama_context_decode(VALUE self, VALUE batch) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
@@ -2774,7 +2877,7 @@ private:
  ID kw_table[3] = { rb_intern("logits"), rb_intern("logits_guidance"), rb_intern("scale") };
  VALUE kw_values[3] = { Qundef, Qundef, Qundef };
  rb_scan_args(argc, argv, ":", &kw_args);
- rb_get_kwargs(kw_args, kw_table, 0, 3, kw_values);
+ rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);

  if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
  rb_raise(rb_eArgError, "logits must be an Array");
@@ -3513,6 +3616,8 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_DBRX", INT2NUM(LLAMA_VOCAB_PRE_TYPE_DBRX));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_SMAUG", INT2NUM(LLAMA_VOCAB_PRE_TYPE_SMAUG));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_PORO", INT2NUM(LLAMA_VOCAB_PRE_TYPE_PORO));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM3", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM3));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_CHATGLM4", INT2NUM(LLAMA_VOCAB_PRE_TYPE_CHATGLM4));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_VIKING", INT2NUM(LLAMA_VOCAB_PRE_TYPE_VIKING));
  rb_define_const(rb_mLLaMACpp, "LLAMA_VOCAB_PRE_TYPE_JAIS", INT2NUM(LLAMA_VOCAB_PRE_TYPE_JAIS));
@@ -3594,6 +3699,10 @@ extern "C" void Init_llama_cpp(void) {
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_CLS", INT2NUM(LLAMA_POOLING_TYPE_CLS));
  rb_define_const(rb_mLLaMACpp, "LLAMA_POOLING_TYPE_LAST", INT2NUM(LLAMA_POOLING_TYPE_LAST));

+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_UNSPECIFIED", INT2NUM(LLAMA_ATTENTION_TYPE_UNSPECIFIED));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_CAUSAL));
+ rb_define_const(rb_mLLaMACpp, "LLAMA_ATTENTION_TYPE_NON_CAUSAL", INT2NUM(LLAMA_ATTENTION_TYPE_NON_CAUSAL));
+
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_NONE", INT2NUM(LLAMA_SPLIT_MODE_NONE));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_LAYER", INT2NUM(LLAMA_SPLIT_MODE_LAYER));
  rb_define_const(rb_mLLaMACpp, "LLAMA_SPLIT_MODE_ROW", INT2NUM(LLAMA_SPLIT_MODE_ROW));
@@ -1,6 +1,7 @@
  #ifndef LLAMA_CPP_RB_H
  #define LLAMA_CPP_RB_H 1

+ #include <algorithm>
  #include <sstream>
  #include <string>
  #include <vector>
@@ -3,8 +3,8 @@
  # llama_cpp.rb provides Ruby bindings for the llama.cpp.
  module LLaMACpp
  # The version of llama_cpp.rb you install.
- VERSION = '0.17.1'
+ VERSION = '0.17.2'

  # The supported version of llama.cpp.
- LLAMA_CPP_VERSION = 'b3291'
+ LLAMA_CPP_VERSION = 'b3358'
  end
data/sig/llama_cpp.rbs CHANGED
@@ -33,6 +33,8 @@ module LLaMACpp
  LLAMA_VOCAB_PRE_TYPE_DBRX: Integer
  LLAMA_VOCAB_PRE_TYPE_SMAUG: Integer
  LLAMA_VOCAB_PRE_TYPE_PORO: Integer
+ LLAMA_VOCAB_PRE_TYPE_CHATGLM3: Integer
+ LLAMA_VOCAB_PRE_TYPE_CHATGLM4: Integer
  LLAMA_VOCAB_PRE_TYPE_VIKING: Integer
  LLAMA_VOCAB_PRE_TYPE_JAIS: Integer
@@ -104,6 +106,10 @@ module LLaMACpp
  LLAMA_POOLING_TYPE_CLS: Integer
  LLAMA_POOLING_TYPE_LAST: Integer

+ LLAMA_ATTENTION_TYPE_UNSPECIFIED: Integer
+ LLAMA_ATTENTION_TYPE_CAUSAL: Integer
+ LLAMA_ATTENTION_TYPE_NON_CAUSAL: Integer
+
  LLAMA_SPLIT_MODE_NONE: Integer
  LLAMA_SPLIT_MODE_LAYER: Integer
  LLAMA_SPLIT_MODE_ROW: Integer
@@ -158,7 +164,7 @@ module LLaMACpp
  def n_embd: () -> Integer
  def n_layer: () -> Integer
  def rope_freq_scale_train: () -> Float
- def token_to_piece: (Integer) -> String
+ def token_to_piece: (Integer, ?lstrip: Integer, ?special: bool) -> String
  def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
  def desc: () -> String
  def size: () -> Integer
@@ -180,6 +186,9 @@ module LLaMACpp
  def token_eot: () -> Integer
  def token_is_eog?: (Integer) -> bool
  def token_is_control?: (Integer) -> bool
+ def has_encoder?: () -> bool
+ def decoder_start_token: () -> Integer
+ def detokenize: (Array[Integer], ?remove_special: bool, ?unparse_special: bool) -> String
  end

  class Timings
@@ -261,6 +270,7 @@ module LLaMACpp
  def embeddings: () -> Array[Float]
  def embeddings_ith: (Integer) -> Array[Float]
  def embeddings_seq: (Integer) -> Array[Float]
+ def encode: (::LLaMACpp::Batch) -> void
  def decode: (::LLaMACpp::Batch) -> void
  def logits: () -> Array[Float]
  def set_embeddings: (bool) -> void
@@ -330,6 +340,8 @@ module LLaMACpp
  def rope_scaling_type: () -> Integer
  def pooling_type=: (Integer) -> Integer
  def pooling_type: () -> Integer
+ def attention_type=: (Integer) -> Integer
+ def attention_type: () -> Integer
  def rope_freq_base=: (Float) -> Float
  def rope_freq_base: () -> Float
  def rope_freq_scale=: (Float) -> Float
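A similarly hedged sketch of the context-level additions typed above: the `attention_type` accessor on ContextParams and the `encode` method on Context. The `model` variable is assumed from the earlier sketch, and construction of the LLaMACpp::Batch passed to `encode` is omitted because the Batch API is unchanged in this release.

# Assumes `model` from the earlier sketch.
params = LLaMACpp::ContextParams.new
# New in 0.17.2: choose causal vs. non-causal attention explicitly.
params.attention_type = LLaMACpp::LLAMA_ATTENTION_TYPE_NON_CAUSAL
context = LLaMACpp::Context.new(model: model, params: params)

# New in 0.17.2: Context#encode takes a LLaMACpp::Batch (prepared the same
# way as for Context#decode) and runs the encoder side of an
# encoder-decoder model, e.g.:
#   context.encode(batch) if model.has_encoder?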
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.17.1
+ version: 0.17.2
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-07-06 00:00:00.000000000 Z
+ date: 2024-07-14 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email: