llama_cpp 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/examples/chat.rb +8 -6
- data/ext/llama_cpp/extconf.rb +2 -2
- data/ext/llama_cpp/llama_cpp.cpp +81 -162
- data/ext/llama_cpp/src/ggml-cuda.cu +188 -20
- data/ext/llama_cpp/src/ggml-metal.m +13 -5
- data/ext/llama_cpp/src/ggml-metal.metal +9 -1
- data/ext/llama_cpp/src/ggml-opencl.cpp +161 -169
- data/ext/llama_cpp/src/ggml.c +362 -84
- data/ext/llama_cpp/src/ggml.h +8 -7
- data/ext/llama_cpp/src/llama.cpp +100 -95
- data/ext/llama_cpp/src/llama.h +16 -21
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +4 -4
- data/sig/llama_cpp.rbs +11 -12
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 683f2d81aff9e82234925ba08cd5b46b56a2283ff8397a6c06ce50d34a95dbfc
|
4
|
+
data.tar.gz: d3005cab273b8d85f47f4cb4314fbab3a540d366a42829e5ec8d2c29576ae09e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 559f1ba1253a704c38480336decd315c65b4d80e6895ad1dc0faa3b5b81570a1faeaadcb6ec7ee3145f0fff758ab5e38e6cb8163382ce9b693d893deebe9a8f9
|
7
|
+
data.tar.gz: cb3d96b8c3f79cd20d4169a175270e8768c04bcaa24e51cb2c4d7872db88bc6e3349e6b1e93a130b89d21daab8be6e57b5305412059ea722084c7cb7d4a01e93
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## [[0.9.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.8.0...v0.9.0)] - 2023-10-28
|
2
|
+
|
3
|
+
- Fix missing object file for ggml-backend when building with metal and cublas options.
|
4
|
+
|
5
|
+
**Breaking Changes**
|
6
|
+
- Bump bundled llama.cpp from b1405 to b1429
|
7
|
+
- Move following methods from Context to Model:
|
8
|
+
- text, score, type, token_bos, token_eos, token_nl, token_prefix, token_middle, token_suffix, and token_eot.
|
9
|
+
- Add `sample_repetition_penalties` method, which integrates sample_frequency_and_presence_penalties and sample_repetition_penalty methods.
|
10
|
+
|
1
11
|
## [[0.8.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.7.1...v0.8.0)] - 2023-10-21
|
2
12
|
|
3
13
|
**Breaking Changes**
|
data/examples/chat.rb
CHANGED
@@ -83,10 +83,12 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
|
|
83
83
|
candidates = LLaMACpp::TokenDataArray.new(base_candidates)
|
84
84
|
|
85
85
|
last_n_repeat = [last_n_tokens.size, options[:repeat_last_n], n_ctx].min
|
86
|
-
context.
|
87
|
-
|
88
|
-
|
89
|
-
|
86
|
+
context.sample_repetition_penalties(
|
87
|
+
candidates,
|
88
|
+
last_n_tokens[-last_n_repeat..],
|
89
|
+
penalty_repeat: options[:repeat_penalty],
|
90
|
+
penalty_freq: options[:frequency_penalty],
|
91
|
+
penalty_present: options[:presence_penalty]
|
90
92
|
)
|
91
93
|
|
92
94
|
context.sample_top_k(candidates, k: options[:top_k])
|
@@ -99,8 +101,8 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
|
|
99
101
|
last_n_tokens.shift
|
100
102
|
last_n_tokens.push(id)
|
101
103
|
|
102
|
-
if id == context.token_eos
|
103
|
-
id = context.token_nl
|
104
|
+
if id == context.model.token_eos
|
105
|
+
id = context.model.token_nl
|
104
106
|
unless antiprompt.empty?
|
105
107
|
first_antiprompt = context.model.tokenize(text: antiprompt, add_bos: false)
|
106
108
|
embd_input.concat(first_antiprompt)
|
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -53,7 +53,7 @@ if with_config('metal')
|
|
53
53
|
$CFLAGS << ' -DGGML_USE_METAL'
|
54
54
|
$CXXFLAGS << ' -DGGML_USE_METAL'
|
55
55
|
$LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
56
|
-
$objs = %w[ggml.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
|
56
|
+
$objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-metal.o llama.o llama_cpp.o]
|
57
57
|
$objs << 'k_quants.o' unless with_config('no_k_quants')
|
58
58
|
end
|
59
59
|
|
@@ -61,7 +61,7 @@ if with_config('cublas')
|
|
61
61
|
$CFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
|
62
62
|
$CXXFLAGS << ' -DGGML_USE_CUBLAS -I/usr/local/cuda/include'
|
63
63
|
$LDFLAGS << ' -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64'
|
64
|
-
$objs = %w[ggml.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
|
64
|
+
$objs = %w[ggml.o ggml-backend.o ggml-alloc.o ggml-cuda.o llama.o llama_cpp.o]
|
65
65
|
$objs << 'k_quants.o' unless with_config('no_k_quants')
|
66
66
|
end
|
67
67
|
|
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -1148,6 +1148,16 @@ public:
|
|
1148
1148
|
rb_define_method(rb_cLLaMAModel, "desc", RUBY_METHOD_FUNC(_llama_model_get_model_desc), 0);
|
1149
1149
|
rb_define_method(rb_cLLaMAModel, "size", RUBY_METHOD_FUNC(_llama_model_get_model_size), 0);
|
1150
1150
|
rb_define_method(rb_cLLaMAModel, "n_params", RUBY_METHOD_FUNC(_llama_model_get_model_n_params), 0);
|
1151
|
+
rb_define_method(rb_cLLaMAModel, "text", RUBY_METHOD_FUNC(_llama_model_get_text), 1);
|
1152
|
+
rb_define_method(rb_cLLaMAModel, "score", RUBY_METHOD_FUNC(_llama_model_get_score), 1);
|
1153
|
+
rb_define_method(rb_cLLaMAModel, "type", RUBY_METHOD_FUNC(_llama_model_get_type), 1);
|
1154
|
+
rb_define_method(rb_cLLaMAModel, "token_bos", RUBY_METHOD_FUNC(_llama_model_token_bos), 0);
|
1155
|
+
rb_define_method(rb_cLLaMAModel, "token_eos", RUBY_METHOD_FUNC(_llama_model_token_eos), 0);
|
1156
|
+
rb_define_method(rb_cLLaMAModel, "token_nl", RUBY_METHOD_FUNC(_llama_model_token_nl), 0);
|
1157
|
+
rb_define_method(rb_cLLaMAModel, "token_prefix", RUBY_METHOD_FUNC(_llama_model_token_prefix), 0);
|
1158
|
+
rb_define_method(rb_cLLaMAModel, "token_middle", RUBY_METHOD_FUNC(_llama_model_token_middle), 0);
|
1159
|
+
rb_define_method(rb_cLLaMAModel, "token_suffix", RUBY_METHOD_FUNC(_llama_model_token_suffix), 0);
|
1160
|
+
rb_define_method(rb_cLLaMAModel, "token_eot", RUBY_METHOD_FUNC(_llama_model_token_eot), 0);
|
1151
1161
|
}
|
1152
1162
|
|
1153
1163
|
private:
|
@@ -1396,6 +1406,62 @@ private:
|
|
1396
1406
|
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1397
1407
|
return UINT2NUM(llama_model_n_params(ptr->model));
|
1398
1408
|
}
|
1409
|
+
|
1410
|
+
static VALUE _llama_model_get_text(VALUE self, VALUE token_) {
|
1411
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1412
|
+
const llama_token token = NUM2INT(token_);
|
1413
|
+
const char* text = llama_token_get_text(ptr->model, token);
|
1414
|
+
return rb_utf8_str_new_cstr(text);
|
1415
|
+
}
|
1416
|
+
|
1417
|
+
static VALUE _llama_model_get_score(VALUE self, VALUE token_) {
|
1418
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1419
|
+
const llama_token token = NUM2INT(token_);
|
1420
|
+
const float score = llama_token_get_score(ptr->model, token);
|
1421
|
+
return DBL2NUM(score);
|
1422
|
+
}
|
1423
|
+
|
1424
|
+
static VALUE _llama_model_get_type(VALUE self, VALUE token_) {
|
1425
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1426
|
+
const llama_token token = NUM2INT(token_);
|
1427
|
+
const int type = llama_token_get_type(ptr->model, token);
|
1428
|
+
return INT2NUM(type);
|
1429
|
+
}
|
1430
|
+
|
1431
|
+
static VALUE _llama_model_token_bos(VALUE self) {
|
1432
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1433
|
+
return INT2NUM(llama_token_bos(ptr->model));
|
1434
|
+
}
|
1435
|
+
|
1436
|
+
static VALUE _llama_model_token_eos(VALUE self) {
|
1437
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1438
|
+
return INT2NUM(llama_token_eos(ptr->model));
|
1439
|
+
}
|
1440
|
+
|
1441
|
+
static VALUE _llama_model_token_nl(VALUE self) {
|
1442
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1443
|
+
return INT2NUM(llama_token_nl(ptr->model));
|
1444
|
+
}
|
1445
|
+
|
1446
|
+
static VALUE _llama_model_token_prefix(VALUE self) {
|
1447
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1448
|
+
return INT2NUM(llama_token_prefix(ptr->model));
|
1449
|
+
}
|
1450
|
+
|
1451
|
+
static VALUE _llama_model_token_middle(VALUE self) {
|
1452
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1453
|
+
return INT2NUM(llama_token_middle(ptr->model));
|
1454
|
+
}
|
1455
|
+
|
1456
|
+
static VALUE _llama_model_token_suffix(VALUE self) {
|
1457
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1458
|
+
return INT2NUM(llama_token_suffix(ptr->model));
|
1459
|
+
}
|
1460
|
+
|
1461
|
+
static VALUE _llama_model_token_eot(VALUE self) {
|
1462
|
+
LLaMAModelWrapper* ptr = get_llama_model(self);
|
1463
|
+
return INT2NUM(llama_token_eot(ptr->model));
|
1464
|
+
}
|
1399
1465
|
};
|
1400
1466
|
|
1401
1467
|
const rb_data_type_t RbLLaMAModel::llama_model_type = {
|
@@ -1670,16 +1736,6 @@ public:
|
|
1670
1736
|
rb_define_method(rb_cLLaMAContext, "decode", RUBY_METHOD_FUNC(_llama_context_decode), 1);
|
1671
1737
|
rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
|
1672
1738
|
rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
|
1673
|
-
rb_define_method(rb_cLLaMAContext, "text", RUBY_METHOD_FUNC(_llama_context_text), 1);
|
1674
|
-
rb_define_method(rb_cLLaMAContext, "score", RUBY_METHOD_FUNC(_llama_context_score), 1);
|
1675
|
-
rb_define_method(rb_cLLaMAContext, "type", RUBY_METHOD_FUNC(_llama_context_type), 1);
|
1676
|
-
rb_define_method(rb_cLLaMAContext, "token_bos", RUBY_METHOD_FUNC(_llama_context_token_bos), 0);
|
1677
|
-
rb_define_method(rb_cLLaMAContext, "token_eos", RUBY_METHOD_FUNC(_llama_context_token_eos), 0);
|
1678
|
-
rb_define_method(rb_cLLaMAContext, "token_nl", RUBY_METHOD_FUNC(_llama_context_token_nl), 0);
|
1679
|
-
rb_define_method(rb_cLLaMAContext, "token_prefix", RUBY_METHOD_FUNC(_llama_context_token_prefix), 0);
|
1680
|
-
rb_define_method(rb_cLLaMAContext, "token_middle", RUBY_METHOD_FUNC(_llama_context_token_middle), 0);
|
1681
|
-
rb_define_method(rb_cLLaMAContext, "token_suffix", RUBY_METHOD_FUNC(_llama_context_token_suffix), 0);
|
1682
|
-
rb_define_method(rb_cLLaMAContext, "token_eot", RUBY_METHOD_FUNC(_llama_context_token_eot), 0);
|
1683
1739
|
rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
|
1684
1740
|
rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
|
1685
1741
|
rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
|
@@ -1693,8 +1749,7 @@ public:
|
|
1693
1749
|
rb_define_method(rb_cLLaMAContext, "set_rng_seed", RUBY_METHOD_FUNC(_llama_context_set_rng_seed), 1);
|
1694
1750
|
rb_define_method(rb_cLLaMAContext, "load_session_file", RUBY_METHOD_FUNC(_llama_context_load_session_file), -1);
|
1695
1751
|
rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
|
1696
|
-
rb_define_method(rb_cLLaMAContext, "sample_repetition_penalty", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalty), -1);
|
1697
|
-
rb_define_method(rb_cLLaMAContext, "sample_frequency_and_presence_penalties", RUBY_METHOD_FUNC(_llama_context_sample_frequency_and_presence_penalties), -1);
|
1752
|
+
rb_define_method(rb_cLLaMAContext, "sample_repetition_penalties", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalties), -1);
|
1698
1753
|
rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
|
1699
1754
|
rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
|
1700
1755
|
rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
|
@@ -1927,102 +1982,6 @@ private:
|
|
1927
1982
|
return output;
|
1928
1983
|
}
|
1929
1984
|
|
1930
|
-
static VALUE _llama_context_text(VALUE self, VALUE token_) {
|
1931
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1932
|
-
if (ptr->ctx == NULL) {
|
1933
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1934
|
-
return Qnil;
|
1935
|
-
}
|
1936
|
-
const llama_token token = NUM2INT(token_);
|
1937
|
-
const char* text = llama_token_get_text(ptr->ctx, token);
|
1938
|
-
return rb_utf8_str_new_cstr(text);
|
1939
|
-
}
|
1940
|
-
|
1941
|
-
static VALUE _llama_context_score(VALUE self, VALUE token_) {
|
1942
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1943
|
-
if (ptr->ctx == NULL) {
|
1944
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1945
|
-
return Qnil;
|
1946
|
-
}
|
1947
|
-
const llama_token token = NUM2INT(token_);
|
1948
|
-
const float score = llama_token_get_score(ptr->ctx, token);
|
1949
|
-
return DBL2NUM(score);
|
1950
|
-
}
|
1951
|
-
|
1952
|
-
static VALUE _llama_context_type(VALUE self, VALUE token_) {
|
1953
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1954
|
-
if (ptr->ctx == NULL) {
|
1955
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1956
|
-
return Qnil;
|
1957
|
-
}
|
1958
|
-
const llama_token token = NUM2INT(token_);
|
1959
|
-
const int type = llama_token_get_type(ptr->ctx, token);
|
1960
|
-
return INT2NUM(type);
|
1961
|
-
}
|
1962
|
-
|
1963
|
-
static VALUE _llama_context_token_bos(VALUE self) {
|
1964
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1965
|
-
if (ptr->ctx == NULL) {
|
1966
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1967
|
-
return Qnil;
|
1968
|
-
}
|
1969
|
-
return INT2NUM(llama_token_bos(ptr->ctx));
|
1970
|
-
}
|
1971
|
-
|
1972
|
-
static VALUE _llama_context_token_eos(VALUE self) {
|
1973
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1974
|
-
if (ptr->ctx == NULL) {
|
1975
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1976
|
-
return Qnil;
|
1977
|
-
}
|
1978
|
-
return INT2NUM(llama_token_eos(ptr->ctx));
|
1979
|
-
}
|
1980
|
-
|
1981
|
-
static VALUE _llama_context_token_nl(VALUE self) {
|
1982
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1983
|
-
if (ptr->ctx == NULL) {
|
1984
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1985
|
-
return Qnil;
|
1986
|
-
}
|
1987
|
-
return INT2NUM(llama_token_nl(ptr->ctx));
|
1988
|
-
}
|
1989
|
-
|
1990
|
-
static VALUE _llama_context_token_prefix(VALUE self) {
|
1991
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
1992
|
-
if (ptr->ctx == NULL) {
|
1993
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
1994
|
-
return Qnil;
|
1995
|
-
}
|
1996
|
-
return INT2NUM(llama_token_prefix(ptr->ctx));
|
1997
|
-
}
|
1998
|
-
|
1999
|
-
static VALUE _llama_context_token_middle(VALUE self) {
|
2000
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
2001
|
-
if (ptr->ctx == NULL) {
|
2002
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
2003
|
-
return Qnil;
|
2004
|
-
}
|
2005
|
-
return INT2NUM(llama_token_middle(ptr->ctx));
|
2006
|
-
}
|
2007
|
-
|
2008
|
-
static VALUE _llama_context_token_suffix(VALUE self) {
|
2009
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
2010
|
-
if (ptr->ctx == NULL) {
|
2011
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
2012
|
-
return Qnil;
|
2013
|
-
}
|
2014
|
-
return INT2NUM(llama_token_suffix(ptr->ctx));
|
2015
|
-
}
|
2016
|
-
|
2017
|
-
static VALUE _llama_context_token_eot(VALUE self) {
|
2018
|
-
LLaMAContextWrapper* ptr = get_llama_context(self);
|
2019
|
-
if (ptr->ctx == NULL) {
|
2020
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
2021
|
-
return Qnil;
|
2022
|
-
}
|
2023
|
-
return INT2NUM(llama_token_eot(ptr->ctx));
|
2024
|
-
}
|
2025
|
-
|
2026
1985
|
static VALUE _llama_context_n_ctx(VALUE self) {
|
2027
1986
|
LLaMAContextWrapper* ptr = get_llama_context(self);
|
2028
1987
|
if (ptr->ctx == NULL) {
|
@@ -2231,14 +2190,14 @@ private:
|
|
2231
2190
|
return Qnil;
|
2232
2191
|
}
|
2233
2192
|
|
2234
|
-
static VALUE _llama_context_sample_repetition_penalty(int argc, VALUE* argv, VALUE self) {
|
2193
|
+
static VALUE _llama_context_sample_repetition_penalties(int argc, VALUE* argv, VALUE self) {
|
2235
2194
|
VALUE kw_args = Qnil;
|
2236
|
-
ID kw_table[1] = { rb_intern("penalty") };
|
2237
|
-
VALUE kw_values[1] = { Qundef };
|
2195
|
+
ID kw_table[3] = { rb_intern("penalty_repeat"), rb_intern("penalty_freq"), rb_intern("penalty_present") };
|
2196
|
+
VALUE kw_values[3] = { Qundef, Qundef, Qundef };
|
2238
2197
|
VALUE candidates = Qnil;
|
2239
2198
|
VALUE last_n_tokens = Qnil;
|
2240
2199
|
rb_scan_args(argc, argv, "2:", &candidates, &last_n_tokens, &kw_args);
|
2241
|
-
rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
|
2200
|
+
rb_get_kwargs(kw_args, kw_table, 3, 0, kw_values);
|
2242
2201
|
|
2243
2202
|
if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
|
2244
2203
|
rb_raise(rb_eArgError, "candidates must be a TokenDataArray");
|
@@ -2249,56 +2208,15 @@ private:
|
|
2249
2208
|
return Qnil;
|
2250
2209
|
}
|
2251
2210
|
if (!RB_FLOAT_TYPE_P(kw_values[0])) {
|
2252
|
-
rb_raise(rb_eArgError, "penalty must be a float");
|
2211
|
+
rb_raise(rb_eArgError, "penalty_repeat must be a float");
|
2253
2212
|
return Qnil;
|
2254
2213
|
}
|
2255
|
-
|
2256
|
-
|
2257
|
-
std::vector<llama_token> last_n_tokens_data(last_tokens_size);
|
2258
|
-
for (size_t i = 0; i < last_tokens_size; i++) {
|
2259
|
-
last_n_tokens_data[i] = NUM2INT(rb_ary_entry(last_n_tokens, i));
|
2260
|
-
}
|
2261
|
-
|
2262
|
-
LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
|
2263
|
-
if (ctx_ptr->ctx == NULL) {
|
2264
|
-
rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
|
2265
|
-
return Qnil;
|
2266
|
-
}
|
2267
|
-
LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
|
2268
|
-
if (cnd_ptr->array.data == nullptr) {
|
2269
|
-
rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
|
2270
|
-
return Qnil;
|
2271
|
-
}
|
2272
|
-
const float penalty = NUM2DBL(kw_values[0]);
|
2273
|
-
|
2274
|
-
llama_sample_repetition_penalty(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size, penalty);
|
2275
|
-
|
2276
|
-
return Qnil;
|
2277
|
-
}
|
2278
|
-
|
2279
|
-
static VALUE _llama_context_sample_frequency_and_presence_penalties(int argc, VALUE* argv, VALUE self) {
|
2280
|
-
VALUE kw_args = Qnil;
|
2281
|
-
ID kw_table[2] = { rb_intern("frequency"), rb_intern("presence") };
|
2282
|
-
VALUE kw_values[2] = { Qundef, Qundef };
|
2283
|
-
VALUE candidates = Qnil;
|
2284
|
-
VALUE last_n_tokens = Qnil;
|
2285
|
-
rb_scan_args(argc, argv, "2:", &candidates, &last_n_tokens, &kw_args);
|
2286
|
-
rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
|
2287
|
-
|
2288
|
-
if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
|
2289
|
-
rb_raise(rb_eArgError, "candidates must be a TokenDataArray");
|
2290
|
-
return Qnil;
|
2291
|
-
}
|
2292
|
-
if (!RB_TYPE_P(last_n_tokens, T_ARRAY)) {
|
2293
|
-
rb_raise(rb_eArgError, "last_n_tokens must be an Array");
|
2294
|
-
return Qnil;
|
2295
|
-
}
|
2296
|
-
if (!RB_FLOAT_TYPE_P(kw_values[0])) {
|
2297
|
-
rb_raise(rb_eArgError, "frequency must be a float");
|
2214
|
+
if (!RB_FLOAT_TYPE_P(kw_values[1])) {
|
2215
|
+
rb_raise(rb_eArgError, "penalty_freq must be a float");
|
2298
2216
|
return Qnil;
|
2299
2217
|
}
|
2300
|
-
if (!RB_FLOAT_TYPE_P(kw_values[1])) {
|
2301
|
-
rb_raise(rb_eArgError, "presence must be a float");
|
2218
|
+
if (!RB_FLOAT_TYPE_P(kw_values[2])) {
|
2219
|
+
rb_raise(rb_eArgError, "penalty_present must be a float");
|
2302
2220
|
return Qnil;
|
2303
2221
|
}
|
2304
2222
|
|
@@ -2318,11 +2236,12 @@ private:
|
|
2318
2236
|
rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
|
2319
2237
|
return Qnil;
|
2320
2238
|
}
|
2239
|
+
const float penalty_repeat = NUM2DBL(kw_values[0]);
|
2240
|
+
const float penalty_freq = NUM2DBL(kw_values[1]);
|
2241
|
+
const float penalty_present = NUM2DBL(kw_values[2]);
|
2321
2242
|
|
2322
|
-
|
2323
|
-
|
2324
|
-
|
2325
|
-
llama_sample_frequency_and_presence_penalties(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size, alpha_frequency, alpha_presence);
|
2243
|
+
llama_sample_repetition_penalties(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size,
|
2244
|
+
penalty_repeat, penalty_freq, penalty_present);
|
2326
2245
|
|
2327
2246
|
return Qnil;
|
2328
2247
|
}
|