llama_cpp 0.0.2 → 0.0.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -2
- data/README.md +3 -2
- data/ext/llama_cpp/extconf.rb +26 -0
- data/ext/llama_cpp/llama_cpp.cpp +97 -3
- data/ext/llama_cpp/src/ggml.c +1254 -670
- data/ext/llama_cpp/src/ggml.h +110 -42
- data/ext/llama_cpp/src/llama.cpp +878 -757
- data/ext/llama_cpp/src/llama.h +42 -1
- data/ext/llama_cpp/src/llama_util.h +389 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +4 -1
- data/sig/llama_cpp.rbs +55 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
+  data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
+  data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,33 @@
 ## [Unreleased]

-## [0.0.
+## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15

-- Bump bundled llama.cpp from master-
+- Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
+- Add parameterless constructor to LLaMACpp::Context.
+- Add free and load methods to LLaMACpp::Context.
+```ruby
+require 'llama_cpp'
+
+context = LLaMACpp::Context.new
+
+params = LLaMACpp::ContextParams.new
+context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
+
+# ...
+
+context.free
+```
+
+## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+- Add logits method to LLaMACpp::Context.
+- Add type signatures.
+- Add class alias Params for LLaMACpp::ContextParams.
+
+## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+- Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
 - Add n_threads arguments to generate method.

 ## [0.0.1] - 2023-04-02
data/README.md
CHANGED
@@ -26,11 +26,12 @@ Prepare a quantized model file by refering to [the usage section on the llama.cp
 require 'llama_cpp'

 params = LLaMACpp::ContextParams.new
-params.seed =
+params.seed = 12

 context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

-puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
+# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
 ```

 ## Contributing
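A note on the new `n_threads` keyword: it is forwarded to llama.cpp's evaluation loop, so a value near the machine's core count is a reasonable starting point. A minimal sketch, reusing the `context` from the README snippet above; `Etc.nprocessors` reports logical cores, and the physical core count is often the better ceiling:

```ruby
require 'etc'

# Derive a thread count from the host's processor count (logical cores).
n_threads = Etc.nprocessors

puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: n_threads)
```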
data/ext/llama_cpp/extconf.rb
CHANGED
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
 $INCFLAGS << ' -I$(srcdir)/src'
 $VPATH << '$(srcdir)/src'

+if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+  $CFLAGS << ' -pthread'
+  $CXXFLAGS << ' -pthread'
+end
+
+UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
+
+# rubocop:disable Layout/LineLength
+if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
+  $CFLAGS << ' -march=native -mtune=native'
+  $CXXFLAGS << ' -march=native -mtune=native'
+elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
+  $CFLAGS << ' -mcpu=native'
+  $CXXFLAGS << ' -mcpu=native'
+elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
+elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
+  $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+  $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
+elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
+  $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+  $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
+end
+# rubocop:enable Layout/LineLength
+
 create_makefile('llama_cpp/llama_cpp')
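The pattern above gates every optimization flag behind mkmf's `try_compile`, so a toolchain that rejects a flag simply skips it instead of breaking the build. A standalone sketch of the same probe-then-append idea (the flag list here is illustrative, not the gem's exact set):

```ruby
require 'mkmf'

# Append each candidate flag set only if the compiler accepts it:
# try_compile builds a throwaway source file with the extra options
# and returns true on success.
['-march=native -mtune=native', '-pthread'].each do |flags|
  next unless try_compile('#include <stdio.h>', flags)

  $CFLAGS << " #{flags}"
  $CXXFLAGS << " #{flags}"
end
```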
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -217,7 +217,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
     rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
-
+    rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
     rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
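This hunk binds the `logits` reader that the changelog lists under 0.0.3. A usage sketch, assuming the gem's keyword signatures for `tokenize` and `eval` (those keyword names are not shown in this diff, and the model path is a placeholder):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Evaluate a prompt, then read the raw scores for the next token.
tokens = context.tokenize(text: 'Hello', n_max_tokens: 8, add_bos: true)
context.eval(tokens: tokens, n_past: 0)

scores = context.logits # => flat Array of Floats, n_vocab entries by default
```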
@@ -226,6 +226,8 @@ public:
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
+    rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
+    rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
   };

 private:
@@ -236,7 +238,13 @@ private:
     ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
     VALUE kw_values[2] = { Qundef, Qundef };
     rb_scan_args(argc, argv, ":", &kw_args);
-    rb_get_kwargs(kw_args, kw_table,
+    rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+    if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+      return Qnil;
+    }

     if (!RB_TYPE_P(kw_values[0], T_STRING)) {
       rb_raise(rb_eArgError, "model_path must be a string");
@@ -256,8 +264,11 @@ private:
       return Qnil;
     }

+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
     RB_GC_GUARD(filename);
-    return
+    return Qnil;
   };

   static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
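Together with the hunk above, this makes `LLaMACpp::Context.new` usable without arguments: the kwargs become optional, a bare call just initializes `@params`/`@has_evaluated` and returns, and a later `load` attaches the model. A sketch of both construction paths (placeholder model path):

```ruby
params = LLaMACpp::ContextParams.new

# Eager: load the model in the constructor, as before.
ctx = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# Deferred: construct empty, attach the model later (new in 0.0.4).
ctx2 = LLaMACpp::Context.new
ctx2.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
```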
@@ -305,6 +316,9 @@ private:
       return Qnil;
     }

+    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+    rb_iv_set(self, "@has_evaluated", Qtrue);
+
     return Qnil;
   };

@@ -361,12 +375,44 @@ private:
     return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
   };

+  static VALUE _llama_context_logits(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
+
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+    const int n_vocab = llama_n_vocab(ptr->ctx);
+    const float* logits = llama_get_logits(ptr->ctx);
+    VALUE output = rb_ary_new();
+    for (int i = 0; i < n_tokens * n_vocab; i++) {
+      rb_ary_push(output, DBL2NUM((double)(logits[i])));
+    }
+
+    return output;
+  };
+
   static VALUE _llama_context_embeddings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }

     const int n_embd = llama_n_embd(ptr->ctx);
     const float* embd = llama_get_embeddings(ptr->ctx);
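Two guards worth noting: `logits` sizes its output from the `@n_tokens` recorded by `eval`, returning `n_tokens * n_vocab` values when `logits_all` is set and `n_vocab` otherwise, and `embeddings` now refuses to run unless the context was created with the embedding flag. A sketch, assuming `ContextParams` exposes `logits_all`/`embedding` writers matching the fields the C code reads above:

```ruby
params = LLaMACpp::ContextParams.new
params.logits_all = true # keep one row of logits per evaluated token
params.embedding = true  # required before calling Context#embeddings

# After eval of, say, 5 tokens on a 32000-token vocabulary:
# context.logits.size     # => 5 * 32000
# context.embeddings.size # => n_embd
```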
@@ -454,6 +500,54 @@ private:
     llama_reset_timings(ptr->ctx);
     return Qnil;
   };
+
+  static VALUE _llama_context_free(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx != NULL) {
+      llama_free(ptr->ctx);
+      ptr->ctx = NULL;
+      rb_iv_set(self, "@params", Qnil);
+      rb_iv_set(self, "@has_evaluated", Qfalse);
+    }
+    return Qnil;
+  }
+
+  static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "model_path must be a string");
+      return Qnil;
+    }
+    if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
+      rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx != NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
+      return Qnil;
+    }
+
+    VALUE filename = kw_values[0];
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
+    ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
+      return Qnil;
+    }
+
+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
+    RB_GC_GUARD(filename);
+    return Qnil;
+  };
 };

 const rb_data_type_t RbLLaMAContext::llama_context_type = {
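The `free`/`load` pair gives the context an explicit lifecycle: `load` raises if a model is already attached, and `free` releases the native `llama_context`, clears the cached params, and is a no-op on an already-freed context. A sketch of the behavior implied by the code above (placeholder model paths):

```ruby
context = LLaMACpp::Context.new
params = LLaMACpp::ContextParams.new
context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

begin
  context.load(model_path: '/path/to/another-model.bin', params: params)
rescue RuntimeError => e
  e.message # => "LLaMA context is already loaded"
end

context.free # releases the native context and resets @params/@has_evaluated
context.free # safe: the wrapped pointer is already NULL
```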