llama_cpp 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
4
- data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
3
+ metadata.gz: 0e659b4cc27e9ab45b524ec3d341892b72cebdf84ccad823f24ff7e472f2ffa8
4
+ data.tar.gz: 790f4d2f6dc9ddf211701f6014ae91ca19e0492efd7c64eb881e66729f929544
5
5
  SHA512:
6
- metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
7
- data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
6
+ metadata.gz: 54eb4dd6c4ca9affc7b6f2da1c9dc719f60c8dc3841645cf47b8f0310ff31ad16a5bc841051663f03e962bc7424f56f1d7a1c5c5eabd03e3f5e7b706467bb0ba
7
+ data.tar.gz: b2a16c6be3d7f117fabea5cc837b9306b0768d9ad99104a6fa2752932d1e1a034312983a87ebfe9e3ccb1bf83257d5ce40520e049a40291c64fb2fab8663882a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [[0.0.4](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.3...v0.0.4)] - 2023-04-15
4
+
5
+ - Bump bundled llama.cpp from master-698f7b5 to master-c85e03d.
6
+ - Add parameterless constructor to LLaMACpp::Context.
7
+ - Add free and load methods to LLaMACpp::Context.
8
+ ```ruby
9
+ require 'llama_cpp'
10
+
11
+ context = LLaMACpp::Context.new
12
+
13
+ params = LLaMACpp::ContextParams.new
14
+ context.load(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
15
+
16
+ # ...
17
+
18
+ context.free
19
+ ```
20
+
3
21
  ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
4
22
 
5
23
  - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
data/README.md CHANGED
@@ -26,11 +26,11 @@ Prepare a quantized model file by referring to [the usage section on the llama.cp
26
26
  require 'llama_cpp'
27
27
 
28
28
  params = LLaMACpp::ContextParams.new
29
- params.seed = 123456
29
+ params.seed = 12
30
30
 
31
31
  context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
32
32
 
33
- puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
33
+ puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
34
34
  # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
35
35
  ```
36
36
 
@@ -10,4 +10,30 @@ $CXXFLAGS << ' -std=c++11'
10
10
  $INCFLAGS << ' -I$(srcdir)/src'
11
11
  $VPATH << '$(srcdir)/src'
12
12
 
13
+ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
14
+ $CFLAGS << ' -pthread'
15
+ $CXXFLAGS << ' -pthread'
16
+ end
17
+
18
+ UNAME_M = RbConfig::CONFIG['build_cpu'] || RbConfig::CONFIG['host_cpu'] || RbConfig::CONFIG['target_cpu']
19
+
20
+ # rubocop:disable Layout/LineLength
21
+ if UNAME_M.match?(/x86_64|i686/) && try_compile('#include <stdio.h>', '-march=native -mtune=native')
22
+ $CFLAGS << ' -march=native -mtune=native'
23
+ $CXXFLAGS << ' -march=native -mtune=native'
24
+ elsif UNAME_M.match?(/aarch64/) && try_compile('#include <stdio.h>', '-mcpu=native')
25
+ $CFLAGS << ' -mcpu=native'
26
+ $CXXFLAGS << ' -mcpu=native'
27
+ elsif UNAME_M.match?(/armv6/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access')
28
+ $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
29
+ $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access'
30
+ elsif UNAME_M.match?(/armv7/) && try_compile('#include <stdio.h>', '-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations')
31
+ $CFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
32
+ $CXXFLAGS << ' -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations'
33
+ elsif UNAME_M.match?(/armv8/) && try_compile('#include <stdio.h>', '-mfp16-format=ieee -mno-unaligned-access')
34
+ $CFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
35
+ $CXXFLAGS << ' -mfp16-format=ieee -mno-unaligned-access'
36
+ end
37
+ # rubocop:enable Layout/LineLength
38
+
13
39
  create_makefile('llama_cpp/llama_cpp')
@@ -226,6 +226,8 @@ public:
226
226
  rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
227
227
  rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
228
228
  rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
229
+ rb_define_method(rb_cLLaMAContext, "free", RUBY_METHOD_FUNC(_llama_context_free), 0);
230
+ rb_define_method(rb_cLLaMAContext, "load", RUBY_METHOD_FUNC(_llama_context_load), -1);
229
231
  };
230
232
 
231
233
  private:
@@ -236,7 +238,13 @@ private:
236
238
  ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
237
239
  VALUE kw_values[2] = { Qundef, Qundef };
238
240
  rb_scan_args(argc, argv, ":", &kw_args);
239
- rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
241
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
242
+
243
+ if (kw_values[0] == Qundef && kw_values[1] == Qundef) {
244
+ rb_iv_set(self, "@params", Qnil);
245
+ rb_iv_set(self, "@has_evaluated", Qfalse);
246
+ return Qnil;
247
+ }
240
248
 
241
249
  if (!RB_TYPE_P(kw_values[0], T_STRING)) {
242
250
  rb_raise(rb_eArgError, "model_path must be a string");
@@ -260,7 +268,7 @@ private:
260
268
  rb_iv_set(self, "@has_evaluated", Qfalse);
261
269
 
262
270
  RB_GC_GUARD(filename);
263
- return self;
271
+ return Qnil;
264
272
  };
265
273
 
266
274
  static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
@@ -492,6 +500,54 @@ private:
492
500
  llama_reset_timings(ptr->ctx);
493
501
  return Qnil;
494
502
  };
503
+
504
+ static VALUE _llama_context_free(VALUE self) {
505
+ LLaMAContextWrapper* ptr = get_llama_context(self);
506
+ if (ptr->ctx != NULL) {
507
+ llama_free(ptr->ctx);
508
+ ptr->ctx = NULL;
509
+ rb_iv_set(self, "@params", Qnil);
510
+ rb_iv_set(self, "@has_evaluated", Qfalse);
511
+ }
512
+ return Qnil;
513
+ }
514
+
515
+ static VALUE _llama_context_load(int argc, VALUE* argv, VALUE self) {
516
+ VALUE kw_args = Qnil;
517
+ ID kw_table[2] = { rb_intern("model_path"), rb_intern("params") };
518
+ VALUE kw_values[2] = { Qundef, Qundef };
519
+ rb_scan_args(argc, argv, ":", &kw_args);
520
+ rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
521
+
522
+ if (!RB_TYPE_P(kw_values[0], T_STRING)) {
523
+ rb_raise(rb_eArgError, "model_path must be a string");
524
+ return Qnil;
525
+ }
526
+ if (!rb_obj_is_kind_of(kw_values[1], rb_cLLaMAContextParams)) {
527
+ rb_raise(rb_eArgError, "params must be a LLaMAContextParams");
528
+ return Qnil;
529
+ }
530
+
531
+ LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
532
+ if (ctx_ptr->ctx != NULL) {
533
+ rb_raise(rb_eRuntimeError, "LLaMA context is already loaded");
534
+ return Qnil;
535
+ }
536
+
537
+ VALUE filename = kw_values[0];
538
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
539
+ ctx_ptr->ctx = llama_init_from_file(StringValueCStr(filename), prms_ptr->params);
540
+ if (ctx_ptr->ctx == NULL) {
541
+ rb_raise(rb_eRuntimeError, "Failed to initialize LLaMA context");
542
+ return Qnil;
543
+ }
544
+
545
+ rb_iv_set(self, "@params", kw_values[1]);
546
+ rb_iv_set(self, "@has_evaluated", Qfalse);
547
+
548
+ RB_GC_GUARD(filename);
549
+ return Qnil;
550
+ };
495
551
  };
496
552
 
497
553
  const rb_data_type_t RbLLaMAContext::llama_context_type = {