llama_cpp 0.0.1 → 0.0.3
This diff shows the contents of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -0
- data/ext/llama_cpp/llama_cpp.cpp +39 -1
- data/ext/llama_cpp/src/ggml.c +914 -509
- data/ext/llama_cpp/src/ggml.h +42 -27
- data/ext/llama_cpp/src/llama.cpp +293 -303
- data/ext/llama_cpp/src/llama.h +19 -2
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +6 -2
- data/sig/llama_cpp.rbs +52 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
+  data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
+  data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
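For reference, the new digests above can be checked locally once the gem's payload is extracted; a minimal sketch using Ruby's standard Digest library (the local file path is illustrative):

```ruby
require 'digest'

# Illustrative path to the extracted gem payload.
path = 'data.tar.gz'
expected = 'b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c'

# Digest::SHA256.file streams the file, so large archives are fine.
actual = Digest::SHA256.file(path).hexdigest
raise "checksum mismatch for #{path}" unless actual == expected
```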
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
 ## [Unreleased]
 
+## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+- Add logits method to LLaMACpp::Context.
+- Add type signatures.
+- Add class alias Params for LLaMACpp::ContextParams.
+
+## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+- Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
+- Add n_threads arguments to generate method.
+
 ## [0.0.1] - 2023-04-02
 
 - Initial release
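Taken together, the 0.0.3 entries above amount to a small API surface change. A minimal sketch of how they might be used; the model path is a placeholder, and the eval/tokenize keyword names are assumptions rather than something shown in this diff:

```ruby
require 'llama_cpp'

# Params is the new 0.0.3 alias for ContextParams.
params = LLaMACpp::Params.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# After an eval, the new logits method returns the raw output scores
# as a flat Ruby Array of Floats.
context.eval(tokens: context.tokenize(text: 'Hello'), n_past: 0)
p context.logits.first(5)
```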
data/README.md
CHANGED
@@ -20,6 +20,8 @@ If bundler is not being used to manage dependencies, install the gem by executing:
 
 ## Usage
 
+Prepare a quantized model file by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
+
 ```ruby
 require 'llama_cpp'
 
@@ -29,6 +31,7 @@ params.seed = 123456
 context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
 
 puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
 ```
 
 ## Contributing
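Per the 0.0.2 changelog entry above, generate also accepts an n_threads argument; a hedged sketch (the keyword name is taken from the changelog wording, and 4 is an arbitrary thread count):

```ruby
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
```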
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -217,7 +217,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
     rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
-
+    rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
     rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -256,6 +256,9 @@ private:
       return Qnil;
     }
 
+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
     RB_GC_GUARD(filename);
     return self;
   };
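The @has_evaluated flag initialized to Qfalse here is what backs the new guards further down: reading logits or embeddings before any eval call now raises a RuntimeError instead of returning stale data. A sketch of that behavior from the Ruby side (model path illustrative, error message taken verbatim from the code below):

```ruby
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin',
                                params: LLaMACpp::ContextParams.new)

begin
  context.logits # no eval has run, so @has_evaluated is still false
rescue RuntimeError => e
  puts e.message # => "LLaMA context has not been evaluated"
end
```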
@@ -305,6 +308,9 @@ private:
       return Qnil;
     }
 
+    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+    rb_iv_set(self, "@has_evaluated", Qtrue);
+
     return Qnil;
   };
 
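The @n_tokens value recorded after a successful eval determines how much data the new logits method returns: n_tokens * n_vocab floats when logits_all is enabled, otherwise a single row of n_vocab. A sketch, under the assumption that ContextParams exposes a logits_all= writer (by analogy with seed= in the README) and that Context has an n_vocab reader:

```ruby
params = LLaMACpp::ContextParams.new
params.logits_all = true # assumed writer; keeps one logit row per input token
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

tokens = context.tokenize(text: 'Tokyo is')
context.eval(tokens: tokens, n_past: 0)

# The flat array folds into one row of n_vocab logits per token.
rows = context.logits.each_slice(context.n_vocab).to_a
rows.size # => tokens.size
```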
@@ -361,12 +367,44 @@ private:
     return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
   };
 
+  static VALUE _llama_context_logits(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
+
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+    const int n_vocab = llama_n_vocab(ptr->ctx);
+    const float* logits = llama_get_logits(ptr->ctx);
+    VALUE output = rb_ary_new();
+    for (int i = 0; i < n_tokens * n_vocab; i++) {
+      rb_ary_push(output, DBL2NUM((double)(logits[i])));
+    }
+
+    return output;
+  };
+
   static VALUE _llama_context_embeddings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
 
     const int n_embd = llama_n_embd(ptr->ctx);
     const float* embd = llama_get_embeddings(ptr->ctx);
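The guard added above ties embeddings to the embedding flag in ContextParams: a context created without it now raises instead of reading an unpopulated buffer. A sketch, assuming an embedding= writer by analogy with seed= in the README:

```ruby
params = LLaMACpp::ContextParams.new
params.embedding = true # assumed writer; without it, embeddings raises "embedding parameter is false"
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

context.eval(tokens: context.tokenize(text: 'Hello'), n_past: 0)
vector = context.embeddings # => Array of n_embd Floats
```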