llama_cpp 0.0.1 → 0.0.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: a6bf717ec1012d78b7d83f3f7a7546f589fbf368c1b2babc69a99fd28a5d9ff3
- data.tar.gz: 6ab2e2ae4b6410f32890a86b7ac2dbb93ab9e2f43888158b7cbfd9b16f435447
+ metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
+ data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
  SHA512:
- metadata.gz: cd1ae63e518a422dbe3a281a598b18b9397fdf880867f92bad20e56b5a60756a1a929a62879f7aed0c7c24012b87b85353e175c773aeed4f8d87294ba0422cb1
- data.tar.gz: 2828321d0589ac16713745b2770844d5c6fed848ff0efed90304370152650a8e0619657a91184f74c402eb9351800ac3517c20f775faf52db91331d95ac1c87d
+ metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
+ data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
  ## [Unreleased]

+ ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+ - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+ - Add logits method to LLaMACpp::Context.
+ - Add type signatures.
+ - Add class alias Params for LLaMACpp::ContextParams.
+
+ ## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+ - Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
+ - Add n_threads arguments to generate method.
+
  ## [0.0.1] - 2023-04-02

  - Initial release
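
The 0.0.3 entry adds a logits method and a Params alias for ContextParams. A minimal sketch of how they might be used together; the logits_all writer and the tokenize/eval keyword names are assumptions not confirmed by this diff, and the model path is a placeholder:

```ruby
require 'llama_cpp'

# LLaMACpp::Params is the class alias added in 0.0.3 for LLaMACpp::ContextParams.
params = LLaMACpp::Params.new
params.logits_all = true # assumed writer; keeps logits for every evaluated token

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# logits raises a RuntimeError until the context has been evaluated, so run eval first.
# The tokenize/eval keyword names below are assumptions, not confirmed by this diff.
tokens = context.tokenize(text: 'Hello', add_bos: true)
context.eval(tokens: tokens, n_past: 0)

# With logits_all enabled, logits returns a flat array of n_tokens * n_vocab floats.
logits = context.logits
```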
data/README.md CHANGED
@@ -20,6 +20,8 @@ If bundler is not being used to manage dependencies, install the gem by executin

  ## Usage

+ Prepare a quantized model file by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
+
  ```ruby
  require 'llama_cpp'

@@ -29,6 +31,7 @@ params.seed = 123456
  context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

  puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+ # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
  ```

  ## Contributing
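
The 0.0.2 changelog entry adds an n_threads argument to generate. A hedged variation on the README example above, assuming the argument is accepted as a keyword:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 123456

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# n_threads was added to generate in 0.0.2; passing it as a keyword is an assumption,
# the changelog only records that the argument exists.
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
```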
@@ -217,7 +217,7 @@ public:
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
  rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
  rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
- // rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
+ rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
  rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
  rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -256,6 +256,9 @@ private:
  return Qnil;
  }

+ rb_iv_set(self, "@params", kw_values[1]);
+ rb_iv_set(self, "@has_evaluated", Qfalse);
+
  RB_GC_GUARD(filename);
  return self;
  };
@@ -305,6 +308,9 @@ private:
  return Qnil;
  }

+ rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+ rb_iv_set(self, "@has_evaluated", Qtrue);
+
  return Qnil;
  };

@@ -361,12 +367,44 @@ private:
  return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
  };

+ static VALUE _llama_context_logits(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+ return Qnil;
+ }
+
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+ const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+ const int n_vocab = llama_n_vocab(ptr->ctx);
+ const float* logits = llama_get_logits(ptr->ctx);
+ VALUE output = rb_ary_new();
+ for (int i = 0; i < n_tokens * n_vocab; i++) {
+ rb_ary_push(output, DBL2NUM((double)(logits[i])));
+ }
+
+ return output;
+ };
+
  static VALUE _llama_context_embeddings(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+ if (!prms_ptr->params.embedding) {
+ rb_raise(rb_eRuntimeError, "embedding parameter is false");
+ return Qnil;
+ }
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+ return Qnil;
+ }

  const int n_embd = llama_n_embd(ptr->ctx);
  const float* embd = llama_get_embeddings(ptr->ctx);
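
The guards added above make embeddings raise a RuntimeError unless the context was created with the embedding parameter enabled and eval has already run. A sketch of the expected calling pattern, with the same caveats as earlier (the embedding writer and the tokenize/eval keyword names are assumptions):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.embedding = true # assumed writer; without it the new guard raises "embedding parameter is false"

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# eval must run before embeddings, or "LLaMA context has not been evaluated" is raised.
# The tokenize/eval keyword names are assumptions, not confirmed by this diff.
tokens = context.tokenize(text: 'Hello', add_bos: true)
context.eval(tokens: tokens, n_past: 0)

embeddings = context.embeddings # flat array of n_embd floats
```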