llama_cpp 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/examples/README.md +92 -0
- data/examples/chat.rb +195 -0
- data/examples/embedding.rb +37 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1218 -411
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +703 -514
- data/ext/llama_cpp/src/ggml-metal.metal +574 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +496 -36
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +2715 -476
- data/ext/llama_cpp/src/ggml.h +266 -11
- data/ext/llama_cpp/src/llama.cpp +266 -135
- data/ext/llama_cpp/src/llama.h +19 -11
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +5 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -71,27 +71,27 @@ extern "C" {
|
|
71
71
|
|
72
72
|
typedef void (*llama_progress_callback)(float progress, void *ctx);
|
73
73
|
|
74
|
-
|
74
|
+
struct llama_context_params {
|
75
|
+
int seed; // RNG seed, -1 for random
|
75
76
|
int n_ctx; // text context
|
76
77
|
int n_batch; // prompt processing batch size
|
77
78
|
int n_gpu_layers; // number of layers to store in VRAM
|
78
79
|
int main_gpu; // the GPU that is used for scratch and small tensors
|
79
80
|
float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
|
80
|
-
|
81
|
+
// called with a progress value between 0 and 1, pass NULL to disable
|
82
|
+
llama_progress_callback progress_callback;
|
83
|
+
// context pointer passed to the progress callback
|
84
|
+
void * progress_callback_user_data;
|
81
85
|
|
86
|
+
// Keep the booleans together to avoid misalignment during copy-by-value.
|
87
|
+
bool low_vram; // if true, reduce VRAM usage at the cost of performance
|
82
88
|
bool f16_kv; // use fp16 for KV cache
|
83
89
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
84
90
|
bool vocab_only; // only load the vocabulary, no weights
|
85
91
|
bool use_mmap; // use mmap if possible
|
86
92
|
bool use_mlock; // force system to keep model in RAM
|
87
93
|
bool embedding; // embedding mode only
|
88
|
-
|
89
|
-
// called with a progress value between 0 and 1, pass NULL to disable
|
90
|
-
llama_progress_callback progress_callback;
|
91
|
-
// context pointer passed to the progress callback
|
92
|
-
void * progress_callback_user_data;
|
93
94
|
};
|
94
|
-
|
95
95
|
// model file types
|
96
96
|
enum llama_ftype {
|
97
97
|
LLAMA_FTYPE_ALL_F32 = 0,
|
@@ -220,6 +220,14 @@ extern "C" {
|
|
220
220
|
LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
|
221
221
|
LLAMA_API int llama_n_embd (const struct llama_context * ctx);
|
222
222
|
|
223
|
+
// Get the vocabulary as output parameters.
|
224
|
+
// Returns number of results.
|
225
|
+
LLAMA_API int llama_get_vocab(
|
226
|
+
const struct llama_context * ctx,
|
227
|
+
const char * * strings,
|
228
|
+
float * scores,
|
229
|
+
int capacity);
|
230
|
+
|
223
231
|
// Token logits obtained from the last call to llama_eval()
|
224
232
|
// The logits for the last token are stored in the last row
|
225
233
|
// Can be mutated in order to change the probabilities of the next token
|
@@ -235,9 +243,9 @@ extern "C" {
|
|
235
243
|
LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);
|
236
244
|
|
237
245
|
// Special tokens
|
238
|
-
LLAMA_API llama_token llama_token_bos();
|
239
|
-
LLAMA_API llama_token llama_token_eos();
|
240
|
-
LLAMA_API llama_token llama_token_nl();
|
246
|
+
LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
|
247
|
+
LLAMA_API llama_token llama_token_eos(); // end-of-sentence
|
248
|
+
LLAMA_API llama_token llama_token_nl(); // next-line
|
241
249
|
|
242
250
|
// Sampling functions
|
243
251
|
|
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
# llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
4
4
|
module LLaMACpp
|
5
5
|
# The version of llama_cpp.rb you install.
|
6
|
-
VERSION = '0.2.0'
|
6
|
+
VERSION = '0.2.2'
|
7
7
|
|
8
8
|
# The version of llama.cpp bundled with llama_cpp.rb.
|
9
|
-
LLAMA_CPP_VERSION = 'master-
|
9
|
+
LLAMA_CPP_VERSION = 'master-7487137'
|
10
10
|
end
|
data/sig/llama_cpp.rbs
CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
|
|
70
70
|
def n_ctx: () -> Integer
|
71
71
|
def n_embd: () -> Integer
|
72
72
|
def n_vocab: () -> Integer
|
73
|
+
def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
|
73
74
|
def print_timings: () -> void
|
74
75
|
def reset_timings: () -> void
|
75
76
|
def token_to_str: (Integer) -> String
|
@@ -111,6 +112,8 @@ module LLaMACpp
|
|
111
112
|
def main_gpu: () -> Integer
|
112
113
|
def main_gpu=: (Integer) -> Integer
|
113
114
|
def tensor_split: () -> Array[Float]
|
115
|
+
def low_vram: () -> bool
|
116
|
+
def low_vram=: (bool) -> bool
|
114
117
|
def seed: () -> Integer
|
115
118
|
def seed=: (Integer) -> Integer
|
116
119
|
def use_mlock: () -> bool
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llama_cpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
|
14
14
|
email:
|
@@ -22,6 +22,9 @@ files:
|
|
22
22
|
- CODE_OF_CONDUCT.md
|
23
23
|
- LICENSE.txt
|
24
24
|
- README.md
|
25
|
+
- examples/README.md
|
26
|
+
- examples/chat.rb
|
27
|
+
- examples/embedding.rb
|
25
28
|
- ext/llama_cpp/extconf.rb
|
26
29
|
- ext/llama_cpp/llama_cpp.cpp
|
27
30
|
- ext/llama_cpp/llama_cpp.h
|