llama_cpp 0.2.0 → 0.2.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/examples/README.md +60 -0
- data/examples/chat.rb +195 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +697 -130
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +548 -497
- data/ext/llama_cpp/src/ggml-metal.metal +425 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -32
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +1904 -303
- data/ext/llama_cpp/src/ggml.h +126 -2
- data/ext/llama_cpp/src/llama.cpp +212 -108
- data/ext/llama_cpp/src/llama.h +12 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +4 -2
data/ext/llama_cpp/src/llama.h
CHANGED
@@ -77,6 +77,7 @@ extern "C" {
     int n_gpu_layers; // number of layers to store in VRAM
     int main_gpu;     // the GPU that is used for scratch and small tensors
     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+    bool low_vram;    // if true, reduce VRAM usage at the cost of performance
     int seed;         // RNG seed, -1 for random

     bool f16_kv;      // use fp16 for KV cache
@@ -220,6 +221,14 @@ extern "C" {
     LLAMA_API int llama_n_ctx (const struct llama_context * ctx);
     LLAMA_API int llama_n_embd (const struct llama_context * ctx);

+    // Get the vocabulary as output parameters.
+    // Returns number of results.
+    LLAMA_API int llama_get_vocab(
+            const struct llama_context * ctx,
+                          const char * * strings,
+                                 float * scores,
+                                   int   capacity);
+
     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row
     // Can be mutated in order to change the probabilities of the next token
@@ -235,9 +244,9 @@ extern "C" {
     LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token);

     // Special tokens
-    LLAMA_API llama_token llama_token_bos();
-    LLAMA_API llama_token llama_token_eos();
-    LLAMA_API llama_token llama_token_nl();
+    LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
+    LLAMA_API llama_token llama_token_eos(); // end-of-sentence
+    LLAMA_API llama_token llama_token_nl();  // next-line

     // Sampling functions
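The header changes above add a `low_vram` flag to `llama_context_params` and a `llama_get_vocab` accessor. A minimal Ruby-side sketch of the `low_vram` flag, assuming the gem exposes it on `ContextParams` as the RBS signatures further down indicate; the model path and the `Context.new` keywords are illustrative, not taken from this diff:

```ruby
require 'llama_cpp'

# Sketch only: low_vram trades some performance for reduced VRAM usage.
# The model path below is a placeholder.
params = LLaMACpp::ContextParams.new
params.low_vram = true
params.seed = 42

context = LLaMACpp::Context.new(model_path: 'path/to/model.bin', params: params)
```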
data/lib/llama_cpp/version.rb
CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.2.0'
+  VERSION = '0.2.1'

   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'master-
+  LLAMA_CPP_VERSION = 'master-a09f919'
 end
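After upgrading, the two constants changed above can be checked at runtime to confirm the installed binding version and the bundled llama.cpp revision:

```ruby
require 'llama_cpp'

puts LLaMACpp::VERSION            # => "0.2.1"
puts LLaMACpp::LLAMA_CPP_VERSION  # => "master-a09f919"
```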
data/sig/llama_cpp.rbs
CHANGED
@@ -70,6 +70,7 @@ module LLaMACpp
     def n_ctx: () -> Integer
     def n_embd: () -> Integer
     def n_vocab: () -> Integer
+    def vocab: (capacity: Integer) -> [Array[String], Array[Float]]
     def print_timings: () -> void
     def reset_timings: () -> void
     def token_to_str: (Integer) -> String
@@ -111,6 +112,8 @@ module LLaMACpp
     def main_gpu: () -> Integer
     def main_gpu=: (Integer) -> Integer
     def tensor_split: () -> Array[Float]
+    def low_vram: () -> bool
+    def low_vram=: (bool) -> bool
     def seed: () -> Integer
     def seed=: (Integer) -> Integer
     def use_mlock: () -> bool
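The new `vocab` signature above returns token strings and their scores as a pair of parallel arrays. A hedged usage sketch, assuming `context` is an already-initialized `LLaMACpp::Context`; the printing format is illustrative:

```ruby
# vocab(capacity:) -> [Array[String], Array[Float]], per the RBS above.
# n_vocab is an existing method on the context.
strings, scores = context.vocab(capacity: context.n_vocab)

strings.zip(scores).first(5).each do |token, score|
  puts format('%-12s %.4f', token, score)
end
```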
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-
+date: 2023-06-17 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -22,6 +22,8 @@ files:
 - CODE_OF_CONDUCT.md
 - LICENSE.txt
 - README.md
+- examples/README.md
+- examples/chat.rb
 - ext/llama_cpp/extconf.rb
 - ext/llama_cpp/llama_cpp.cpp
 - ext/llama_cpp/llama_cpp.h