llama_cpp 0.7.0 → 0.8.0

data/ext/llama_cpp/src/llama.h CHANGED
@@ -133,11 +133,12 @@ extern "C" {
     typedef struct llama_batch {
         int32_t n_tokens;
 
-        llama_token * token;
-        float * embd;
-        llama_pos * pos;
-        llama_seq_id * seq_id;
-        int8_t * logits;
+        llama_token * token;
+        float * embd;
+        llama_pos * pos;
+        int32_t * n_seq_id;
+        llama_seq_id ** seq_id;
+        int8_t * logits;
 
         // NOTE: helpers for smooth API transition - can be deprecated in the future
         // for future-proof code, use the above fields instead and ignore everything below
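The struct change above splits sequence assignment into two fields: n_seq_id[i] counts how many sequences token i belongs to, and seq_id[i] now points to that many llama_seq_id values. A minimal sketch of filling the reworked struct for a single-sequence prompt; the helper name fill_prompt_batch is illustrative only, and the batch is assumed to come from llama_batch_init with embd == 0 and n_seq_max >= 1:

#include "llama.h"

// Illustrative helper (not part of the API): fill a batch for one prompt
// that lives entirely in sequence 0. Assumes `batch` was allocated with
// llama_batch_init(n_tokens, /*embd=*/0, /*n_seq_max=*/1) or larger.
static void fill_prompt_batch(struct llama_batch * batch,
                              const llama_token * tokens, int32_t n_tokens) {
    for (int32_t i = 0; i < n_tokens; ++i) {
        batch->token[i]     = tokens[i]; // prompt token ids
        batch->pos[i]       = i;         // position within the sequence
        batch->n_seq_id[i]  = 1;         // token i belongs to one sequence...
        batch->seq_id[i][0] = 0;         // ...namely sequence 0
        batch->logits[i]    = 0;         // no logits wanted for this position
    }
    batch->logits[n_tokens - 1] = 1;     // request logits for the last token only
    batch->n_tokens = n_tokens;
}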
@@ -446,7 +447,8 @@ extern "C" {
         llama_pos pos_0,
         llama_seq_id seq_id);
 
-    // Allocates a batch of tokens on the heap
+    // Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
+    // Each token can be assigned up to n_seq_max sequence ids
     // The batch has to be freed with llama_batch_free()
     // If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
     // Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
@@ -454,7 +456,8 @@ extern "C" {
     // All members are left uninitialized
     LLAMA_API struct llama_batch llama_batch_init(
             int32_t n_tokens,
-            int32_t embd);
+            int32_t embd,
+            int32_t n_seq_max);
 
     // Frees a batch of tokens allocated with llama_batch_init()
     LLAMA_API void llama_batch_free(struct llama_batch batch);
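A small allocation sketch against the new three-argument signature; the capacity values below are arbitrary examples, not recommendations:

#include "llama.h"

// Sketch: allocate and free a token batch under the new signature.
static void batch_lifetime_example(void) {
    // embd == 0, so batch.token (rather than batch.embd) is allocated;
    // each of the 512 token slots can carry up to 4 sequence ids.
    struct llama_batch batch = llama_batch_init(/*n_tokens=*/512, /*embd=*/0, /*n_seq_max=*/4);

    // ... fill token/pos/n_seq_id/seq_id/logits and submit for decoding ...

    llama_batch_free(batch); // pair every llama_batch_init with llama_batch_free
}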
@@ -511,17 +514,20 @@ extern "C" {
     // Tokenization
     //
 
-    // Convert the provided text into tokens.
-    // The tokens pointer must be large enough to hold the resulting tokens.
-    // Returns the number of tokens on success, no more than n_max_tokens
-    // Returns a negative number on failure - the number of tokens that would have been returned
+    /// @details Convert the provided text into tokens.
+    /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
+    /// @return Returns the number of tokens on success, no more than n_max_tokens
+    /// @return Returns a negative number on failure - the number of tokens that would have been returned
+    /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
+    ///                Does not insert a leading space.
     LLAMA_API int llama_tokenize(
         const struct llama_model * model,
         const char * text,
         int text_len,
         llama_token * tokens,
         int n_max_tokens,
-        bool add_bos);
+        bool add_bos,
+        bool special);
 
     // Token Id -> Piece.
     // Uses the vocabulary in the provided context.
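A sketch of the new special flag in use, assuming model points to an already-loaded llama_model; the input text and buffer size are arbitrary:

#include <stdbool.h>
#include <string.h>
#include "llama.h"

// Sketch of the new `special` flag: control tokens written literally in the
// text (e.g. "<s>") are mapped to their token ids instead of being treated
// as plain text. `model` is assumed to be loaded elsewhere.
static int tokenize_example(const struct llama_model * model) {
    const char * text = "<s>Hello, world";
    llama_token tokens[64];
    int n = llama_tokenize(model, text, (int) strlen(text), tokens, 64,
                           /*add_bos=*/true, /*special=*/true);
    if (n < 0) {
        return -n; // buffer too small: -n tokens would have been returned
    }
    return n;      // number of tokens written to `tokens`
}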
data/lib/llama_cpp/version.rb CHANGED
@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.7.0'
+  VERSION = '0.8.0'
 
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1334'
+  LLAMA_CPP_VERSION = 'b1405'
 end
data/sig/llama_cpp.rbs CHANGED
@@ -78,7 +78,7 @@ module LLaMACpp
     def n_embd: () -> Integer
     def rope_freq_scale_train: () -> Float
     def token_to_piece: (Integer) -> String
-    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
+    def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool, ?special: bool) -> Array[Integer]
     def desc: () -> String
     def size: () -> Integer
     def n_params: () -> Integer
@@ -117,7 +117,7 @@ module LLaMACpp
   class Batch
     public
 
-    def initialize: (n_tokens: Integer, embd: Integer) -> void
+    def initialize: (n_tokens: Integer, embd: Integer, n_seq_max: Integer) -> void
     def n_tokens=: (Integer) -> Integer
     def n_tokens: () -> Integer
     def all_pos_zero=: (Integer) -> Integer
@@ -130,8 +130,8 @@ module LLaMACpp
     def get_token: (Integer) -> Integer
     def set_pos: (Integer, Integer) -> Integer
     def get_pos: (Integer) -> Integer
-    def set_seq_id: (Integer, Integer) -> Integer
-    def get_seq_id: (Integer) -> Integer
+    def set_seq_id: (Integer, Integer, Integer) -> Integer
+    def get_seq_id: (Integer, Integer) -> Integer
     def set_logit: (Integer, bool) -> bool
     def get_logit: (Integer) -> bool
   end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.7.0
+  version: 0.8.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-10-07 00:00:00.000000000 Z
+date: 2023-10-21 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -32,6 +32,8 @@ files:
 - ext/llama_cpp/src/LICENSE
 - ext/llama_cpp/src/ggml-alloc.c
 - ext/llama_cpp/src/ggml-alloc.h
+- ext/llama_cpp/src/ggml-backend.c
+- ext/llama_cpp/src/ggml-backend.h
 - ext/llama_cpp/src/ggml-cuda.cu
 - ext/llama_cpp/src/ggml-cuda.h
 - ext/llama_cpp/src/ggml-metal.h
@@ -76,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.4.19
+rubygems_version: 3.4.20
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.