RubyGems - llama_cpp - Versions diffs - 0.5.0 → 0.5.2 - Mend

llama_cpp 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/CHANGELOG.md +14 -2
data/examples/prompt_jp.txt +1 -1
data/ext/llama_cpp/extconf.rb +1 -1
data/ext/llama_cpp/llama_cpp.cpp +30 -0
data/ext/llama_cpp/src/ggml-alloc.c +101 -24
data/ext/llama_cpp/src/ggml-cuda.cu +1094 -678
data/ext/llama_cpp/src/ggml-metal.m +89 -23
data/ext/llama_cpp/src/ggml-metal.metal +398 -211
data/ext/llama_cpp/src/ggml-opencl.cpp +7 -7
data/ext/llama_cpp/src/ggml.c +32 -56
data/ext/llama_cpp/src/ggml.h +1 -1
data/ext/llama_cpp/src/k_quants.c +49 -13
data/ext/llama_cpp/src/llama.cpp +833 -281
data/ext/llama_cpp/src/llama.h +11 -6
data/lib/llama_cpp/version.rb +2 -2
data/lib/llama_cpp.rb +1 -1
data/sig/llama_cpp.rbs +4 -0
metadata +2 -2

data/ext/llama_cpp/src/llama.h CHANGED

@@ -164,6 +164,7 @@ extern "C" {
         enum llama_ftype ftype;      // quantize to this llama_ftype
         bool allow_requantize;       // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor; // quantize output.weight
+        bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
     } llama_model_quantize_params;
     // grammar types
@@ -244,15 +245,17 @@ extern "C" {
     LLAMA_API bool llama_mmap_supported (void);
     LLAMA_API bool llama_mlock_supported(void);
-    LLAMA_API int llama_n_vocab(const struct llama_context * ctx);
-    LLAMA_API int llama_n_ctx  (const struct llama_context * ctx);
-    LLAMA_API int llama_n_embd (const struct llama_context * ctx);
+    LLAMA_API int llama_n_vocab    (const struct llama_context * ctx);
+    LLAMA_API int llama_n_ctx      (const struct llama_context * ctx);
+    LLAMA_API int llama_n_ctx_train(const struct llama_context * ctx);
+    LLAMA_API int llama_n_embd     (const struct llama_context * ctx);
     LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_context * ctx);
-    LLAMA_API int llama_model_n_vocab(const struct llama_model * model);
-    LLAMA_API int llama_model_n_ctx  (const struct llama_model * model);
-    LLAMA_API int llama_model_n_embd (const struct llama_model * model);
+    LLAMA_API int llama_model_n_vocab    (const struct llama_model * model);
+    LLAMA_API int llama_model_n_ctx      (const struct llama_model * model);
+    LLAMA_API int llama_model_n_ctx_train(const struct llama_model * model);
+    LLAMA_API int llama_model_n_embd     (const struct llama_model * model);
     // Get a string describing the model type
     LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
@@ -409,6 +412,8 @@ extern "C" {
     LLAMA_API void llama_grammar_free(struct llama_grammar * grammar);
+    LLAMA_API struct llama_grammar * llama_grammar_copy(const struct llama_grammar * grammar);
     //
     // Sampling functions
     //

data/lib/llama_cpp/version.rb CHANGED

@@ -3,8 +3,8 @@
 # llama_cpp.rb provides Ruby bindings for the llama.cpp.
 module LLaMACpp
   # The version of llama_cpp.rb you install.
-  VERSION = '0.5.0'
+  VERSION = '0.5.2'
   # The version of llama.cpp bundled with llama_cpp.rb.
-  LLAMA_CPP_VERSION = 'b1140'
+  LLAMA_CPP_VERSION = 'b1'
 end

data/lib/llama_cpp.rb CHANGED

@@ -104,7 +104,7 @@ module LLaMACpp
       break if !embd.empty? && embd[-1] == context.token_eos
     end
-    output.join.delete_prefix(spaced_prompt).strip
+    output.join.force_encoding('UTF-8').delete_prefix(spaced_prompt).strip
   end
 end

data/sig/llama_cpp.rbs CHANGED

@@ -75,6 +75,7 @@ module LLaMACpp
     def apply_lora_from_file: (lora_path: String, ?base_model_path: String, ?n_threads: Integer) -> void
     def n_vocab: () -> Integer
     def n_ctx: () -> Integer
+    def n_ctx_train: () -> Integer
     def n_embd: () -> Integer
     def token_to_piece: (Integer) -> String
     def tokenize: (text: String, ?n_max_tokens: Integer, ?add_bos: bool) -> Array[Integer]
@@ -113,6 +114,7 @@ module LLaMACpp
     def eval_export: (String) -> bool
     def logits: () -> Array[Float]
     def n_ctx: () -> Integer
+    def n_ctx_train: () -> Integer
     def n_embd: () -> Integer
     def n_vocab: () -> Integer
     def timings: () -> ::LLaMACpp::Timings
@@ -188,6 +190,8 @@ module LLaMACpp
     def allow_quantization=: (bool) -> bool
     def quantize_output_tensor: () -> bool
     def quantize_output_tensor=: (bool) -> bool
+    def only_copy: () -> bool
+    def only_copy=: (bool) -> bool
   end
   class Params = ContextParams

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.5.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-02 00:00:00.000000000 Z
+date: 2023-09-16 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email: