llama_cpp 0.12.2 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/README.md +2 -2
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +68 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +6 -2
- data/vendor/tmp/llama.cpp/Makefile +25 -3
- data/vendor/tmp/llama.cpp/ggml-alloc.c +87 -27
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +176 -18
- data/vendor/tmp/llama.cpp/ggml-backend.h +14 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +144 -113
- data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
- data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +736 -59
- data/vendor/tmp/llama.cpp/ggml-quants.h +20 -1
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15255 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +60854 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5270 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +34 -0
- data/vendor/tmp/llama.cpp/ggml.c +664 -117
- data/vendor/tmp/llama.cpp/ggml.h +46 -11
- data/vendor/tmp/llama.cpp/llama.cpp +1426 -341
- data/vendor/tmp/llama.cpp/llama.h +24 -15
- data/vendor/tmp/llama.cpp/unicode.h +2 -1
- metadata +10 -3
data/vendor/tmp/llama.cpp/llama.h CHANGED
@@ -2,12 +2,8 @@
 #define LLAMA_H
 
 #include "ggml.h"
-#ifdef GGML_USE_CUBLAS
-#include "ggml-cuda.h"
-#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
-#else
-#define LLAMA_MAX_DEVICES 1
-#endif // GGML_USE_CUBLAS
+#include "ggml-backend.h"
+
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -45,11 +41,6 @@
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 4
 
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
-// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
-#define LLAMA_SUPPORTS_GPU_OFFLOAD
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -106,6 +97,8 @@ extern "C" {
     LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
     LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
+    LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+    LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 
     LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 };
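The two new file types (LLAMA_FTYPE_MOSTLY_Q3_K_XS and LLAMA_FTYPE_MOSTLY_IQ3_XXS) plug into the existing quantization entry point. A minimal sketch, assuming the long-standing llama_model_quantize() / llama_model_quantize_default_params() API and using placeholder file names:

```c
#include "llama.h"

int main(void) {
    // Start from library defaults, then request the IQ3_XXS scheme added in this release.
    llama_model_quantize_params qparams = llama_model_quantize_default_params();
    qparams.ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS;

    // File names are placeholders for illustration only.
    uint32_t rc = llama_model_quantize("model-f16.gguf", "model-iq3_xxs.gguf", &qparams);
    return rc == 0 ? 0 : 1;
}
```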
@@ -194,7 +187,7 @@
     // LLAMA_SPLIT_LAYER: ignored
     int32_t main_gpu;
 
-    // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+    // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
     const float * tensor_split;
 
     // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
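Because the tensor_split comment now refers to the runtime llama_max_devices() (introduced later in this diff) rather than the old compile-time LLAMA_MAX_DEVICES, split arrays can be sized at run time. A small illustrative sketch under that assumption; the even split and n_gpu_layers value are arbitrary:

```c
#include "llama.h"

struct llama_model * load_with_even_split(const char * path) {
    struct llama_model_params mparams = llama_model_default_params();

    // One entry per device; llama_max_devices() replaces the old LLAMA_MAX_DEVICES macro.
    static float split[16] = {0};           // static so it outlives this call; 16 is an arbitrary cap
    size_t n_dev = llama_max_devices();
    if (n_dev > 16) n_dev = 16;
    for (size_t i = 0; i < n_dev; ++i) {
        split[i] = 1.0f / (float) n_dev;    // even split across devices, purely illustrative
    }

    mparams.n_gpu_layers = 99;              // offload as many layers as possible
    mparams.tensor_split = split;

    return llama_load_model_from_file(path, mparams);
}
```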
@@ -231,6 +224,9 @@
     float yarn_beta_slow; // YaRN high correction dim
     uint32_t yarn_orig_ctx; // YaRN original context size
 
+    ggml_backend_sched_eval_callback cb_eval;
+    void * cb_eval_user_data;
+
     enum ggml_type type_k; // data type for K cache
     enum ggml_type type_v; // data type for V cache
 
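The new cb_eval / cb_eval_user_data context parameters expose the backend scheduler's evaluation callback to the caller (upstream uses this for imatrix-style tensor inspection). A hedged sketch, assuming ggml_backend_sched_eval_callback has the (tensor, ask, user_data) shape declared in ggml-backend.h:

```c
#include <stdbool.h>
#include <stdio.h>
#include "llama.h"

// Assumed callback contract: called with ask=true to ask whether this tensor should be
// observed, then with ask=false once its data has been computed.
static bool log_tensor_cb(struct ggml_tensor * t, bool ask, void * user_data) {
    (void) user_data;
    if (ask) {
        return true;                       // observe every tensor
    }
    fprintf(stderr, "evaluated %s\n", t->name);
    return true;                           // returning false would stop evaluation
}

struct llama_context * new_ctx_with_eval_cb(struct llama_model * model) {
    struct llama_context_params cparams = llama_context_default_params();
    cparams.cb_eval           = log_tensor_cb;
    cparams.cb_eval_user_data = NULL;
    return llama_new_context_with_model(model, cparams);
}
```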
@@ -328,9 +324,14 @@
 
     LLAMA_API int64_t llama_time_us(void);
 
-    LLAMA_API int  llama_max_devices(void);
-    LLAMA_API bool llama_mmap_supported (void);
-    LLAMA_API bool llama_mlock_supported(void);
+    LLAMA_API size_t llama_max_devices(void);
+
+    LLAMA_API bool llama_supports_mmap (void);
+    LLAMA_API bool llama_supports_mlock (void);
+    LLAMA_API bool llama_supports_gpu_offload(void);
+
+    LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
+    LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
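With the compile-time LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD macros gone, capability checks become plain function calls and need no #ifdef guards. A minimal sketch using only the functions declared above:

```c
#include <stdio.h>
#include "llama.h"

int main(void) {
    printf("max devices : %zu\n", llama_max_devices());
    printf("mmap        : %s\n", llama_supports_mmap()        ? "yes" : "no");
    printf("mlock       : %s\n", llama_supports_mlock()       ? "yes" : "no");
    printf("GPU offload : %s\n", llama_supports_gpu_offload() ? "yes" : "no");
    return 0;
}
```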
@@ -770,6 +771,14 @@
             float p,
             size_t min_keep);
 
+    /// @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772.
+    LLAMA_API void llama_sample_entropy(
+            struct llama_context * ctx,
+            llama_token_data_array * candidates_p,
+            float min_temp,
+            float max_temp,
+            float exponent_val);
+
     LLAMA_API void llama_sample_temp(
             struct llama_context * ctx,
             llama_token_data_array * candidates,
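llama_sample_entropy implements the dynamic-temperature scheme from the linked paper: the effective temperature is chosen between min_temp and max_temp according to the entropy of the candidate distribution, with exponent_val shaping the mapping. A hedged usage sketch, assuming the llama_token_data_array helpers found elsewhere in llama.h (llama_n_vocab, llama_get_logits_ith, llama_sample_token); the temperature range shown is arbitrary:

```c
#include <stdlib.h>
#include "llama.h"

// Sample one token from the logits at position idx using dynamic temperature.
static llama_token sample_dyntemp(struct llama_context * ctx, const struct llama_model * model, int32_t idx) {
    const int32_t n_vocab = llama_n_vocab(model);
    const float * logits  = llama_get_logits_ith(ctx, idx);

    // Build the candidate array expected by the sampling API.
    llama_token_data * cand = malloc(n_vocab * sizeof(llama_token_data));
    for (int32_t i = 0; i < n_vocab; ++i) {
        cand[i] = (llama_token_data){ i, logits[i], 0.0f };
    }
    llama_token_data_array arr = { cand, (size_t) n_vocab, false };

    // Scale temperature between 0.1 and 1.8 based on candidate entropy (illustrative values).
    llama_sample_entropy(ctx, &arr, 0.1f, 1.8f, 1.0f);

    llama_token tok = llama_sample_token(ctx, &arr);
    free(cand);
    return tok;
}
```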
data/vendor/tmp/llama.cpp/unicode.h CHANGED
@@ -2,8 +2,9 @@
 
 #include <cassert>
 #include <stdexcept>
-#include <vector>
+#include <string>
 #include <unordered_map>
+#include <vector>
 
 static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
 {0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.12.2
+  version: 0.12.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-02-03 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -45,6 +45,8 @@ files:
 - vendor/tmp/llama.cpp/ggml-cuda.cu
 - vendor/tmp/llama.cpp/ggml-cuda.h
 - vendor/tmp/llama.cpp/ggml-impl.h
+- vendor/tmp/llama.cpp/ggml-kompute.cpp
+- vendor/tmp/llama.cpp/ggml-kompute.h
 - vendor/tmp/llama.cpp/ggml-metal.h
 - vendor/tmp/llama.cpp/ggml-metal.m
 - vendor/tmp/llama.cpp/ggml-metal.metal
@@ -54,6 +56,11 @@ files:
 - vendor/tmp/llama.cpp/ggml-opencl.h
 - vendor/tmp/llama.cpp/ggml-quants.c
 - vendor/tmp/llama.cpp/ggml-quants.h
+- vendor/tmp/llama.cpp/ggml-sycl.cpp
+- vendor/tmp/llama.cpp/ggml-sycl.h
+- vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp
+- vendor/tmp/llama.cpp/ggml-vulkan.cpp
+- vendor/tmp/llama.cpp/ggml-vulkan.h
 - vendor/tmp/llama.cpp/ggml.c
 - vendor/tmp/llama.cpp/ggml.h
 - vendor/tmp/llama.cpp/llama.cpp
@@ -84,7 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
   version: '0'
 requirements: []
-rubygems_version: 3.
+rubygems_version: 3.4.19
 signing_key:
 specification_version: 4
 summary: Ruby bindings for the llama.cpp.