llama_cpp 0.12.3 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +22 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -2
- data/vendor/tmp/llama.cpp/Makefile +23 -4
- data/vendor/tmp/llama.cpp/ggml-alloc.c +85 -25
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +115 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +121 -86
- data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
- data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +706 -15
- data/vendor/tmp/llama.cpp/ggml-quants.h +17 -1
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15255 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +60854 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5270 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +34 -0
- data/vendor/tmp/llama.cpp/ggml.c +350 -57
- data/vendor/tmp/llama.cpp/ggml.h +7 -1
- data/vendor/tmp/llama.cpp/llama.cpp +574 -39
- data/vendor/tmp/llama.cpp/llama.h +11 -15
- metadata +9 -2
data/vendor/tmp/llama.cpp/llama.h
CHANGED
@@ -3,12 +3,7 @@
 
 #include "ggml.h"
 #include "ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
-#include "ggml-cuda.h"
-#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
-#else
-#define LLAMA_MAX_DEVICES 1
-#endif // GGML_USE_CUBLAS
+
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -46,11 +41,6 @@
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 4
 
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
-// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
-#define LLAMA_SUPPORTS_GPU_OFFLOAD
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
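With the LLAMA_SUPPORTS_GPU_OFFLOAD macro removed, downstream code can no longer gate behaviour at compile time and instead queries the new llama_supports_gpu_offload() function declared further down in this header. A minimal migration sketch in C (choose_gpu_layers and the requested-layers logic are hypothetical application code, not part of the library):

  #include <stdio.h>
  #include "llama.h"

  /* Previously wrapped in #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD ... #endif;
   * the capability is now checked at runtime instead. */
  static int choose_gpu_layers(int requested) {
      if (!llama_supports_gpu_offload()) {
          fprintf(stderr, "warning: this build cannot offload layers to the GPU\n");
          return 0;
      }
      return requested;
  }

The returned count would typically feed the n_gpu_layers field of llama_model_params before loading the model.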
@@ -108,6 +98,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };
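The new LLAMA_FTYPE_MOSTLY_IQ3_XXS entry can be requested through llama.cpp's existing quantization entry points; a rough sketch, assuming llama_model_quantize_default_params()/llama_model_quantize() behave as in upstream llama.cpp, with placeholder file names:

  #include "llama.h"

  /* Sketch: re-quantize a GGUF model to the new IQ3_XXS type.
   * "input.gguf" and "output-iq3_xxs.gguf" are placeholder paths. */
  static int quantize_to_iq3_xxs(void) {
      llama_model_quantize_params params = llama_model_quantize_default_params();
      params.ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS;
      return (int) llama_model_quantize("input.gguf", "output-iq3_xxs.gguf", &params);
  }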
@@ -196,7 +187,7 @@ extern "C" {
         // LLAMA_SPLIT_LAYER: ignored
         int32_t main_gpu;
 
-        // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+        // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
         const float * tensor_split;
 
         // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
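Because LLAMA_MAX_DEVICES is gone, a tensor_split table now has to be sized from llama_max_devices() at runtime rather than declared with a fixed bound. A minimal sketch (the even split is only an illustration):

  #include <stdlib.h>
  #include "llama.h"

  /* Sketch: build an even per-device split; the array length comes from
   * llama_max_devices() instead of the removed LLAMA_MAX_DEVICES constant. */
  static float * make_even_tensor_split(void) {
      const size_t n = llama_max_devices();
      float * split = calloc(n, sizeof(float));
      if (split == NULL) return NULL;
      for (size_t i = 0; i < n; ++i) {
          split[i] = 1.0f / (float) n;
      }
      return split;
  }

The resulting pointer is what would be assigned to the tensor_split field of llama_model_params.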
@@ -333,9 +324,14 @@ extern "C" {
 
     LLAMA_API int64_t llama_time_us(void);
 
-    LLAMA_API int  llama_max_devices(void);
-    LLAMA_API bool llama_mmap_supported (void);
-    LLAMA_API bool llama_mlock_supported(void);
+    LLAMA_API size_t llama_max_devices(void);
+
+    LLAMA_API bool llama_supports_mmap (void);
+    LLAMA_API bool llama_supports_mlock (void);
+    LLAMA_API bool llama_supports_gpu_offload(void);
+
+    LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
+    LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
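The old llama_mmap_supported()/llama_mlock_supported() helpers and the capability macros are superseded by the functions above; a short sketch of a startup report using only the new API (the log format is illustrative):

  #include <stdio.h>
  #include "llama.h"

  /* Sketch: print what this build of the library supports.
   * The deprecated llama_mmap_supported()/llama_mlock_supported() still
   * compile but now emit warnings pointing at the llama_supports_* names. */
  static void print_capabilities(void) {
      printf("max devices : %zu\n", llama_max_devices());
      printf("mmap        : %s\n", llama_supports_mmap()        ? "yes" : "no");
      printf("mlock       : %s\n", llama_supports_mlock()       ? "yes" : "no");
      printf("gpu offload : %s\n", llama_supports_gpu_offload() ? "yes" : "no");
  }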
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.12.3
+  version: 0.12.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-02-03 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -45,6 +45,8 @@ files:
 - vendor/tmp/llama.cpp/ggml-cuda.cu
 - vendor/tmp/llama.cpp/ggml-cuda.h
 - vendor/tmp/llama.cpp/ggml-impl.h
+- vendor/tmp/llama.cpp/ggml-kompute.cpp
+- vendor/tmp/llama.cpp/ggml-kompute.h
 - vendor/tmp/llama.cpp/ggml-metal.h
 - vendor/tmp/llama.cpp/ggml-metal.m
 - vendor/tmp/llama.cpp/ggml-metal.metal
@@ -54,6 +56,11 @@ files:
 - vendor/tmp/llama.cpp/ggml-opencl.h
 - vendor/tmp/llama.cpp/ggml-quants.c
 - vendor/tmp/llama.cpp/ggml-quants.h
+- vendor/tmp/llama.cpp/ggml-sycl.cpp
+- vendor/tmp/llama.cpp/ggml-sycl.h
+- vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp
+- vendor/tmp/llama.cpp/ggml-vulkan.cpp
+- vendor/tmp/llama.cpp/ggml-vulkan.h
 - vendor/tmp/llama.cpp/ggml.c
 - vendor/tmp/llama.cpp/ggml.h
 - vendor/tmp/llama.cpp/llama.cpp