RubyGems - llama_cpp - Versions diffs - 0.14.2 → 0.14.4 - Mend

llama_cpp 0.14.2 → 0.14.4

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (33)

checksums.yaml +4 -4
data/CHANGELOG.md +14 -0
data/ext/llama_cpp/llama_cpp.cpp +64 -0
data/lib/llama_cpp/version.rb +2 -2
data/sig/llama_cpp.rbs +6 -0
data/vendor/tmp/llama.cpp/Makefile +91 -21
data/vendor/tmp/llama.cpp/ggml-alloc.c +14 -5
data/vendor/tmp/llama.cpp/ggml-backend-impl.h +5 -0
data/vendor/tmp/llama.cpp/ggml-backend.c +155 -125
data/vendor/tmp/llama.cpp/ggml-backend.h +4 -4
data/vendor/tmp/llama.cpp/ggml-common.h +25 -2
data/vendor/tmp/llama.cpp/ggml-cuda.cu +1779 -10762
data/vendor/tmp/llama.cpp/ggml-cuda.h +6 -15
data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
data/vendor/tmp/llama.cpp/ggml-metal.m +167 -124
data/vendor/tmp/llama.cpp/ggml-metal.metal +603 -303
data/vendor/tmp/llama.cpp/ggml-opencl.cpp +5 -0
data/vendor/tmp/llama.cpp/ggml-quants.c +663 -56
data/vendor/tmp/llama.cpp/ggml-quants.h +3 -0
data/vendor/tmp/llama.cpp/ggml-sycl.cpp +341 -469
data/vendor/tmp/llama.cpp/ggml-sycl.h +19 -4
data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +37199 -14939
data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +335 -307
data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -11
data/vendor/tmp/llama.cpp/ggml.c +229 -107
data/vendor/tmp/llama.cpp/ggml.h +11 -5
data/vendor/tmp/llama.cpp/llama.cpp +2136 -464
data/vendor/tmp/llama.cpp/llama.h +86 -23
data/vendor/tmp/llama.cpp/unicode-data.cpp +1651 -0
data/vendor/tmp/llama.cpp/unicode-data.h +16 -0
data/vendor/tmp/llama.cpp/unicode.cpp +8 -1403
data/vendor/tmp/llama.cpp/unicode.h +2 -0
metadata +5 -3

data/vendor/tmp/llama.cpp/ggml.h CHANGED Viewed

@@ -214,9 +214,10 @@
 #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif
-#include <stdint.h>
-#include <stddef.h>
 #include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
 #define GGML_FILE_VERSION 1
@@ -368,6 +369,7 @@ extern "C" {
         GGML_TYPE_I32     = 26,
         GGML_TYPE_I64     = 27,
         GGML_TYPE_F64     = 28,
+        GGML_TYPE_IQ1_M   = 29,
         GGML_TYPE_COUNT,
     };
@@ -407,6 +409,7 @@ extern "C" {
         GGML_FTYPE_MOSTLY_IQ3_S   = 20, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ2_S   = 21, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ4_XS  = 22, // except 1d tensors
+        GGML_FTYPE_MOSTLY_IQ1_M   = 23, // except 1d tensors
     };
     // available tensor operations:
@@ -708,6 +711,9 @@ extern "C" {
     GGML_API void    ggml_print_backtrace(void);
+    // accepts a UTF-8 path, even on Windows
+    GGML_API FILE *  ggml_fopen(const char * fname, const char * mode);
     GGML_API void    ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
     GGML_API bool    ggml_is_numa(void); // true if init detected that system has >1 NUMA node
@@ -744,6 +750,7 @@ extern "C" {
     GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
     GGML_API GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor);
     GGML_API GGML_CALL bool ggml_is_permuted  (const struct ggml_tensor * tensor);
+    GGML_API GGML_CALL bool ggml_is_empty     (const struct ggml_tensor * tensor);
     GGML_API           bool ggml_is_scalar    (const struct ggml_tensor * tensor);
     GGML_API           bool ggml_is_vector    (const struct ggml_tensor * tensor);
     GGML_API           bool ggml_is_matrix    (const struct ggml_tensor * tensor);
@@ -1157,8 +1164,7 @@ extern "C" {
     //  ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
     GGML_API struct ggml_tensor * ggml_mul_mat_id(
             struct ggml_context * ctx,
-            struct ggml_tensor  * const as[],
-            int                   n_as,
+            struct ggml_tensor  * as,
             struct ggml_tensor  * ids,
             int                   id,
             struct ggml_tensor  * b);
@@ -2350,7 +2356,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_fp16_va    (void);
     GGML_API int ggml_cpu_has_wasm_simd  (void);
     GGML_API int ggml_cpu_has_blas       (void);
-    GGML_API int ggml_cpu_has_cublas     (void);
+    GGML_API int ggml_cpu_has_cuda       (void);
     GGML_API int ggml_cpu_has_clblast    (void);
     GGML_API int ggml_cpu_has_vulkan     (void);
     GGML_API int ggml_cpu_has_kompute    (void);