llama_cpp 0.12.4 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +146 -53
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +688 -270
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +386 -134
- data/vendor/tmp/llama.cpp/ggml-quants.h +68 -59
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +139 -145
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +1516 -10656
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1777 -1238
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +14 -9
- data/vendor/tmp/llama.cpp/ggml.c +147 -70
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +920 -173
- data/vendor/tmp/llama.cpp/llama.h +7 -1
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -505,11 +505,17 @@ extern "C" {
 
     enum ggml_log_level {
         GGML_LOG_LEVEL_ERROR = 2,
-        GGML_LOG_LEVEL_WARN,
-        GGML_LOG_LEVEL_INFO,
+        GGML_LOG_LEVEL_WARN = 3,
+        GGML_LOG_LEVEL_INFO = 4,
         GGML_LOG_LEVEL_DEBUG = 5
     };
 
+    enum ggml_tensor_flag {
+        GGML_TENSOR_FLAG_INPUT = 1,
+        GGML_TENSOR_FLAG_OUTPUT = 2,
+        GGML_TENSOR_FLAG_PARAM = 4,
+    };
+
     // ggml object
     struct ggml_object {
         size_t offs;
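With WARN and INFO pinned to explicit values, the severity ordering (lower value = more severe) becomes something callers can rely on. A minimal sketch of a threshold filter, assuming ggml.h's existing `ggml_log_callback` signature; the callback name and threshold scheme are illustrative:

```c
#include <stdio.h>
#include "ggml.h"

// Hypothetical filter: with ERROR=2 < WARN=3 < INFO=4 < DEBUG=5, a smaller
// value means a more severe message, so thresholding is a plain comparison.
static void log_filter_cb(enum ggml_log_level level, const char * text, void * user_data) {
    const enum ggml_log_level threshold = *(const enum ggml_log_level *) user_data;
    if (level <= threshold) { // keep messages at least this severe
        fputs(text, stderr);
    }
}
```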
@@ -543,7 +549,7 @@ extern "C" {
         // op params - allocated as int32_t for alignment
         int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
 
-        bool is_param;
+        int32_t flags;
 
         struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];
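The `int32_t flags` field replaces what was a single-purpose boolean, so one tensor can carry several of the new `ggml_tensor_flag` roles at once. A sketch with hypothetical helper names:

```c
#include <stdbool.h>
#include "ggml.h"

// Hypothetical helpers: flags is a bitmask, so roles compose with | and
// are tested with &, unlike the old boolean field.
static bool tensor_is_param(const struct ggml_tensor * t) {
    return (t->flags & GGML_TENSOR_FLAG_PARAM) != 0;
}

static void mark_input_and_output(struct ggml_tensor * t) {
    t->flags |= GGML_TENSOR_FLAG_INPUT | GGML_TENSOR_FLAG_OUTPUT;
}
```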
@@ -567,6 +573,11 @@ extern "C" {
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
 
+    // Abort callback
+    // If not NULL, called before ggml computation
+    // If it returns true, the computation is aborted
+    typedef bool (*ggml_abort_callback)(void * data);
+
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {
@@ -576,8 +587,8 @@ extern "C" {
         int n_threads;
 
         // abort ggml_graph_compute when true
-        bool (*abort_callback)(void * data);
-        void * abort_callback_data;
+        ggml_abort_callback abort_callback;
+        void * abort_callback_data;
     };
 
     enum ggml_cgraph_eval_order {
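These two `ggml_cplan` members now use the named `ggml_abort_callback` type declared above. A sketch of cooperative cancellation built on the declarations in this header; the deadline mechanism and five-second budget are illustrative, not part of the API:

```c
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#include "ggml.h"

// Returns true once the wall-clock deadline has passed, which aborts
// ggml_graph_compute mid-graph.
static bool abort_after_deadline(void * data) {
    return time(NULL) >= *(const time_t *) data;
}

static int compute_with_timeout(struct ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size); // plan still needs its work buffer
    }
    time_t deadline = time(NULL) + 5;            // illustrative 5-second budget
    plan.abort_callback      = abort_after_deadline;
    plan.abort_callback_data = &deadline;
    int status = ggml_graph_compute(graph, &plan);
    free(plan.work_data);
    return status;
}
```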
@@ -2087,6 +2098,12 @@ extern "C" {
             ggml_opt_callback callback,
             void * callback_data);
 
+    //
+    // tensor flags
+    //
+    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+
     //
     // quantization
     //
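`ggml_set_input` and `ggml_set_output` are the public setters for the new tensor flags, marking which tensors a graph consumes and produces. A sketch with placeholder shapes:

```c
#include "ggml.h"

// Sketch: marking graph boundaries while building a graph, so allocators
// and schedulers know these tensors' data must stay valid. Shapes are
// arbitrary placeholders.
static void build_graph_example(struct ggml_context * ctx, struct ggml_cgraph * gf) {
    struct ggml_tensor * inp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    ggml_set_input(inp);                      // sets GGML_TENSOR_FLAG_INPUT

    struct ggml_tensor * w   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 16);
    struct ggml_tensor * out = ggml_mul_mat(ctx, w, inp);
    ggml_set_output(out);                     // sets GGML_TENSOR_FLAG_OUTPUT

    ggml_build_forward_expand(gf, out);
}
```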
@@ -2273,6 +2290,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_ssse3      (void);
     GGML_API int ggml_cpu_has_sycl       (void);
     GGML_API int ggml_cpu_has_vsx        (void);
+    GGML_API int ggml_cpu_has_matmul_int8(void);
 
     //
     // Internal types and functions exposed for tests and benchmarks
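The new probe follows the existing `ggml_cpu_has_*` pattern and reports whether int8 matrix-multiply instructions (e.g. Arm i8mm) are available, which is what the widened `ggml_vec_dot_t` below exists to exploit. A trivial usage sketch:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // Non-zero when ggml was built with int8 matmul support and the CPU has it.
    printf("matmul_int8: %d\n", ggml_cpu_has_matmul_int8());
    return 0;
}
```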
@@ -2286,7 +2304,8 @@ extern "C" {
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int k);
-    typedef void (*ggml_vec_dot_t)   (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+    typedef void (*ggml_vec_dot_t)   (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
+                                      const void * GGML_RESTRICT y, size_t by, int nrc);
 
     typedef struct {
         const char * type_name;
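The widened `ggml_vec_dot_t` takes byte strides (`bs`, `bx`, `by`) and a row count `nrc`, letting one call produce several dot products. A hypothetical F32 kernel matching the new signature; advancing all three pointers by their strides per row is an assumption made for illustration, not necessarily the convention ggml's own kernels use:

```c
#include <stddef.h>
#include "ggml.h"

// Hypothetical kernel: produces nrc dot products of length n, stepping the
// result and the two operands by their byte strides between rows.
static void vec_dot_f32_rows(int n, float * GGML_RESTRICT s, size_t bs,
                             const void * GGML_RESTRICT x, size_t bx,
                             const void * GGML_RESTRICT y, size_t by, int nrc) {
    for (int r = 0; r < nrc; ++r) {
        const float * xr = (const float *) ((const char *) x + r*bx);
        const float * yr = (const float *) ((const char *) y + r*by);
        float sum = 0.0f;
        for (int i = 0; i < n; ++i) {
            sum += xr[i] * yr[i];
        }
        *(float *) ((char *) s + r*bs) = sum;
    }
}
```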
@@ -2298,6 +2317,7 @@ extern "C" {
         ggml_from_float_t from_float_reference;
         ggml_vec_dot_t    vec_dot;
         enum ggml_type    vec_dot_type;
+        int64_t           nrows; // number of rows to process simultaneously;
     } ggml_type_traits_t;
 
     GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
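The new `nrows` field advertises how many rows a type's `vec_dot` handles per call (more than one on the int8-matmul paths). A quick query through the accessor declared on the last line above; treating Q4_0 as the type of interest is arbitrary:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // nrows > 1 indicates a multi-row kernel, e.g. on CPUs with int8 matmul.
    ggml_type_traits_t traits = ggml_internal_get_type_traits(GGML_TYPE_Q4_0);
    printf("q4_0: vec_dot processes %lld row(s) per call\n", (long long) traits.nrows);
    return 0;
}
```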