RubyGems - llama_cpp - Versions diffs - 0.12.5 → 0.12.6 - Mend

llama_cpp 0.12.5 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +7 -0
data/ext/llama_cpp/llama_cpp.cpp +46 -0
data/lib/llama_cpp/version.rb +2 -2
data/sig/llama_cpp.rbs +7 -0
data/vendor/tmp/llama.cpp/Makefile +9 -1
data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
data/vendor/tmp/llama.cpp/ggml.c +134 -60
data/vendor/tmp/llama.cpp/ggml.h +26 -6
data/vendor/tmp/llama.cpp/llama.cpp +654 -130
data/vendor/tmp/llama.cpp/llama.h +6 -0
data/vendor/tmp/llama.cpp/unicode.h +42 -30
metadata +2 -2

data/vendor/tmp/llama.cpp/ggml-backend.h CHANGED Viewed

@@ -83,8 +83,9 @@ extern "C" {
     GGML_API ggml_backend_t ggml_backend_cpu_init(void);
-    GGML_API GGML_CALL bool ggml_backend_is_cpu           (ggml_backend_t backend);
-    GGML_API           void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
+    GGML_API GGML_CALL bool ggml_backend_is_cpu                (ggml_backend_t backend);
+    GGML_API           void ggml_backend_cpu_set_n_threads     (ggml_backend_t backend_cpu, int n_threads);
+    GGML_API           void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
     // Create a backend buffer from an existing pointer
     GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
@@ -129,11 +130,7 @@ extern "C" {
         // in build_graph:
         build_graph(...) {
-            // allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)
-            alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
-            ggml_allocr_alloc(alloc_cpu, tensor);
-            // manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
+            // manually assign nodes to a backend (optional, should not be needed in most cases)
             struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
             ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
         }
@@ -163,20 +160,19 @@ extern "C" {
     GGML_API ggml_backend_sched_t  ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
     GGML_API void                  ggml_backend_sched_free(ggml_backend_sched_t sched);
     // Initialize backend buffers from a measure graph
-    GGML_API void                  ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+    GGML_API bool                  ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
     // Get the number of splits of the last graph
     GGML_API int                   ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
-    GGML_API ggml_tallocr_t        ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
-    GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
+    GGML_API size_t                ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
     GGML_API void                  ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
     GGML_API ggml_backend_t        ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
     // Allocate and compute graph on the backend scheduler
-    GGML_API void                  ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+    GGML_API bool                  ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
-    // Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs
+    // Reset all assignments and allocators - must be called before changing the node backends
     GGML_API void                  ggml_backend_sched_reset(ggml_backend_sched_t sched);
     // Set a callback to be called for each resulting node during graph compute

data/vendor/tmp/llama.cpp/ggml-metal.m CHANGED Viewed

@@ -687,6 +687,7 @@ static bool ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
                struct ggml_cgraph * gf) {
+    @autoreleasepool {
     MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
     edesc.dispatchType = MTLDispatchTypeSerial;
@@ -2272,6 +2273,7 @@ static bool ggml_metal_graph_compute(
         [[MTLCaptureManager sharedCaptureManager] stopCapture];
     }
+    }
     return true;
 }