llama_cpp 0.12.5 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
@@ -83,8 +83,9 @@ extern "C" {
|
|
83
83
|
|
84
84
|
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
85
85
|
|
86
|
-
GGML_API GGML_CALL bool ggml_backend_is_cpu(ggml_backend_t backend);
|
87
|
-
GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
|
86
|
+
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
|
87
|
+
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
88
|
+
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
88
89
|
|
89
90
|
// Create a backend buffer from an existing pointer
|
90
91
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
@@ -129,11 +130,7 @@ extern "C" {
|
|
129
130
|
|
130
131
|
// in build_graph:
|
131
132
|
build_graph(...) {
|
132
|
-
//
|
133
|
-
alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
|
134
|
-
ggml_allocr_alloc(alloc_cpu, tensor);
|
135
|
-
|
136
|
-
// manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
|
133
|
+
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
137
134
|
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
138
135
|
ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
|
139
136
|
}
|
@@ -163,20 +160,19 @@ extern "C" {
|
|
163
160
|
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
|
164
161
|
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
165
162
|
// Initialize backend buffers from a measure graph
|
166
|
-
GGML_API
|
163
|
+
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
167
164
|
// Get the number of splits of the last graph
|
168
165
|
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
169
166
|
|
170
|
-
GGML_API
|
171
|
-
GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
|
167
|
+
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
172
168
|
|
173
169
|
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
174
170
|
GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
175
171
|
|
176
172
|
// Allocate and compute graph on the backend scheduler
|
177
|
-
GGML_API
|
173
|
+
GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
178
174
|
|
179
|
-
// Reset all assignments and allocators - must be called before
|
175
|
+
// Reset all assignments and allocators - must be called before changing the node backends
|
180
176
|
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
181
177
|
|
182
178
|
// Set a callback to be called for each resulting node during graph compute
|
@@ -687,6 +687,7 @@ static bool ggml_metal_graph_compute(
|
|
687
687
|
struct ggml_metal_context * ctx,
|
688
688
|
struct ggml_cgraph * gf) {
|
689
689
|
|
690
|
+
@autoreleasepool {
|
690
691
|
MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
|
691
692
|
edesc.dispatchType = MTLDispatchTypeSerial;
|
692
693
|
|
@@ -2272,6 +2273,7 @@ static bool ggml_metal_graph_compute(
|
|
2272
2273
|
[[MTLCaptureManager sharedCaptureManager] stopCapture];
|
2273
2274
|
}
|
2274
2275
|
|
2276
|
+
}
|
2275
2277
|
return true;
|
2276
2278
|
}
|
2277
2279
|
|