llama_cpp 0.12.5 → 0.12.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +9 -1
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +347 -40
- data/vendor/tmp/llama.cpp/ggml-quants.h +14 -14
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +14 -61
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +89 -6
- data/vendor/tmp/llama.cpp/ggml.c +134 -60
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +654 -130
- data/vendor/tmp/llama.cpp/llama.h +6 -0
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
@@ -83,8 +83,9 @@ extern "C" {
|
|
83
83
|
|
84
84
|
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
|
85
85
|
|
86
|
-
GGML_API GGML_CALL bool ggml_backend_is_cpu(ggml_backend_t backend);
|
87
|
-
GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
|
86
|
+
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
|
87
|
+
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
|
88
|
+
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
|
88
89
|
|
89
90
|
// Create a backend buffer from an existing pointer
|
90
91
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
|
@@ -129,11 +130,7 @@ extern "C" {
|
|
129
130
|
|
130
131
|
// in build_graph:
|
131
132
|
build_graph(...) {
|
132
|
-
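// allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)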
|
133
|
-
alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
|
134
|
-
ggml_allocr_alloc(alloc_cpu, tensor);
|
135
|
-
|
136
|
-
// manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
|
133
|
+
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
137
134
|
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
138
135
|
ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
|
139
136
|
}
|
@@ -163,20 +160,19 @@ extern "C" {
|
|
163
160
|
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
|
164
161
|
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
165
162
|
// Initialize backend buffers from a measure graph
|
166
|
-
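GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);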
|
163
|
+
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
167
164
|
// Get the number of splits of the last graph
|
168
165
|
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
169
166
|
|
170
|
-
GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
|
171
|
-
GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
|
167
|
+
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
172
168
|
|
173
169
|
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
174
170
|
GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
175
171
|
|
176
172
|
// Allocate and compute graph on the backend scheduler
|
177
|
-
GGML_API void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
173
|
+
GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
178
174
|
|
179
|
-
// Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs
|
175
|
+
// Reset all assignments and allocators - must be called before changing the node backends
|
180
176
|
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
181
177
|
|
182
178
|
// Set a callback to be called for each resulting node during graph compute
|
@@ -687,6 +687,7 @@ static bool ggml_metal_graph_compute(
|
|
687
687
|
struct ggml_metal_context * ctx,
|
688
688
|
struct ggml_cgraph * gf) {
|
689
689
|
|
690
|
+
@autoreleasepool {
|
690
691
|
MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
|
691
692
|
edesc.dispatchType = MTLDispatchTypeSerial;
|
692
693
|
|
@@ -2272,6 +2273,7 @@ static bool ggml_metal_graph_compute(
|
|
2272
2273
|
[[MTLCaptureManager sharedCaptureManager] stopCapture];
|
2273
2274
|
}
|
2274
2275
|
|
2276
|
+
}
|
2275
2277
|
return true;
|
2276
2278
|
}
|
2277
2279
|
|