llama_cpp 0.12.5 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -83,8 +83,9 @@ extern "C" {
 
 GGML_API ggml_backend_t ggml_backend_cpu_init(void);
 
- GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
- GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
+ GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
+ GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
+ GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
 
 // Create a backend buffer from an existing pointer
 GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
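The new ggml_backend_cpu_set_abort_callback() lets a caller interrupt a long-running CPU graph compute. Below is a minimal sketch (not part of the package) of how it might be wired up; it assumes the ggml_abort_callback typedef from ggml.h, i.e. a bool (*)(void * data) callback that aborts when it returns true, and the names g_stop, should_abort and make_cpu_backend are placeholders.

    #include <stdbool.h>

    #include "ggml-backend.h"   // assumed include path

    // flag that another thread (or a signal handler) could set to request cancellation
    static volatile bool g_stop = false;

    // matches the assumed ggml_abort_callback shape: return true to abort the compute
    static bool should_abort(void * data) {
        return *(volatile bool *) data;
    }

    static ggml_backend_t make_cpu_backend(void) {
        ggml_backend_t cpu = ggml_backend_cpu_init();
        ggml_backend_cpu_set_n_threads(cpu, 4);
        ggml_backend_cpu_set_abort_callback(cpu, should_abort, (void *) &g_stop);
        return cpu;
    }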
@@ -129,11 +130,7 @@ extern "C" {
 
 // in build_graph:
 build_graph(...) {
- // allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)
- alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
- ggml_allocr_alloc(alloc_cpu, tensor);
-
- // manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
+ // manually assign nodes to a backend (optional, should not be needed in most cases)
 struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
 ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
 }
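The updated usage comment no longer allocates tensors through the scheduler's allocator: inputs are expected to be pre-allocated in their own buffer, and the graph-build callback at most pins individual nodes. A sketch of such a callback under those assumptions; build_graph, inp, w, sched and backend_gpu are illustrative names, not part of the package.

    #include "ggml.h"
    #include "ggml-backend.h"

    // inp and w are assumed to be pre-allocated in their own backend buffer(s)
    static struct ggml_cgraph * build_graph(struct ggml_context * ctx,
                                            struct ggml_tensor  * inp,
                                            struct ggml_tensor  * w,
                                            ggml_backend_sched_t  sched,
                                            ggml_backend_t        backend_gpu) {
        struct ggml_cgraph * gf = ggml_new_graph(ctx);

        // ordinary graph construction - the scheduler decides where each node runs
        struct ggml_tensor * node = ggml_mul_mat(ctx, w, inp);

        // optional: pin this node to a specific backend (rarely needed)
        ggml_backend_sched_set_node_backend(sched, node, backend_gpu);

        ggml_build_forward_expand(gf, node);
        return gf;
    }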
@@ -163,20 +160,19 @@ extern "C" {
 GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
 GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
 // Initialize backend buffers from a measure graph
- GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+ GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
 // Get the number of splits of the last graph
 GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
 
- GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
- GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
+ GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
 
 GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
 GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
 
 // Allocate and compute graph on the backend scheduler
- GGML_API void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
+ GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
 
- // Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs
+ // Reset all assignments and allocators - must be called before changing the node backends
 GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
 
 // Set a callback to be called for each resulting node during graph compute
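The scheduler API above changes shape in three places: ggml_backend_sched_reserve() replaces ggml_backend_sched_init_measure() and returns bool, the per-backend allocator/buffer getters are replaced by ggml_backend_sched_get_buffer_size(), and ggml_backend_sched_graph_compute() now reports failure through its bool return. The sketch below (not part of the package) strings the updated calls together; it assumes GGML_DEFAULT_GRAPH_SIZE from ggml.h, and run_once, measure_graph and graph are placeholder names.

    #include <stdio.h>

    #include "ggml.h"
    #include "ggml-backend.h"

    static bool run_once(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts,
                         int n_backends, struct ggml_cgraph * measure_graph,
                         struct ggml_cgraph * graph) {
        ggml_backend_sched_t sched =
            ggml_backend_sched_new(backends, bufts, n_backends, GGML_DEFAULT_GRAPH_SIZE);

        // reserve backend buffers from a worst-case ("measure") graph - replaces init_measure
        if (!ggml_backend_sched_reserve(sched, measure_graph)) {
            ggml_backend_sched_free(sched);
            return false;
        }

        // per-backend memory is now queried as a size rather than an allocator/buffer handle
        for (int i = 0; i < n_backends; i++) {
            printf("backend %d buffer: %zu bytes\n", i,
                   ggml_backend_sched_get_buffer_size(sched, backends[i]));
        }

        // allocate and compute; failures are reported through the bool return value
        bool ok = ggml_backend_sched_graph_compute(sched, graph);

        // reset assignments/allocators before changing node backends for the next graph
        ggml_backend_sched_reset(sched);

        ggml_backend_sched_free(sched);
        return ok;
    }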
@@ -687,6 +687,7 @@ static bool ggml_metal_graph_compute(
 struct ggml_metal_context * ctx,
 struct ggml_cgraph * gf) {
 
+ @autoreleasepool {
 MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
 edesc.dispatchType = MTLDispatchTypeSerial;
 
@@ -2272,6 +2273,7 @@ static bool ggml_metal_graph_compute(
 [[MTLCaptureManager sharedCaptureManager] stopCapture];
 }
 
+ }
 return true;
 }