llama_cpp 0.12.5 → 0.12.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -83,8 +83,9 @@ extern "C" {
83
83
 
84
84
  GGML_API ggml_backend_t ggml_backend_cpu_init(void);
85
85
 
86
- GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
87
- GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
86
+ GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
87
+ GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
88
+ GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
88
89
 
89
90
  // Create a backend buffer from an existing pointer
90
91
  GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
@@ -129,11 +130,7 @@ extern "C" {
129
130
 
130
131
  // in build_graph:
131
132
  build_graph(...) {
132
- // allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)
133
- alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
134
- ggml_allocr_alloc(alloc_cpu, tensor);
135
-
136
- // manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
133
+ // manually assign nodes to a backend (optional, should not be needed in most cases)
137
134
  struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
138
135
  ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
139
136
  }
@@ -163,20 +160,19 @@ extern "C" {
163
160
  GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
164
161
  GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
165
162
  // Initialize backend buffers from a measure graph
166
- GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
163
+ GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
167
164
  // Get the number of splits of the last graph
168
165
  GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
169
166
 
170
- GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
171
- GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
167
+ GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
172
168
 
173
169
  GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
174
170
  GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
175
171
 
176
172
  // Allocate and compute graph on the backend scheduler
177
- GGML_API void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
173
+ GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
178
174
 
179
- // Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs
175
+ // Reset all assignments and allocators - must be called before changing the node backends
180
176
  GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
181
177
 
182
178
  // Set a callback to be called for each resulting node during graph compute
@@ -687,6 +687,7 @@ static bool ggml_metal_graph_compute(
687
687
  struct ggml_metal_context * ctx,
688
688
  struct ggml_cgraph * gf) {
689
689
 
690
+ @autoreleasepool {
690
691
  MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
691
692
  edesc.dispatchType = MTLDispatchTypeSerial;
692
693
 
@@ -2272,6 +2273,7 @@ static bool ggml_metal_graph_compute(
2272
2273
  [[MTLCaptureManager sharedCaptureManager] stopCapture];
2273
2274
  }
2274
2275
 
2276
+ }
2275
2277
  return true;
2276
2278
  }
2277
2279