llama_cpp 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
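
Judging by the hunk contents, the vendored ggml sources change in two places between these releases: the CUDA header (apparently ggml-cuda.h) drops the public definition of struct ggml_tensor_extra_gpu and gains a declaration for ggml_cuda_assign_buffers_force_inplace, and the Metal backend (apparently ggml-metal.m) teaches ggml_metal_free to release its retained buffers before freeing the context.
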
@@ -8,10 +8,6 @@ extern "C" {
 
 #define GGML_CUDA_MAX_DEVICES 16
 
-struct ggml_tensor_extra_gpu {
-    void * data_device[GGML_CUDA_MAX_DEVICES]; // 1 pointer for each device for split tensors
-};
-
 void ggml_init_cublas(void);
 void ggml_cuda_set_tensor_split(const float * tensor_split);
 
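
The first hunk removes struct ggml_tensor_extra_gpu from the public CUDA header, presumably making the per-device pointer table private to the CUDA implementation. Downstream code that reached into tensor->extra through this definition, as in the hypothetical sketch below, no longer compiles against 0.3.2 and should treat extra as opaque.

    /* Hypothetical pre-0.3.2 usage: casting tensor->extra to the struct
     * that this release removes from the public header.  Compiles against
     * 0.3.0; fails against 0.3.2, where the definition is no longer
     * visible. */
    #include "ggml.h"
    #include "ggml-cuda.h"

    static void * split_tensor_device_ptr(const struct ggml_tensor * t, int device) {
        struct ggml_tensor_extra_gpu * extra = t->extra; /* definition removed in 0.3.2 */
        return extra->data_device[device];               /* one pointer per device for split tensors */
    }
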
@@ -29,6 +25,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
 void ggml_cuda_free_data(struct ggml_tensor * tensor);
 void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
 void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
+void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
 void ggml_cuda_set_main_device(int main_device);
 void ggml_cuda_set_scratch_size(size_t scratch_size);
 void ggml_cuda_free_scratch(void);
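
The same header gains ggml_cuda_assign_buffers_force_inplace beside the existing buffer-assignment helpers. The sketch below is an assumption about the call pattern based on its siblings, not code from the package: presumably the force-inplace variant marks a tensor whose result must overwrite its source's device buffer rather than go through scratch memory.

    /* Hedged sketch: offload() is a hypothetical helper; only the two
     * ggml_cuda_assign_buffers* declarations come from the header above.
     * Assumption: the force-inplace variant is for results that must land
     * in place over their source (e.g. a KV-cache write) instead of in
     * scratch memory. */
    #include "ggml.h"
    #include "ggml-cuda.h"

    static void offload(struct ggml_tensor * cur, struct ggml_tensor * kv_write) {
        ggml_cuda_assign_buffers(cur);                    /* intermediate: scratch-backed */
        ggml_cuda_assign_buffers_force_inplace(kv_write); /* must overwrite its source */
    }
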
@@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
     fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
     free(ctx);
 }
 
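
The Metal hunk fixes what looks like a resource leak: ggml_metal_free previously freed the context struct without releasing the MTLBuffer objects it had retained (the file uses manual reference counting, hence the explicit release), so each init/free cycle stranded the device allocations. From the C side the lifecycle pairing is unchanged; the header name below is assumed from the usual ggml layout.

    #include "ggml-metal.h"

    void demo(void) {
        struct ggml_metal_context * ctx = ggml_metal_init();
        /* ... map buffers, evaluate graphs ... */
        ggml_metal_free(ctx); /* as of 0.3.2, also releases each ctx->buffers[i].metal */
    }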