llama_cpp 0.3.0 → 0.3.2

@@ -8,10 +8,6 @@ extern "C" {
 
 #define GGML_CUDA_MAX_DEVICES       16
 
-struct ggml_tensor_extra_gpu {
-    void * data_device[GGML_CUDA_MAX_DEVICES]; // 1 pointer for each device for split tensors
-};
-
 void   ggml_init_cublas(void);
 void   ggml_cuda_set_tensor_split(const float * tensor_split);
 
@@ -29,6 +25,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
 void   ggml_cuda_free_data(struct ggml_tensor * tensor);
 void   ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
 void   ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
+void   ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
 void   ggml_cuda_set_main_device(int main_device);
 void   ggml_cuda_set_scratch_size(size_t scratch_size);
 void   ggml_cuda_free_scratch(void);
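
Taken together, the two hunks above adjust the CUDA backend's public surface: the per-device pointer struct ggml_tensor_extra_gpu leaves the header (judging from the hunk alone, it appears to become an implementation detail of the backend), and a new entry point, ggml_cuda_assign_buffers_force_inplace, joins the existing buffer-assignment family. Below is a minimal sketch of how these entry points might be called; the single-GPU setup, the call order, and reading the three assign variants as alternatives are assumptions based on the names, not something this diff prescribes.

#include "ggml.h"
#include "ggml-cuda.h"

void offload_sketch(struct ggml_tensor * t) {
    ggml_init_cublas();                     // one-time cuBLAS backend init
    ggml_cuda_set_main_device(0);           // assumption: GPU 0 is the main device

    // Assumption: pick one buffer-assignment strategy per tensor.
    ggml_cuda_assign_buffers_no_scratch(t); // dedicated device buffer, no scratch pool
    // ggml_cuda_assign_buffers(t);               // scratch-pool-backed buffer
    // ggml_cuda_assign_buffers_force_inplace(t); // new in this release: by its name,
    //                                            // forces the op to run in place

    ggml_cuda_free_data(t);                 // release t's device data when done
}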
@@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
     fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
     free(ctx);
 }
 
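
This last hunk appears to come from the Metal backend (ggml-metal.m in upstream llama.cpp, which the gem vendors). Before the change, ggml_metal_free only free()d the context struct, leaking every Metal buffer the context still retained; the added loop releases each buffer first. A plain-C analogue of the same ownership rule, with illustrative names that are not from the gem:

#include <stdio.h>
#include <stdlib.h>

// A context that owns sub-allocations must release each one before freeing
// itself, or the sub-allocations leak -- exactly what the added loop does.
struct ctx {
    int    n_buffers;
    void * buffers[16];
};

void ctx_free(struct ctx * c) {
    fprintf(stderr, "%s: deallocating\n", __func__);
    for (int i = 0; i < c->n_buffers; ++i) {
        free(c->buffers[i]); // analogous to [ctx->buffers[i].metal release]
    }
    free(c);
}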