llama_cpp 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -24,11 +24,14 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
24
24
  void * ggml_cuda_host_malloc(size_t size);
25
25
  void ggml_cuda_host_free(void * ptr);
26
26
 
27
- void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
27
+ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
28
+
28
29
  void ggml_cuda_free_data(struct ggml_tensor * tensor);
29
30
  void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
31
+ void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
30
32
  void ggml_cuda_set_main_device(int main_device);
31
33
  void ggml_cuda_set_scratch_size(size_t scratch_size);
34
+ void ggml_cuda_free_scratch(void);
32
35
  bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
33
36
 
34
37
  #ifdef __cplusplus
@@ -55,6 +55,7 @@ void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor *
55
55
  void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
56
56
 
57
57
  // same as ggml_graph_compute but uses Metal
58
+ // creates gf->n_threads command buffers in parallel
58
59
  void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
59
60
 
60
61
  #ifdef __cplusplus