llama_cpp 0.2.0 → 0.2.2 — diff of the bundled ggml CUDA/Metal headers (ggml-cuda.h, ggml-metal.h)

Sign up to get free protection for your applications and to get access to all the features.
@@ -24,11 +24,14 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
24
24
  void * ggml_cuda_host_malloc(size_t size);
25
25
  void ggml_cuda_host_free(void * ptr);
26
26
 
27
- void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
27
+ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
28
+
28
29
  void ggml_cuda_free_data(struct ggml_tensor * tensor);
29
30
  void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
31
+ void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
30
32
  void ggml_cuda_set_main_device(int main_device);
31
33
  void ggml_cuda_set_scratch_size(size_t scratch_size);
34
+ void ggml_cuda_free_scratch(void);
32
35
  bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
33
36
 
34
37
  #ifdef __cplusplus
@@ -41,12 +41,15 @@ void ggml_metal_free(struct ggml_metal_context * ctx);
41
41
  // - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
42
42
  // - the mapping is used during computation to determine the arguments of the compute kernels
43
43
  // - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
44
+ // - max_size specifies the maximum size of a tensor and is used to create shared views such
45
+ // that it is guaranteed that the tensor will fit in at least one of the views
44
46
  //
45
47
  bool ggml_metal_add_buffer(
46
48
  struct ggml_metal_context * ctx,
47
49
  const char * name,
48
50
  void * data,
49
- size_t size);
51
+ size_t size,
52
+ size_t max_size);
50
53
 
51
54
  // set data from host memory into the device
52
55
  void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
@@ -55,6 +58,7 @@ void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor *
55
58
  void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
56
59
 
57
60
  // same as ggml_graph_compute but uses Metal
61
+ // creates gf->n_threads command buffers in parallel
58
62
  void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
59
63
 
60
64
  #ifdef __cplusplus