llama_cpp 0.3.8 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,6 +2,14 @@
2
2
 
3
3
  #include "ggml.h"
4
4
 
5
+ #ifdef GGML_USE_HIPBLAS
6
+ #define GGML_CUDA_NAME "ROCm"
7
+ #define GGML_CUBLAS_NAME "hipBLAS"
8
+ #else
9
+ #define GGML_CUDA_NAME "CUDA"
10
+ #define GGML_CUBLAS_NAME "cuBLAS"
11
+ #endif
12
+
5
13
  #ifdef __cplusplus
6
14
  extern "C" {
7
15
  #endif
@@ -16,9 +24,14 @@ GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const str
16
24
  GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
17
25
  GGML_API void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
18
26
  GGML_API void ggml_cuda_free_data(struct ggml_tensor * tensor);
27
+
19
28
  GGML_API void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
20
29
  GGML_API void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
21
30
  GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
31
+
32
+ GGML_API void ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
33
+ GGML_API void ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);
34
+
22
35
  GGML_API void ggml_cuda_set_main_device(int main_device);
23
36
  GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
24
37
  GGML_API void ggml_cuda_set_scratch_size(size_t scratch_size);
@@ -24,6 +24,7 @@
24
24
 
25
25
  // max memory buffers that can be mapped to the device
26
26
  #define GGML_METAL_MAX_BUFFERS 16
27
+ #define GGML_METAL_MAX_COMMAND_BUFFERS 32
27
28
 
28
29
  struct ggml_tensor;
29
30
  struct ggml_cgraph;
@@ -38,6 +39,9 @@ struct ggml_metal_context;
38
39
  struct ggml_metal_context * ggml_metal_init(int n_cb);
39
40
  void ggml_metal_free(struct ggml_metal_context * ctx);
40
41
 
42
+ void * ggml_metal_host_malloc(size_t n);
43
+ void ggml_metal_host_free (void * data);
44
+
41
45
  // set the number of command buffers to use
42
46
  void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
43
47