llama_cpp 0.9.5 → 0.10.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -7,41 +7,44 @@
7
7
  extern "C" {
8
8
  #endif
9
9
 
10
+ typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
11
+ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
12
+ typedef struct ggml_backend * ggml_backend_t;
13
+ typedef void * ggml_backend_graph_plan_t;
14
+
10
15
  //
11
16
  // Backend buffer
12
17
  //
13
18
 
14
- struct ggml_backend_buffer;
15
- typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
19
+ // buffer type
20
+ GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
21
+ GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
22
+ GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
23
+ GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
16
24
 
17
- // backend buffer functions
25
+ // buffer
18
26
  GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
19
- GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
20
27
  GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
21
28
  GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
22
- GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
23
29
  GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
24
- GGML_API void ggml_backend_buffer_free_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
30
+ GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
31
+ GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
32
+ GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer);
25
33
 
26
34
  //
27
35
  // Backend
28
36
  //
29
37
 
30
- struct ggml_backend;
31
- typedef struct ggml_backend * ggml_backend_t;
32
- typedef void * ggml_backend_graph_plan_t;
33
-
34
- GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
35
38
 
36
39
  GGML_API const char * ggml_backend_name(ggml_backend_t backend);
37
40
  GGML_API void ggml_backend_free(ggml_backend_t backend);
38
41
 
39
- GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
40
-
41
- GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
42
+ GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
43
+ GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
44
+ GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
42
45
 
43
- GGML_API void ggml_backend_tensor_set_async( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
44
- GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
46
+ GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
47
+ GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
45
48
 
46
49
  GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
47
50
  GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
@@ -57,6 +60,7 @@ extern "C" {
57
60
 
58
61
  // tensor copy between different backends
59
62
  GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
63
+ GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
60
64
 
61
65
  //
62
66
  // CPU backend
@@ -68,8 +72,23 @@ extern "C" {
68
72
  GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
69
73
 
70
74
  // Create a backend buffer from an existing pointer
71
- GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);
75
+ GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
76
+
77
+ GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
72
78
 
79
+ //
80
+ // Backend registry
81
+ //
82
+
83
+ // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
84
+
85
+ GGML_API size_t ggml_backend_reg_get_count(void);
86
+ GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
87
+ GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
88
+ GGML_API const char * ggml_backend_reg_get_name(size_t i);
89
+ GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
90
+ GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
91
+ GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
73
92
 
74
93
  //
75
94
  // Backend scheduler
@@ -131,6 +150,32 @@ extern "C" {
131
150
  ggml_backend_sched_t sched,
132
151
  struct ggml_cgraph * graph);
133
152
 
153
+
154
+ //
155
+ // Utils
156
+ //
157
+
158
+ struct ggml_backend_graph_copy {
159
+ ggml_backend_buffer_t buffer;
160
+ struct ggml_context * ctx_allocated;
161
+ struct ggml_context * ctx_unallocated;
162
+ struct ggml_cgraph * graph;
163
+ };
164
+
165
+ // Copy a graph to a different backend
166
+ GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
167
+ GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
168
+
169
+ typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
170
+
171
+ // Compare the output of two backends
172
+ GGML_API void ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
173
+
174
+ // Tensor initialization
175
+ GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
176
+ GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
177
+
178
+
134
179
  #ifdef __cplusplus
135
180
  }
136
181
  #endif