llama_cpp 0.12.1 → 0.12.2 (changes to the vendored ggml-backend.h)

@@ -17,22 +17,31 @@ extern "C" {
     //
 
     // buffer type
-    GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
-    GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
-    GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
-    GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
-    GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
+    GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
+    GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
+    GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
+    GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
+    GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
+    GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
 
     // buffer
-    GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
-    GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
-    GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
-    GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-    GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
-    GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-    GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
-    GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
-    GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer);
+    enum ggml_backend_buffer_usage {
+        GGML_BACKEND_BUFFER_USAGE_ANY = 0,
+        GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
+    };
+
+    GGML_API const char * ggml_backend_buffer_name (ggml_backend_buffer_t buffer);
+    GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
+    GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
+    GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
+    GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+    GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
+    GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
+    GGML_API void ggml_backend_buffer_set_usage (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
+    GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_get_type (ggml_backend_buffer_t buffer);
+    GGML_API void ggml_backend_buffer_reset (ggml_backend_buffer_t buffer);
 
     //
     // Backend
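
The buffer API gains name accessors, a usage hint, and a reset hook, and ggml_backend_buffer_type() becomes ggml_backend_buffer_get_type(). A minimal sketch of how the new calls fit together, assuming the CPU buffer type declared further down in this header; demo_buffer_usage and the 16 KiB size are illustrative only:

#include <stdio.h>
#include "ggml-backend.h"

// Sketch only: exercise the renamed/added buffer introspection calls.
static void demo_buffer_usage(void) {
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();

    // New in 0.12.2: buffer types (and buffers) can report a human-readable name.
    printf("buft: %s, alignment: %zu\n",
           ggml_backend_buft_name(buft),
           ggml_backend_buft_get_alignment(buft));

    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 16 * 1024);
    printf("buffer: %s, size: %zu, host: %d\n",
           ggml_backend_buffer_name(buf),
           ggml_backend_buffer_get_size(buf),
           ggml_backend_buffer_is_host(buf));

    // New usage hint: mark this buffer as holding model weights.
    ggml_backend_buffer_set_usage(buf, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);

    // Renamed accessor: ggml_backend_buffer_type() -> ggml_backend_buffer_get_type().
    ggml_backend_buffer_type_t same = ggml_backend_buffer_get_type(buf);
    (void) same;

    ggml_backend_buffer_free(buf);
}
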
@@ -49,8 +58,8 @@ extern "C" {
     GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
 
-    GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-    GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    GGML_API GGML_CALL void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
 
     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
 
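
ggml_backend_tensor_set/ggml_backend_tensor_get only gain the GGML_CALL convention; their behaviour is unchanged. A small sketch of the host copies they perform, assuming t is a 4-element F32 tensor already allocated in a backend buffer:

#include "ggml.h"
#include "ggml-backend.h"

// Sketch only: `t` is assumed to be allocated in a backend buffer by the caller.
static void demo_tensor_io(struct ggml_tensor * t) {
    float in[4]  = { 1.0f, 2.0f, 3.0f, 4.0f };
    float out[4] = { 0.0f };

    // Copy host data into the tensor, starting at byte offset 0.
    ggml_backend_tensor_set(t, in, 0, sizeof(in));

    // Copy it back to host memory; unlike the _async variants, these calls
    // complete the transfer before returning.
    ggml_backend_tensor_get(t, out, 0, sizeof(out));
}
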
@@ -71,13 +80,13 @@ extern "C" {
 
     GGML_API ggml_backend_t ggml_backend_cpu_init(void);
 
-    GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
-    GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
+    GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
+    GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
 
     // Create a backend buffer from an existing pointer
-    GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
+    GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
 
-    GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
+    GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
 
     #ifdef GGML_USE_CPU_HBM
     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
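
The CPU-backend entry points likewise only pick up GGML_CALL. A sketch of typical CPU backend setup and wrapping caller-owned memory in a backend buffer; ggml_backend_free() is assumed from elsewhere in this header, and the thread count and 16 MiB size are arbitrary:

#include <stdlib.h>
#include "ggml-backend.h"

// Sketch only: CPU backend setup plus a buffer built from an existing pointer.
static void demo_cpu_buffer(void) {
    ggml_backend_t cpu = ggml_backend_cpu_init();
    if (ggml_backend_is_cpu(cpu)) {
        ggml_backend_cpu_set_n_threads(cpu, 4);
    }

    // The buffer wraps `ptr` rather than allocating its own storage, so the
    // caller remains responsible for freeing the memory itself.
    size_t size = 16u * 1024 * 1024;
    void * ptr  = malloc(size);
    ggml_backend_buffer_t buf = ggml_backend_cpu_buffer_from_ptr(ptr, size);

    ggml_backend_buffer_free(buf);
    free(ptr);
    ggml_backend_free(cpu);
}
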
@@ -140,23 +149,24 @@ extern "C" {
     typedef struct ggml_backend_sched * ggml_backend_sched_t;
 
     // Initialize a backend scheduler
-    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, int n_backends);
-
-    GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
-
+    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
+    GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
     // Initialize backend buffers from a measure graph
-    GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+    GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
+    // Get the number of splits of the last graph
+    GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
 
     GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
     GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
 
-    GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+    GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
+    GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
 
-    // Allocate a graph on the backend scheduler
-    GGML_API void ggml_backend_sched_graph_compute(
-            ggml_backend_sched_t sched,
-            struct ggml_cgraph * graph);
+    // Allocate and compute graph on the backend scheduler
+    GGML_API void ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
 
+    // Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs
+    GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
 
     //
     // Utils
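
The scheduler constructor now also takes per-backend buffer types and an expected graph size, and ggml_backend_sched_reset()/ggml_backend_sched_get_n_splits() are new. A hedged sketch of the updated flow, assuming ggml_backend_get_default_buffer_type() and GGML_DEFAULT_GRAPH_SIZE from the ggml headers, with caller-provided backends and graphs:

#include <stdio.h>
#include "ggml.h"
#include "ggml-backend.h"

// Sketch only: backend handles and graphs are set up by the caller.
static void demo_sched(ggml_backend_t gpu, ggml_backend_t cpu,
                       struct ggml_cgraph * measure_graph, struct ggml_cgraph * graph) {
    ggml_backend_t             backends[2] = { gpu, cpu };
    ggml_backend_buffer_type_t bufts[2]    = {
        ggml_backend_get_default_buffer_type(gpu),
        ggml_backend_cpu_buffer_type(),
    };

    // New signature: one buffer type per backend, plus the expected graph size.
    ggml_backend_sched_t sched = ggml_backend_sched_new(backends, bufts, 2, GGML_DEFAULT_GRAPH_SIZE);

    // Reserve backend buffers from a worst-case measure graph, as before.
    ggml_backend_sched_init_measure(sched, measure_graph);

    // New: reset assignments/allocators before allocating inputs for a new graph,
    // then allocate and compute in one call.
    ggml_backend_sched_reset(sched);
    ggml_backend_sched_graph_compute(sched, graph);

    // New: how many splits the last graph was broken into across backends.
    printf("splits: %d\n", ggml_backend_sched_get_n_splits(sched));

    ggml_backend_sched_free(sched);
}
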
@@ -173,10 +183,10 @@ extern "C" {
     GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
     GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
 
-    typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
+    typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
 
     // Compare the output of two backends
-    GGML_API void ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
+    GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
 
     // Tensor initialization
     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
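
The eval callback typedef now carries GGML_CALL, and ggml_backend_compare_graph_backend() reports success through its bool return instead of void. A sketch of a matching callback and call site; demo_eval_cb/demo_compare are illustrative names and GGML_CALL is the calling-convention macro from the ggml headers:

#include <stdio.h>
#include "ggml.h"
#include "ggml-backend.h"

// Sketch only: a callback that keeps comparing every node; real code would
// inspect t1/t2 (the per-node outputs of backend1 and backend2).
static bool GGML_CALL demo_eval_cb(int node_index, struct ggml_tensor * t1,
                                   struct ggml_tensor * t2, void * user_data) {
    (void) node_index; (void) t1; (void) t2; (void) user_data;
    return true; // returning false stops the comparison early
}

static void demo_compare(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph) {
    // Now returns bool instead of void, so mismatches can be reported upward.
    bool ok = ggml_backend_compare_graph_backend(backend1, backend2, graph, demo_eval_cb, NULL);
    printf("backends %s\n", ok ? "agree" : "disagree");
}
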