llama_cpp 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,385 @@
+ #include "ggml-backend.h"
+ #include "ggml-alloc.h"
+
+ #include <assert.h>
+ #include <stdarg.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+
+ #define UNUSED GGML_UNUSED
+
+ #define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+ // backend buffer
+
+ ggml_backend_buffer_t ggml_backend_buffer_init(
+         struct ggml_backend * backend,
+         struct ggml_backend_buffer_i iface,
+         ggml_backend_buffer_context_t context,
+         size_t size) {
+     ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer));
+
+     GGML_ASSERT(iface.get_base != NULL);
+
+     (*buffer) = (struct ggml_backend_buffer) {
+         /* .interface = */ iface,
+         /* .backend = */ backend,
+         /* .context = */ context,
+         /* .size = */ size,
+     };
+
+     return buffer;
+ }
+
+ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
+     if (buffer->iface.free_buffer != NULL) {
+         buffer->iface.free_buffer(buffer);
+     }
+     free(buffer);
+ }
+
+ size_t ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) {
+     return ggml_backend_get_alignment(buffer->backend);
+ }
+
+ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
+     return buffer->iface.get_base(buffer);
+ }
+
+ size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
+     return buffer->size;
+ }
+
+ size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+     if (buffer->iface.get_alloc_size) {
+         return buffer->iface.get_alloc_size(buffer, tensor);
+     }
+     return ggml_nbytes(tensor);
+ }
+
+ void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+     if (buffer->iface.init_tensor) {
+         buffer->iface.init_tensor(buffer, tensor);
+     }
+ }
+
+ void ggml_backend_buffer_free_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+     if (buffer->iface.free_tensor) {
+         buffer->iface.free_tensor(buffer, tensor);
+     }
+ }
+
+ // backend
+
+ ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor) {
+     return tensor->buffer->backend;
+ }
+
+ const char * ggml_backend_name(ggml_backend_t backend) {
+     return backend->iface.get_name(backend);
+ }
+
+ void ggml_backend_free(ggml_backend_t backend) {
+     backend->iface.free(backend);
+ }
+
+ ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) {
+     return backend->iface.alloc_buffer(backend, size);
+ }
+
+ size_t ggml_backend_get_alignment(ggml_backend_t backend) {
+     return backend->iface.get_alignment(backend);
+ }
+
+ void ggml_backend_tensor_set_async(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.set_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+ }
+
+ void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.get_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+ }
+
+ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.set_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+     ggml_get_backend(tensor)->iface.synchronize(ggml_get_backend(tensor));
+ }
+
+ void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.get_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+     ggml_get_backend(tensor)->iface.synchronize(ggml_get_backend(tensor));
+ }
+
+ void ggml_backend_synchronize(ggml_backend_t backend) {
+     backend->iface.synchronize(backend);
+ }
+
+ ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     return backend->iface.graph_plan_create(backend, cgraph);
+ }
+
+ void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     backend->iface.graph_plan_free(backend, plan);
+ }
+
+ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     backend->iface.graph_plan_compute(backend, plan);
+ }
+
+ void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     backend->iface.graph_compute(backend, cgraph);
+ }
+
+ bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
+     return backend->iface.supports_op(backend, op);
+ }
+
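The wrappers above simply dispatch into the backend's `ggml_backend_i` function table; the graph-plan functions in particular let a caller build a compute plan once and reuse it. The sketch below is not part of the package: `run_graph_n_times` and `n_runs` are hypothetical names, and it assumes a backend and a prepared `struct ggml_cgraph` built elsewhere. It only shows the intended create/compute/free lifecycle of a plan.

    // hypothetical helper, not from the diff: reuses a single graph plan for several runs
    static void run_graph_n_times(ggml_backend_t backend, struct ggml_cgraph * cgraph, int n_runs) {
        ggml_backend_graph_plan_t plan = ggml_backend_graph_plan_create(backend, cgraph);
        for (int i = 0; i < n_runs; i++) {
            ggml_backend_graph_plan_compute(backend, plan);
        }
        ggml_backend_graph_plan_free(backend, plan);
    }
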
+ // backend copy
+
+ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
+     if (a->type != b->type) {
+         return false;
+     }
+     for (int i = 0; i < GGML_MAX_DIMS; i++) {
+         if (a->ne[i] != b->ne[i]) {
+             return false;
+         }
+         if (a->nb[i] != b->nb[i]) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
+     //printf("src: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", src->name, (int)src->ne[0], (int)src->ne[1], (int)src->ne[2], (int)src->ne[3], (int)src->nb[0], (int)src->nb[1], (int)src->nb[2], (int)src->nb[3]);
+     //printf("dst: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], (int)dst->nb[0], (int)dst->nb[1], (int)dst->nb[2], (int)dst->nb[3]);
+     GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
+
+     // printf("cpy tensor %s from %s to %s (%lu bytes)\n", src->name, ggml_backend_name(src->backend), ggml_backend_name(dst->backend), ggml_nbytes(src));
+
+     if (src == dst) {
+         return;
+     }
+
+     // TODO: allow backends to support copy to/from same backend
+
+     if (ggml_get_backend(dst)->iface.cpy_tensor_from != NULL) {
+         ggml_get_backend(dst)->iface.cpy_tensor_from(ggml_get_backend(dst)->context, src, dst);
+     } else if (ggml_get_backend(src)->iface.cpy_tensor_to != NULL) {
+         ggml_get_backend(src)->iface.cpy_tensor_to(ggml_get_backend(src)->context, src, dst);
+     } else {
+         // shouldn't be hit when copying from/to CPU
+         #ifndef NDEBUG
+         fprintf(stderr, "ggml_backend_tensor_copy: neither cpy_tensor_from nor cpy_tensor_to are implemented for backends %s and %s, falling back to get/set\n", ggml_backend_name(src->buffer->backend), ggml_backend_name(dst->buffer->backend));
+         #endif
+         size_t nbytes = ggml_nbytes(src);
+         void * data = malloc(nbytes);
+         ggml_backend_tensor_get(src, data, 0, nbytes);
+         ggml_backend_tensor_set(dst, data, 0, nbytes);
+         free(data);
+     }
+ }
+
+ // backend CPU
+
+ struct ggml_backend_cpu_context {
+     int n_threads;
+     void * work_data;
+     size_t work_size;
+ };
+
+ static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
+     return "CPU";
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_free(ggml_backend_t backend) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+     free(cpu_ctx->work_data);
+     free(cpu_ctx);
+     free(backend);
+ }
+
+ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
+     return (void *)buffer->context;
+ }
+
+ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+     free(buffer->context);
+     UNUSED(buffer);
+ }
+
+ static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
+     /* .free_buffer = */ ggml_backend_cpu_buffer_free_buffer,
+     /* .get_base = */ ggml_backend_cpu_buffer_get_base,
+     /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
+     /* .init_tensor = */ NULL, // no initialization required
+     /* .free_tensor = */ NULL, // no cleanup required
+ };
+
+ // for buffers from ptr, free is not called
+ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
+     /* .free_buffer = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
+     /* .get_base = */ ggml_backend_cpu_buffer_get_base,
+     /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
+     /* .init_tensor = */ NULL,
+     /* .free_tensor = */ NULL,
+ };
+
+ static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512
+
+ static ggml_backend_buffer_t ggml_backend_cpu_alloc_buffer(ggml_backend_t backend, size_t size) {
+     size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned
+     void * data = malloc(size); // TODO: maybe use GGML_ALIGNED_MALLOC?
+
+     return ggml_backend_buffer_init(backend, cpu_backend_buffer_i, data, size);
+ }
+
+ static size_t ggml_backend_cpu_get_alignment(ggml_backend_t backend) {
+     return TENSOR_ALIGNMENT;
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_set_tensor_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
+     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
+
+     memcpy((char *)tensor->data + offset, data, size);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_get_tensor_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
+     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
+
+     memcpy(data, (const char *)tensor->data + offset, size);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_synchronize(ggml_backend_t backend) {
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_cpy_tensor_from(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
+     ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_cpy_tensor_to(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
+     // for a backend such as CUDA that can queue async calls, it is ok to do this asynchronously, but it may not be the case for other backends
+     ggml_backend_tensor_set_async(dst, src->data, 0, ggml_nbytes(src));
+
+     UNUSED(backend);
+ }
+
+ struct ggml_backend_plan_cpu {
+     struct ggml_cplan cplan;
+     struct ggml_cgraph cgraph;
+ };
+
+ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+     struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu));
+
+     cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
+     cpu_plan->cgraph = *cgraph;
+
+     if (cpu_plan->cplan.work_size > 0) {
+         cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
+     }
+
+     return cpu_plan;
+ }
+
+ static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+     free(cpu_plan->cplan.work_data);
+     free(cpu_plan);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+     ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
+
+     if (cpu_ctx->work_size < cplan.work_size) {
+         // TODO: may be faster to free and use malloc to avoid the copy
+         cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size);
+         cpu_ctx->work_size = cplan.work_size;
+     }
+
+     cplan.work_data = cpu_ctx->work_data;
+
+     ggml_graph_compute(cgraph, &cplan);
+ }
+
+ static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
+     return true;
+     UNUSED(backend);
+     UNUSED(op);
+ }
+
+ static struct ggml_backend_i cpu_backend_i = {
+     /* .get_name = */ ggml_backend_cpu_name,
+     /* .free = */ ggml_backend_cpu_free,
+     /* .alloc_buffer = */ ggml_backend_cpu_alloc_buffer,
+     /* .get_alignment = */ ggml_backend_cpu_get_alignment,
+     /* .set_tensor_async = */ ggml_backend_cpu_set_tensor_async,
+     /* .get_tensor_async = */ ggml_backend_cpu_get_tensor_async,
+     /* .synchronize = */ ggml_backend_cpu_synchronize,
+     /* .cpy_tensor_from = */ ggml_backend_cpu_cpy_tensor_from,
+     /* .cpy_tensor_to = */ ggml_backend_cpu_cpy_tensor_to,
+     /* .graph_plan_create = */ ggml_backend_cpu_graph_plan_create,
+     /* .graph_plan_free = */ ggml_backend_cpu_graph_plan_free,
+     /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
+     /* .graph_compute = */ ggml_backend_cpu_graph_compute,
+     /* .supports_op = */ ggml_backend_cpu_supports_op,
+ };
+
+ ggml_backend_t ggml_backend_cpu_init(void) {
+     struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context));
+
+     ctx->n_threads = GGML_DEFAULT_N_THREADS;
+     ctx->work_data = NULL;
+     ctx->work_size = 0;
+
+     ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend));
+
+     *cpu_backend = (struct ggml_backend) {
+         /* .interface = */ cpu_backend_i,
+         /* .context = */ ctx
+     };
+     return cpu_backend;
+ }
+
+ bool ggml_backend_is_cpu(ggml_backend_t backend) {
+     return backend->iface.get_name == ggml_backend_cpu_name;
+ }
+
+ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
+     GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+     struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+     ctx->n_threads = n_threads;
+ }
+
+ ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size) {
+     return ggml_backend_buffer_init(backend_cpu, cpu_backend_buffer_i_from_ptr, ptr, size);
+ }
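
With the CPU backend implementation above in place, the public entry points can be exercised directly. The following is a minimal sketch, not part of the package: it only calls functions defined in this file, and the 1 MiB buffer size and 4-thread setting are arbitrary illustrative choices.

    #include <stdio.h>
    #include "ggml-backend.h"

    // hypothetical driver, not from the diff: exercises the CPU backend entry points
    int main(void) {
        ggml_backend_t backend = ggml_backend_cpu_init();
        ggml_backend_cpu_set_n_threads(backend, 4);

        // allocate a backend buffer; the CPU backend pads the size by TENSOR_ALIGNMENT
        ggml_backend_buffer_t buffer = ggml_backend_alloc_buffer(backend, 1024 * 1024);

        printf("backend: %s, alignment: %zu, buffer size: %zu\n",
               ggml_backend_name(backend),
               ggml_backend_get_alignment(backend),
               ggml_backend_buffer_get_size(buffer));

        ggml_backend_buffer_free(buffer);
        ggml_backend_free(backend);
        return 0;
    }
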
@@ -0,0 +1,143 @@
+ #pragma once
+
+ #include "ggml.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+     struct ggml_backend;
+     struct ggml_backend_buffer;
+
+     // type-erased backend-specific types / wrappers
+     typedef void * ggml_backend_context_t;
+     typedef void * ggml_backend_graph_plan_t;
+     typedef void * ggml_backend_buffer_context_t;
+
+     // avoid accessing internals of these types
+     typedef struct ggml_backend * ggml_backend_t;
+     typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+
+     //
+     // backend buffer
+     //
+
+     struct ggml_backend_buffer_i {
+         void (*free_buffer) (ggml_backend_buffer_t buffer);
+         void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
+         size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
+         void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
+         void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
+     };
+
+     // TODO: hide behind API
+     struct ggml_backend_buffer {
+         struct ggml_backend_buffer_i iface;
+
+         ggml_backend_t backend;
+         ggml_backend_buffer_context_t context;
+
+         size_t size;
+     };
+
+     // backend buffer functions
+     GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
+             struct ggml_backend * backend,
+             struct ggml_backend_buffer_i iface,
+             ggml_backend_buffer_context_t context,
+             size_t size);
+
+     GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
+     GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+     GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
+     GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
+     GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+     GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+     GGML_API void ggml_backend_buffer_free_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+
+     //
+     // backend
+     //
+
+     struct ggml_backend_i {
+         const char * (*get_name)(ggml_backend_t backend);
+
+         void (*free)(ggml_backend_t backend);
+
+         // buffer allocation
+         ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
+
+         // get buffer alignment
+         size_t (*get_alignment)(ggml_backend_t backend);
+
+         // tensor data access
+         // these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
+         void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+         void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+         void (*synchronize) (ggml_backend_t backend);
+
+         // (optional) copy tensor between different backends, allow for single-copy tranfers
+         void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+         void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+
+         // compute graph with a plan
+         ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+         void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+         void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+
+         // compute graph without a plan
+         void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+         // check if the backend supports an operation
+         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+     };
+
+     // TODO: hide behind API
+     struct ggml_backend {
+         struct ggml_backend_i iface;
+
+         ggml_backend_context_t context;
+     };
+
+     // backend helper functions
+     GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
+
+     GGML_API const char * ggml_backend_name(ggml_backend_t backend);
+     GGML_API void ggml_backend_free(ggml_backend_t backend);
+
+     GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
+
+     GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
+
+     GGML_API void ggml_backend_tensor_set_async( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+     GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+
+     GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+     GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+
+     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
+
+     GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+     GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+     GGML_API void ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+     GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
+
+     // tensor copy between different backends
+     GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
+
+     //
+     // CPU backend
+     //
+
+     GGML_API ggml_backend_t ggml_backend_cpu_init(void);
+
+     GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
+
+     GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
+
+     GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);
+
+ #ifdef __cplusplus
+ }
+ #endif
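
A usage note on `ggml_backend_cpu_buffer_from_ptr`: because the from-ptr buffer interface in the implementation leaves `free_buffer` as NULL, the wrapped memory stays owned by the caller and must be released separately. The sketch below is not part of the package; the 16 MiB size is illustrative, and the 64-byte alignment is chosen to match the CPU backend's `TENSOR_ALIGNMENT`.

    #include <stdlib.h>
    #include "ggml-backend.h"

    // hypothetical sketch, not from the diff: wrap caller-owned memory in a backend buffer
    int main(void) {
        ggml_backend_t backend = ggml_backend_cpu_init();

        size_t size = 16u * 1024 * 1024;
        void * mem  = aligned_alloc(64, size); // 64 matches TENSOR_ALIGNMENT in the CPU backend

        ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(backend, mem, size);

        // ... place tensors in the buffer (e.g. via the ggml-alloc API) and run graphs ...

        ggml_backend_buffer_free(buffer); // does not free `mem`: free_buffer is NULL for from-ptr buffers
        free(mem);                        // the caller remains responsible for the memory
        ggml_backend_free(backend);
        return 0;
    }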