llama_cpp 0.7.0 → 0.7.1

@@ -0,0 +1,385 @@
+ #include "ggml-backend.h"
+ #include "ggml-alloc.h"
+
+ #include <assert.h>
+ #include <stdarg.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+
+ #define UNUSED GGML_UNUSED
+
+ #define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+ // backend buffer
+
+ ggml_backend_buffer_t ggml_backend_buffer_init(
+         struct ggml_backend * backend,
+         struct ggml_backend_buffer_i iface,
+         ggml_backend_buffer_context_t context,
+         size_t size) {
+     ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer));
+
+     GGML_ASSERT(iface.get_base != NULL);
+
+     (*buffer) = (struct ggml_backend_buffer) {
+         /* .interface = */ iface,
+         /* .backend   = */ backend,
+         /* .context   = */ context,
+         /* .size      = */ size,
+     };
+
+     return buffer;
+ }
+
+ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
+     if (buffer->iface.free_buffer != NULL) {
+         buffer->iface.free_buffer(buffer);
+     }
+     free(buffer);
+ }
+
+ size_t ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) {
+     return ggml_backend_get_alignment(buffer->backend);
+ }
+
+ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
+     return buffer->iface.get_base(buffer);
+ }
+
+ size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
+     return buffer->size;
+ }
+
+ size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+     if (buffer->iface.get_alloc_size) {
+         return buffer->iface.get_alloc_size(buffer, tensor);
+     }
+     return ggml_nbytes(tensor);
+ }
+
+ void ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+     if (buffer->iface.init_tensor) {
+         buffer->iface.init_tensor(buffer, tensor);
+     }
+ }
+
+ void ggml_backend_buffer_free_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
+     if (buffer->iface.free_tensor) {
+         buffer->iface.free_tensor(buffer, tensor);
+     }
+ }
+
+ // backend
+
+ ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor) {
+     return tensor->buffer->backend;
+ }
+
+ const char * ggml_backend_name(ggml_backend_t backend) {
+     return backend->iface.get_name(backend);
+ }
+
+ void ggml_backend_free(ggml_backend_t backend) {
+     backend->iface.free(backend);
+ }
+
+ ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size) {
+     return backend->iface.alloc_buffer(backend, size);
+ }
+
+ size_t ggml_backend_get_alignment(ggml_backend_t backend) {
+     return backend->iface.get_alignment(backend);
+ }
+
+ void ggml_backend_tensor_set_async(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.set_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+ }
+
+ void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.get_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+ }
+
+ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.set_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+     ggml_get_backend(tensor)->iface.synchronize(ggml_get_backend(tensor));
+ }
+
+ void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+     ggml_get_backend(tensor)->iface.get_tensor_async(ggml_get_backend(tensor), tensor, data, offset, size);
+     ggml_get_backend(tensor)->iface.synchronize(ggml_get_backend(tensor));
+ }
+
+ void ggml_backend_synchronize(ggml_backend_t backend) {
+     backend->iface.synchronize(backend);
+ }
+
+ ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     return backend->iface.graph_plan_create(backend, cgraph);
+ }
+
+ void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     backend->iface.graph_plan_free(backend, plan);
+ }
+
+ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     backend->iface.graph_plan_compute(backend, plan);
+ }
+
+ void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     backend->iface.graph_compute(backend, cgraph);
+ }
+
+ bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
+     return backend->iface.supports_op(backend, op);
+ }
+
+ // backend copy
+
+ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
+     if (a->type != b->type) {
+         return false;
+     }
+     for (int i = 0; i < GGML_MAX_DIMS; i++) {
+         if (a->ne[i] != b->ne[i]) {
+             return false;
+         }
+         if (a->nb[i] != b->nb[i]) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
+     //printf("src: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", src->name, (int)src->ne[0], (int)src->ne[1], (int)src->ne[2], (int)src->ne[3], (int)src->nb[0], (int)src->nb[1], (int)src->nb[2], (int)src->nb[3]);
+     //printf("dst: %s ne: [%d %d %d %d] nb: [%d %d %d %d]\n", dst->name, (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], (int)dst->nb[0], (int)dst->nb[1], (int)dst->nb[2], (int)dst->nb[3]);
+     GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
+
+     // printf("cpy tensor %s from %s to %s (%lu bytes)\n", src->name, ggml_backend_name(src->backend), ggml_backend_name(dst->backend), ggml_nbytes(src));
+
+     if (src == dst) {
+         return;
+     }
+
+     // TODO: allow backends to support copy to/from same backend
+
+     if (ggml_get_backend(dst)->iface.cpy_tensor_from != NULL) {
+         ggml_get_backend(dst)->iface.cpy_tensor_from(ggml_get_backend(dst), src, dst);
+     } else if (ggml_get_backend(src)->iface.cpy_tensor_to != NULL) {
+         ggml_get_backend(src)->iface.cpy_tensor_to(ggml_get_backend(src), src, dst);
+     } else {
+         // shouldn't be hit when copying from/to CPU
+         #ifndef NDEBUG
+         fprintf(stderr, "ggml_backend_tensor_copy: neither cpy_tensor_from nor cpy_tensor_to are implemented for backends %s and %s, falling back to get/set\n", ggml_backend_name(src->buffer->backend), ggml_backend_name(dst->buffer->backend));
+         #endif
+         size_t nbytes = ggml_nbytes(src);
+         void * data = malloc(nbytes);
+         ggml_backend_tensor_get(src, data, 0, nbytes);
+         ggml_backend_tensor_set(dst, data, 0, nbytes);
+         free(data);
+     }
+ }
+
+ // backend CPU
+
+ struct ggml_backend_cpu_context {
+     int n_threads;
+     void * work_data;
+     size_t work_size;
+ };
+
+ static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
+     return "CPU";
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_free(ggml_backend_t backend) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+     free(cpu_ctx->work_data);
+     free(cpu_ctx);
+     free(backend);
+ }
+
+ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
+     return (void *)buffer->context;
+ }
+
+ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+     free(buffer->context);
+     UNUSED(buffer);
+ }
+
+ static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
+     /* .free_buffer    = */ ggml_backend_cpu_buffer_free_buffer,
+     /* .get_base       = */ ggml_backend_cpu_buffer_get_base,
+     /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
+     /* .init_tensor    = */ NULL, // no initialization required
+     /* .free_tensor    = */ NULL, // no cleanup required
+ };
+
+ // for buffers from ptr, free is not called
+ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
+     /* .free_buffer    = */ NULL, // ptr is not owned by the buffer, so it does not need to be freed
+     /* .get_base       = */ ggml_backend_cpu_buffer_get_base,
+     /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
+     /* .init_tensor    = */ NULL,
+     /* .free_tensor    = */ NULL,
+ };
+
+ static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512
+
+ static ggml_backend_buffer_t ggml_backend_cpu_alloc_buffer(ggml_backend_t backend, size_t size) {
+     size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned
+     void * data = malloc(size); // TODO: maybe use GGML_ALIGNED_MALLOC?
+
+     return ggml_backend_buffer_init(backend, cpu_backend_buffer_i, data, size);
+ }
+
+ static size_t ggml_backend_cpu_get_alignment(ggml_backend_t backend) {
+     return TENSOR_ALIGNMENT;
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_set_tensor_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
+     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
+
+     memcpy((char *)tensor->data + offset, data, size);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_get_tensor_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
+     GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
+
+     memcpy(data, (const char *)tensor->data + offset, size);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_synchronize(ggml_backend_t backend) {
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_cpy_tensor_from(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
+     ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_cpy_tensor_to(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
+     // for a backend such as CUDA that can queue async calls, it is ok to do this asynchronously, but it may not be the case for other backends
+     ggml_backend_tensor_set_async(dst, src->data, 0, ggml_nbytes(src));
+
+     UNUSED(backend);
+ }
+
+ struct ggml_backend_plan_cpu {
+     struct ggml_cplan cplan;
+     struct ggml_cgraph cgraph;
+ };
+
+ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+     struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu));
+
+     cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
+     cpu_plan->cgraph = *cgraph;
+
+     if (cpu_plan->cplan.work_size > 0) {
+         cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
+     }
+
+     return cpu_plan;
+ }
+
+ static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+     free(cpu_plan->cplan.work_data);
+     free(cpu_plan);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
+
+     ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
+
+     UNUSED(backend);
+ }
+
+ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
+
+     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
+
+     if (cpu_ctx->work_size < cplan.work_size) {
+         // TODO: may be faster to free and use malloc to avoid the copy
+         cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size);
+         cpu_ctx->work_size = cplan.work_size;
+     }
+
+     cplan.work_data = cpu_ctx->work_data;
+
+     ggml_graph_compute(cgraph, &cplan);
+ }
+
+ static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
+     return true;
+     UNUSED(backend);
+     UNUSED(op);
+ }
+
+ static struct ggml_backend_i cpu_backend_i = {
+     /* .get_name            = */ ggml_backend_cpu_name,
+     /* .free                = */ ggml_backend_cpu_free,
+     /* .alloc_buffer        = */ ggml_backend_cpu_alloc_buffer,
+     /* .get_alignment       = */ ggml_backend_cpu_get_alignment,
+     /* .set_tensor_async    = */ ggml_backend_cpu_set_tensor_async,
+     /* .get_tensor_async    = */ ggml_backend_cpu_get_tensor_async,
+     /* .synchronize         = */ ggml_backend_cpu_synchronize,
+     /* .cpy_tensor_from     = */ ggml_backend_cpu_cpy_tensor_from,
+     /* .cpy_tensor_to       = */ ggml_backend_cpu_cpy_tensor_to,
+     /* .graph_plan_create   = */ ggml_backend_cpu_graph_plan_create,
+     /* .graph_plan_free     = */ ggml_backend_cpu_graph_plan_free,
+     /* .graph_plan_compute  = */ ggml_backend_cpu_graph_plan_compute,
+     /* .graph_compute       = */ ggml_backend_cpu_graph_compute,
+     /* .supports_op         = */ ggml_backend_cpu_supports_op,
+ };
+
+ ggml_backend_t ggml_backend_cpu_init(void) {
+     struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context));
+
+     ctx->n_threads = GGML_DEFAULT_N_THREADS;
+     ctx->work_data = NULL;
+     ctx->work_size = 0;
+
+     ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend));
+
+     *cpu_backend = (struct ggml_backend) {
+         /* .interface = */ cpu_backend_i,
+         /* .context   = */ ctx
+     };
+     return cpu_backend;
+ }
+
+ bool ggml_backend_is_cpu(ggml_backend_t backend) {
+     return backend->iface.get_name == ggml_backend_cpu_name;
+ }
+
+ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
+     GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+     struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+     ctx->n_threads = n_threads;
+ }
+
+ ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size) {
+     return ggml_backend_buffer_init(backend_cpu, cpu_backend_buffer_i_from_ptr, ptr, size);
+ }
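
The file above gives the CPU backend two ways to obtain a buffer: ggml_backend_alloc_buffer, which mallocs memory owned by the backend and freed through free_buffer, and ggml_backend_cpu_buffer_from_ptr, which wraps caller-owned memory with a NULL free_buffer so the bytes are left alone. A minimal, hypothetical sketch using only the functions defined above (attaching a buffer to tensors is handled separately by ggml-alloc and is not shown):

    // usage_sketch.c -- hypothetical example, not part of the gem sources above
    #include <stdint.h>
    #include <stdio.h>
    #include "ggml-backend.h"

    int main(void) {
        ggml_backend_t cpu = ggml_backend_cpu_init();

        // a 1 MiB buffer owned by the backend; released by ggml_backend_buffer_free
        ggml_backend_buffer_t buf = ggml_backend_alloc_buffer(cpu, 1024 * 1024);
        printf("base=%p size=%zu align=%zu\n",
               ggml_backend_buffer_get_base(buf),
               ggml_backend_buffer_get_size(buf),
               ggml_backend_buffer_get_alignment(buf));

        // wrapping caller-owned memory; free_buffer is NULL, so only the wrapper is freed
        static uint8_t weights[4096];
        ggml_backend_buffer_t wrapped = ggml_backend_cpu_buffer_from_ptr(cpu, weights, sizeof(weights));

        ggml_backend_buffer_free(wrapped);
        ggml_backend_buffer_free(buf);
        ggml_backend_free(cpu);
        return 0;
    }
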
@@ -0,0 +1,143 @@
+ #pragma once
+
+ #include "ggml.h"
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+     struct ggml_backend;
+     struct ggml_backend_buffer;
+
+     // type-erased backend-specific types / wrappers
+     typedef void * ggml_backend_context_t;
+     typedef void * ggml_backend_graph_plan_t;
+     typedef void * ggml_backend_buffer_context_t;
+
+     // avoid accessing internals of these types
+     typedef struct ggml_backend * ggml_backend_t;
+     typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
+
+     //
+     // backend buffer
+     //
+
+     struct ggml_backend_buffer_i {
+         void   (*free_buffer)   (ggml_backend_buffer_t buffer);
+         void * (*get_base)      (ggml_backend_buffer_t buffer); // get base pointer
+         size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
+         void   (*init_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
+         void   (*free_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
+     };
+
+     // TODO: hide behind API
+     struct ggml_backend_buffer {
+         struct ggml_backend_buffer_i iface;
+
+         ggml_backend_t backend;
+         ggml_backend_buffer_context_t context;
+
+         size_t size;
+     };
+
+     // backend buffer functions
+     GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
+             struct ggml_backend * backend,
+             struct ggml_backend_buffer_i iface,
+             ggml_backend_buffer_context_t context,
+             size_t size);
+
+     GGML_API void   ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
+     GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
+     GGML_API void * ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
+     GGML_API size_t ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
+     GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+     GGML_API void   ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+     GGML_API void   ggml_backend_buffer_free_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+
+     //
+     // backend
+     //
+
+     struct ggml_backend_i {
+         const char * (*get_name)(ggml_backend_t backend);
+
+         void (*free)(ggml_backend_t backend);
+
+         // buffer allocation
+         ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
+
+         // get buffer alignment
+         size_t (*get_alignment)(ggml_backend_t backend);
+
+         // tensor data access
+         // these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
+         void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+         void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+         void (*synchronize)     (ggml_backend_t backend);
+
+         // (optional) copy tensor between different backends, allows for single-copy transfers
+         void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+         void (*cpy_tensor_to)  (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+
+         // compute graph with a plan
+         ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+         void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+         void                      (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+
+         // compute graph without a plan
+         void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+         // check if the backend supports an operation
+         bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+     };
+
+     // TODO: hide behind API
+     struct ggml_backend {
+         struct ggml_backend_i iface;
+
+         ggml_backend_context_t context;
+     };
+
+     // backend helper functions
+     GGML_API ggml_backend_t ggml_get_backend(const struct ggml_tensor * tensor);
+
+     GGML_API const char * ggml_backend_name(ggml_backend_t backend);
+     GGML_API void         ggml_backend_free(ggml_backend_t backend);
+
+     GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
+
+     GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
+
+     GGML_API void ggml_backend_tensor_set_async(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+     GGML_API void ggml_backend_tensor_get_async(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+
+     GGML_API void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+     GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+
+     GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
+
+     GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+
+     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+     GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+     GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);
+
+     // tensor copy between different backends
+     GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
+
+     //
+     // CPU backend
+     //
+
+     GGML_API ggml_backend_t ggml_backend_cpu_init(void);
+
+     GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
+
+     GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
+
+     GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cpu, void * ptr, size_t size);
+
+ #ifdef __cplusplus
+ }
+ #endif
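
This header declares everything needed to run a graph on the CPU backend: create the backend, pick a thread count, and hand it a ggml graph. A minimal, hypothetical end-to-end sketch follows; it assumes the ggml.h API bundled with this gem version (ggml_init, ggml_new_tensor_1d, ggml_add, ggml_set_f32, ggml_build_forward, ggml_get_f32_1d), and the memory size and thread count below are arbitrary:

    // add_sketch.c -- hypothetical example, not shipped with the gem
    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-backend.h"

    int main(void) {
        // tensors and the graph live in a regular ggml context (plain CPU memory)
        struct ggml_init_params params = {
            /* .mem_size   = */ 16 * 1024 * 1024,
            /* .mem_buffer = */ NULL,
            /* .no_alloc   = */ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * c = ggml_add(ctx, a, b);

        ggml_set_f32(a, 1.0f);
        ggml_set_f32(b, 2.0f);

        struct ggml_cgraph gf = ggml_build_forward(c);

        // the backend plans the graph (ggml_graph_plan) and runs it with n_threads
        ggml_backend_t backend = ggml_backend_cpu_init();
        ggml_backend_cpu_set_n_threads(backend, 4);
        ggml_backend_graph_compute(backend, &gf);

        printf("c[0] = %f\n", ggml_get_f32_1d(c, 0)); // expected: 3.0

        ggml_backend_free(backend);
        ggml_free(ctx);
        return 0;
    }
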