whispercpp 1.2.0.1 → 1.3.0

Files changed (9)
  1. checksums.yaml +4 -4
  2. data/Rakefile +4 -19
  3. data/ext/extconf.rb +9 -0
  4. data/ext/ggml.c +18380 -5241
  5. data/ext/ggml.h +2156 -502
  6. data/ext/ruby_whisper.cpp +2 -2
  7. data/ext/whisper.cpp +4184 -1774
  8. data/ext/whisper.h +348 -56
  9. metadata +3 -3
data/ext/ggml.h CHANGED
@@ -58,14 +58,15 @@
  // {
  // ...
  //
- //     struct ggml_cgraph gf = ggml_build_forward(f);
+ //     struct ggml_cgraph * gf = ggml_new_graph(ctx);
+ //     ggml_build_forward_expand(gf, f);
  //
  //     // set the input variable and parameter values
  //     ggml_set_f32(x, 2.0f);
  //     ggml_set_f32(a, 3.0f);
  //     ggml_set_f32(b, 4.0f);
  //
- //     ggml_graph_compute(ctx0, &gf);
+ //     ggml_graph_compute_with_ctx(ctx, &gf, n_threads);
  //
  //     printf("f = %f\n", ggml_get_f32_1d(f, 0));
  //
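The hunk above tracks the ggml graph API change picked up between these two gem versions: a graph is now allocated from the context with ggml_new_graph(), populated with ggml_build_forward_expand(), and run through ggml_graph_compute_with_ctx(). For binding authors updating against the new data/ext/ggml.h, a minimal stand-alone C sketch of that workflow (not taken from the gem itself; the pool size and thread count are arbitrary, and gf is passed directly since ggml_new_graph() already returns a pointer) might look like this:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // small memory pool for this toy graph (size chosen generously)
        struct ggml_init_params params = {
            .mem_size   = 16*1024*1024,
            .mem_buffer = NULL,   // let ggml allocate the pool internally
            .no_alloc   = false,
        };
        struct ggml_context * ctx = ggml_init(params);

        // f(x) = a*x^2 + b, mirroring the example in the header comment
        struct ggml_tensor * x  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        struct ggml_tensor * b  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
        struct ggml_tensor * f  = ggml_add(ctx, ggml_mul(ctx, a, x2), b);

        // new-style graph construction: allocate the graph, then expand it from f
        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, f);

        // set input and parameter values, then run the forward pass
        ggml_set_f32(x, 2.0f);
        ggml_set_f32(a, 3.0f);
        ggml_set_f32(b, 4.0f);
        ggml_graph_compute_with_ctx(ctx, gf, 4 /* n_threads */);

        printf("f = %f\n", ggml_get_f32_1d(f, 0)); // 3*2^2 + 4 = 16
        ggml_free(ctx);
        return 0;
    }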
@@ -130,13 +131,16 @@
  // The data of the tensor is accessed via the "data" pointer. For example:
  //
  // {
- //     struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
+ //     const int nx = 2;
+ //     const int ny = 3;
  //
- //     // a[1, 2] = 1.0f;
- //     *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
+ //     struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
  //
- //     // a[2, 0] = 2.0f;
- //     *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
+ //     for (int y = 0; y < ny; y++) {
+ //         for (int x = 0; x < nx; x++) {
+ //             *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
+ //         }
+ //     }
  //
  // ...
  // }
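The rewritten comment above demonstrates strided element access through the nb[] byte strides rather than two hard-coded writes. As a quick sanity check of that pattern, a small sketch is shown below; it assumes the accessors declared further down in the new header (e.g. ggml_get_f32_nd()) and is illustrative only:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            .mem_size   = 4*1024*1024,
            .mem_buffer = NULL,
            .no_alloc   = false,
        };
        struct ggml_context * ctx = ggml_init(params);

        const int nx = 2; // elements per row -> ne[0]
        const int ny = 3; // number of rows   -> ne[1]
        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);

        // nb[0] is the byte stride between elements, nb[1] between rows,
        // so element (x, y) lives at data + y*nb[1] + x*nb[0]
        for (int y = 0; y < ny; y++) {
            for (int x = 0; x < nx; x++) {
                *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = (float)(x + y);
            }
        }

        printf("a[1,2] = %f\n", ggml_get_f32_nd(a, 1, 2, 0, 0)); // prints 3.000000
        ggml_free(ctx);
        return 0;
    }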
@@ -169,579 +173,2229 @@
169
173
  //
170
174
  //
171
175
 
172
- #ifdef __cplusplus
173
- extern "C" {
176
+ #ifdef GGML_SHARED
177
+ # if defined(_WIN32) && !defined(__MINGW32__)
178
+ # ifdef GGML_BUILD
179
+ # define GGML_API __declspec(dllexport)
180
+ # else
181
+ # define GGML_API __declspec(dllimport)
182
+ # endif
183
+ # else
184
+ # define GGML_API __attribute__ ((visibility ("default")))
185
+ # endif
186
+ #else
187
+ # define GGML_API
188
+ #endif
189
+
190
+ #ifdef GGML_MULTIPLATFORM
191
+ # if defined(_WIN32)
192
+ # define GGML_CALL
193
+ # else
194
+ # define GGML_CALL __attribute__((__ms_abi__))
195
+ # endif
196
+ #else
197
+ # define GGML_CALL
198
+ #endif
199
+
200
+ // TODO: support for clang
201
+ #ifdef __GNUC__
202
+ # define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
203
+ #elif defined(_MSC_VER)
204
+ # define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
205
+ #else
206
+ # define GGML_DEPRECATED(func, hint) func
207
+ #endif
208
+
209
+ #ifndef __GNUC__
210
+ # define GGML_ATTRIBUTE_FORMAT(...)
211
+ #elif defined(__MINGW32__)
212
+ # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
213
+ #else
214
+ # define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
174
215
  #endif
175
216
 
176
- #include <stdint.h>
177
- #include <stddef.h>
178
217
  #include <stdbool.h>
218
+ #include <stddef.h>
219
+ #include <stdint.h>
220
+ #include <stdio.h>
221
+
222
+ #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
223
+ #define GGML_FILE_VERSION 1
179
224
 
180
- #define GGML_MAX_DIMS 4
181
- #define GGML_MAX_NODES 4096
182
- #define GGML_MAX_PARAMS 16
183
- #define GGML_MAX_CONTEXTS 64
184
- #define GGML_MAX_OPT 4
225
+ #define GGML_QNT_VERSION 2 // bump this on quantization format changes
226
+ #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
185
227
 
186
- #ifdef __ARM_NEON
187
- // we use the built-in 16-bit float type
188
- typedef __fp16 ggml_fp16_t;
228
+ #define GGML_MAX_DIMS 4
229
+ #define GGML_MAX_PARAMS 2048
230
+ #define GGML_MAX_CONTEXTS 64
231
+ #define GGML_MAX_SRC 10
232
+ #ifndef GGML_MAX_NAME
233
+ #define GGML_MAX_NAME 64
234
+ #endif
235
+ #define GGML_MAX_OP_PARAMS 64
236
+ #define GGML_DEFAULT_N_THREADS 4
237
+ #define GGML_DEFAULT_GRAPH_SIZE 2048
238
+ #if UINTPTR_MAX == 0xFFFFFFFF
239
+ #define GGML_MEM_ALIGN 4
189
240
  #else
190
- typedef uint16_t ggml_fp16_t;
241
+ #define GGML_MEM_ALIGN 16
191
242
  #endif
192
243
 
193
- // convert FP16 <-> FP32
194
- float ggml_fp16_to_fp32(ggml_fp16_t x);
195
- ggml_fp16_t ggml_fp32_to_fp16(float x);
196
-
197
- struct ggml_object;
198
- struct ggml_context;
199
-
200
- enum ggml_type {
201
- GGML_TYPE_I8,
202
- GGML_TYPE_I16,
203
- GGML_TYPE_I32,
204
- GGML_TYPE_F16,
205
- GGML_TYPE_F32,
206
- GGML_TYPE_COUNT,
207
- };
208
-
209
- // available tensor operations:
210
- enum ggml_op {
211
- GGML_OP_NONE = 0,
212
-
213
- GGML_OP_DUP,
214
- GGML_OP_ADD,
215
- GGML_OP_SUB,
216
- GGML_OP_MUL,
217
- GGML_OP_DIV,
218
- GGML_OP_SQR,
219
- GGML_OP_SQRT,
220
- GGML_OP_SUM,
221
- GGML_OP_MEAN,
222
- GGML_OP_REPEAT,
223
- GGML_OP_ABS,
224
- GGML_OP_SGN,
225
- GGML_OP_NEG,
226
- GGML_OP_STEP,
227
- GGML_OP_RELU,
228
- GGML_OP_GELU,
229
- GGML_OP_NORM, // normalize
230
-
231
- GGML_OP_MUL_MAT,
232
-
233
- GGML_OP_SCALE,
234
- GGML_OP_CPY,
235
- GGML_OP_RESHAPE,
236
- GGML_OP_VIEW,
237
- GGML_OP_PERMUTE,
238
- GGML_OP_TRANSPOSE,
239
- GGML_OP_GET_ROWS,
240
- GGML_OP_DIAG_MASK_INF,
241
- GGML_OP_SOFT_MAX,
242
- GGML_OP_ROPE,
243
- GGML_OP_CONV_1D_1S,
244
- GGML_OP_CONV_1D_2S,
245
-
246
- GGML_OP_FLASH_ATTN,
247
- GGML_OP_FLASH_FF,
248
-
249
- GGML_OP_COUNT,
250
- };
251
-
252
- // n-dimensional tensor
253
- struct ggml_tensor {
254
- enum ggml_type type;
255
-
256
- int n_dims;
257
- int ne[GGML_MAX_DIMS]; // number of elements
258
- size_t nb[GGML_MAX_DIMS]; // stride in bytes:
259
- // nb[0] = sizeof(type)
260
- // nb[1] = nb[0] * ne[0] + padding
261
- // nb[i] = nb[i-1] * ne[i-1]
262
-
263
- // compute data
264
- enum ggml_op op;
265
-
266
- bool is_param;
267
-
268
- struct ggml_tensor * grad;
269
- struct ggml_tensor * src0;
270
- struct ggml_tensor * src1;
271
- struct ggml_tensor * opt[GGML_MAX_OPT];
272
-
273
- // thread scheduling
274
- int n_tasks;
275
-
276
- // performance
277
- int perf_runs;
278
- int64_t perf_cycles;
279
- int64_t perf_time_us;
280
-
281
- void * data;
282
- char padding[8];
283
- };
284
-
285
- // computation graph
286
- struct ggml_cgraph {
287
- int n_nodes;
288
- int n_leafs;
289
- int n_threads;
290
-
291
- size_t work_size;
292
- struct ggml_tensor * work;
293
-
294
- struct ggml_tensor * nodes[GGML_MAX_NODES];
295
- struct ggml_tensor * grads[GGML_MAX_NODES];
296
- struct ggml_tensor * leafs[GGML_MAX_NODES];
297
-
298
- // performance
299
- int perf_runs;
300
- int64_t perf_cycles;
301
- int64_t perf_time_us;
302
- };
303
-
304
- // scratch buffer
305
- struct ggml_scratch {
306
- size_t offs;
307
- size_t size;
308
- void * data;
309
- };
310
-
311
- struct ggml_init_params {
312
- // memory pool
313
- size_t mem_size; // bytes
314
- void * mem_buffer; // if NULL, memory will be allocated internally
315
- };
316
-
317
- void ggml_time_init(void); // call this once at the beginning of the program
318
- int64_t ggml_time_ms(void);
319
- int64_t ggml_time_us(void);
320
- int64_t ggml_cycles(void);
321
- int64_t ggml_cycles_per_ms(void);
322
-
323
- void ggml_print_object (const struct ggml_object * obj);
324
- void ggml_print_objects(const struct ggml_context * ctx);
325
-
326
- int ggml_nelements(const struct ggml_tensor * tensor);
327
- size_t ggml_nbytes (const struct ggml_tensor * tensor);
328
-
329
- size_t ggml_type_size (enum ggml_type type);
330
- size_t ggml_element_size(const struct ggml_tensor * tensor);
331
-
332
- struct ggml_context * ggml_init(struct ggml_init_params params);
333
- void ggml_free(struct ggml_context * ctx);
334
-
335
- size_t ggml_used_mem(const struct ggml_context * ctx);
336
-
337
- size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
338
-
339
- struct ggml_tensor * ggml_new_tensor(
340
- struct ggml_context * ctx,
341
- enum ggml_type type,
342
- int n_dims,
343
- const int *ne);
244
+ #define GGML_EXIT_SUCCESS 0
245
+ #define GGML_EXIT_ABORTED 1
344
246
 
345
- struct ggml_tensor * ggml_new_tensor_1d(
346
- struct ggml_context * ctx,
347
- enum ggml_type type,
348
- int ne0);
247
+ #define GGUF_MAGIC "GGUF"
349
248
 
350
- struct ggml_tensor * ggml_new_tensor_2d(
351
- struct ggml_context * ctx,
352
- enum ggml_type type,
353
- int ne0,
354
- int ne1);
249
+ #define GGUF_VERSION 3
355
250
 
356
- struct ggml_tensor * ggml_new_tensor_3d(
357
- struct ggml_context * ctx,
358
- enum ggml_type type,
359
- int ne0,
360
- int ne1,
361
- int ne2);
251
+ #define GGUF_DEFAULT_ALIGNMENT 32
362
252
 
363
- struct ggml_tensor * ggml_new_tensor_4d(
364
- struct ggml_context * ctx,
365
- enum ggml_type type,
366
- int ne0,
367
- int ne1,
368
- int ne2,
369
- int ne3);
253
+ #define GGML_UNUSED(x) (void)(x)
370
254
 
371
- struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
372
- struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
255
+ #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
373
256
 
374
- struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
375
- struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
257
+ #define GGML_ASSERT(x) \
258
+ do { \
259
+ if (!(x)) { \
260
+ fflush(stdout); \
261
+ fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
262
+ ggml_print_backtrace(); \
263
+ abort(); \
264
+ } \
265
+ } while (0)
376
266
 
377
- struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
378
- struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
379
- struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
267
+ #ifndef NDEBUG
268
+ #define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
269
+ #elif defined(__GNUC__)
270
+ #define GGML_UNREACHABLE() __builtin_unreachable()
271
+ #elif defined(_MSC_VER)
272
+ #define GGML_UNREACHABLE() __assume(0)
273
+ #else
274
+ #define GGML_UNREACHABLE() ((void) 0)
275
+ #endif
380
276
 
381
- int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
382
- void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
277
+ // used to copy the number of elements and stride in bytes of tensors into local variables.
278
+ // main purpose is to reduce code duplication and improve readability.
279
+ //
280
+ // example:
281
+ //
282
+ // GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
283
+ // GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
284
+ //
285
+ #define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
286
+ const type prefix##0 = (pointer)->array[0]; \
287
+ GGML_UNUSED(prefix##0);
288
+ #define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
289
+ GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
290
+ const type prefix##1 = (pointer)->array[1]; \
291
+ GGML_UNUSED(prefix##1);
292
+ #define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
293
+ GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
294
+ const type prefix##2 = (pointer)->array[2]; \
295
+ GGML_UNUSED(prefix##2);
296
+ #define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
297
+ GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
298
+ const type prefix##3 = (pointer)->array[3]; \
299
+ GGML_UNUSED(prefix##3);
300
+
301
+ #define GGML_TENSOR_UNARY_OP_LOCALS \
302
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
303
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
304
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
305
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
306
+
307
+ #define GGML_TENSOR_BINARY_OP_LOCALS \
308
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
309
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
310
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
311
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
312
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
313
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
383
314
 
384
- float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
385
- void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
315
+ #ifdef __cplusplus
316
+ extern "C" {
317
+ #endif
386
318
 
387
- void * ggml_get_data (const struct ggml_tensor * tensor);
388
- float * ggml_get_data_f32(const struct ggml_tensor * tensor);
319
+ enum ggml_status {
320
+ GGML_STATUS_ALLOC_FAILED = -2,
321
+ GGML_STATUS_FAILED = -1,
322
+ GGML_STATUS_SUCCESS = 0,
323
+ GGML_STATUS_ABORTED = 1,
324
+ };
325
+
326
+ // get ggml_status name string
327
+ GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
328
+
329
+ typedef uint16_t ggml_fp16_t;
330
+
331
+ // convert FP16 <-> FP32
332
+ GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
333
+ GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
334
+
335
+ GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
336
+ GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
337
+
338
+ struct ggml_object;
339
+ struct ggml_context;
340
+
341
+ // NOTE: always add types at the end of the enum to keep backward compatibility
342
+ enum ggml_type {
343
+ GGML_TYPE_F32 = 0,
344
+ GGML_TYPE_F16 = 1,
345
+ GGML_TYPE_Q4_0 = 2,
346
+ GGML_TYPE_Q4_1 = 3,
347
+ // GGML_TYPE_Q4_2 = 4, support has been removed
348
+ // GGML_TYPE_Q4_3 = 5, support has been removed
349
+ GGML_TYPE_Q5_0 = 6,
350
+ GGML_TYPE_Q5_1 = 7,
351
+ GGML_TYPE_Q8_0 = 8,
352
+ GGML_TYPE_Q8_1 = 9,
353
+ GGML_TYPE_Q2_K = 10,
354
+ GGML_TYPE_Q3_K = 11,
355
+ GGML_TYPE_Q4_K = 12,
356
+ GGML_TYPE_Q5_K = 13,
357
+ GGML_TYPE_Q6_K = 14,
358
+ GGML_TYPE_Q8_K = 15,
359
+ GGML_TYPE_IQ2_XXS = 16,
360
+ GGML_TYPE_IQ2_XS = 17,
361
+ GGML_TYPE_IQ3_XXS = 18,
362
+ GGML_TYPE_IQ1_S = 19,
363
+ GGML_TYPE_IQ4_NL = 20,
364
+ GGML_TYPE_IQ3_S = 21,
365
+ GGML_TYPE_IQ2_S = 22,
366
+ GGML_TYPE_IQ4_XS = 23,
367
+ GGML_TYPE_I8 = 24,
368
+ GGML_TYPE_I16 = 25,
369
+ GGML_TYPE_I32 = 26,
370
+ GGML_TYPE_I64 = 27,
371
+ GGML_TYPE_F64 = 28,
372
+ GGML_TYPE_IQ1_M = 29,
373
+ GGML_TYPE_COUNT,
374
+ };
375
+
376
+ // precision
377
+ enum ggml_prec {
378
+ GGML_PREC_DEFAULT,
379
+ GGML_PREC_F32,
380
+ };
381
+
382
+ enum ggml_backend_type {
383
+ GGML_BACKEND_TYPE_CPU = 0,
384
+ GGML_BACKEND_TYPE_GPU = 10,
385
+ GGML_BACKEND_TYPE_GPU_SPLIT = 20,
386
+ };
387
+
388
+ // model file types
389
+ enum ggml_ftype {
390
+ GGML_FTYPE_UNKNOWN = -1,
391
+ GGML_FTYPE_ALL_F32 = 0,
392
+ GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
393
+ GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
394
+ GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
395
+ GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
396
+ GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
397
+ GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
398
+ GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
399
+ GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
400
+ GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
401
+ GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
402
+ GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
403
+ GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
404
+ GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors
405
+ GGML_FTYPE_MOSTLY_IQ2_XS = 16, // except 1d tensors
406
+ GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
407
+ GGML_FTYPE_MOSTLY_IQ1_S = 18, // except 1d tensors
408
+ GGML_FTYPE_MOSTLY_IQ4_NL = 19, // except 1d tensors
409
+ GGML_FTYPE_MOSTLY_IQ3_S = 20, // except 1d tensors
410
+ GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
411
+ GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
412
+ GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
413
+ };
414
+
415
+ // available tensor operations:
416
+ enum ggml_op {
417
+ GGML_OP_NONE = 0,
418
+
419
+ GGML_OP_DUP,
420
+ GGML_OP_ADD,
421
+ GGML_OP_ADD1,
422
+ GGML_OP_ACC,
423
+ GGML_OP_SUB,
424
+ GGML_OP_MUL,
425
+ GGML_OP_DIV,
426
+ GGML_OP_SQR,
427
+ GGML_OP_SQRT,
428
+ GGML_OP_LOG,
429
+ GGML_OP_SUM,
430
+ GGML_OP_SUM_ROWS,
431
+ GGML_OP_MEAN,
432
+ GGML_OP_ARGMAX,
433
+ GGML_OP_REPEAT,
434
+ GGML_OP_REPEAT_BACK,
435
+ GGML_OP_CONCAT,
436
+ GGML_OP_SILU_BACK,
437
+ GGML_OP_NORM, // normalize
438
+ GGML_OP_RMS_NORM,
439
+ GGML_OP_RMS_NORM_BACK,
440
+ GGML_OP_GROUP_NORM,
441
+
442
+ GGML_OP_MUL_MAT,
443
+ GGML_OP_MUL_MAT_ID,
444
+ GGML_OP_OUT_PROD,
445
+
446
+ GGML_OP_SCALE,
447
+ GGML_OP_SET,
448
+ GGML_OP_CPY,
449
+ GGML_OP_CONT,
450
+ GGML_OP_RESHAPE,
451
+ GGML_OP_VIEW,
452
+ GGML_OP_PERMUTE,
453
+ GGML_OP_TRANSPOSE,
454
+ GGML_OP_GET_ROWS,
455
+ GGML_OP_GET_ROWS_BACK,
456
+ GGML_OP_DIAG,
457
+ GGML_OP_DIAG_MASK_INF,
458
+ GGML_OP_DIAG_MASK_ZERO,
459
+ GGML_OP_SOFT_MAX,
460
+ GGML_OP_SOFT_MAX_BACK,
461
+ GGML_OP_ROPE,
462
+ GGML_OP_ROPE_BACK,
463
+ GGML_OP_ALIBI,
464
+ GGML_OP_CLAMP,
465
+ GGML_OP_CONV_TRANSPOSE_1D,
466
+ GGML_OP_IM2COL,
467
+ GGML_OP_CONV_TRANSPOSE_2D,
468
+ GGML_OP_POOL_1D,
469
+ GGML_OP_POOL_2D,
470
+ GGML_OP_UPSCALE, // nearest interpolate
471
+ GGML_OP_PAD,
472
+ GGML_OP_ARANGE,
473
+ GGML_OP_TIMESTEP_EMBEDDING,
474
+ GGML_OP_ARGSORT,
475
+ GGML_OP_LEAKY_RELU,
476
+
477
+ GGML_OP_FLASH_ATTN,
478
+ GGML_OP_FLASH_FF,
479
+ GGML_OP_FLASH_ATTN_BACK,
480
+ GGML_OP_SSM_CONV,
481
+ GGML_OP_SSM_SCAN,
482
+ GGML_OP_WIN_PART,
483
+ GGML_OP_WIN_UNPART,
484
+ GGML_OP_GET_REL_POS,
485
+ GGML_OP_ADD_REL_POS,
486
+
487
+ GGML_OP_UNARY,
488
+
489
+ GGML_OP_MAP_UNARY,
490
+ GGML_OP_MAP_BINARY,
491
+
492
+ GGML_OP_MAP_CUSTOM1_F32,
493
+ GGML_OP_MAP_CUSTOM2_F32,
494
+ GGML_OP_MAP_CUSTOM3_F32,
495
+
496
+ GGML_OP_MAP_CUSTOM1,
497
+ GGML_OP_MAP_CUSTOM2,
498
+ GGML_OP_MAP_CUSTOM3,
499
+
500
+ GGML_OP_CROSS_ENTROPY_LOSS,
501
+ GGML_OP_CROSS_ENTROPY_LOSS_BACK,
502
+
503
+ GGML_OP_COUNT,
504
+ };
505
+
506
+ enum ggml_unary_op {
507
+ GGML_UNARY_OP_ABS,
508
+ GGML_UNARY_OP_SGN,
509
+ GGML_UNARY_OP_NEG,
510
+ GGML_UNARY_OP_STEP,
511
+ GGML_UNARY_OP_TANH,
512
+ GGML_UNARY_OP_ELU,
513
+ GGML_UNARY_OP_RELU,
514
+ GGML_UNARY_OP_GELU,
515
+ GGML_UNARY_OP_GELU_QUICK,
516
+ GGML_UNARY_OP_SILU,
517
+ GGML_UNARY_OP_HARDSWISH,
518
+ GGML_UNARY_OP_HARDSIGMOID,
519
+
520
+ GGML_UNARY_OP_COUNT,
521
+ };
522
+
523
+ enum ggml_object_type {
524
+ GGML_OBJECT_TYPE_TENSOR,
525
+ GGML_OBJECT_TYPE_GRAPH,
526
+ GGML_OBJECT_TYPE_WORK_BUFFER
527
+ };
528
+
529
+ enum ggml_log_level {
530
+ GGML_LOG_LEVEL_ERROR = 2,
531
+ GGML_LOG_LEVEL_WARN = 3,
532
+ GGML_LOG_LEVEL_INFO = 4,
533
+ GGML_LOG_LEVEL_DEBUG = 5
534
+ };
535
+
536
+ enum ggml_tensor_flag {
537
+ GGML_TENSOR_FLAG_INPUT = 1,
538
+ GGML_TENSOR_FLAG_OUTPUT = 2,
539
+ GGML_TENSOR_FLAG_PARAM = 4,
540
+ };
541
+
542
+ // ggml object
543
+ struct ggml_object {
544
+ size_t offs;
545
+ size_t size;
546
+
547
+ struct ggml_object * next;
548
+
549
+ enum ggml_object_type type;
550
+
551
+ char padding[4];
552
+ };
553
+
554
+ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
555
+
556
+ // n-dimensional tensor
557
+ struct ggml_tensor {
558
+ enum ggml_type type;
559
+ enum ggml_backend_type backend;
560
+
561
+ struct ggml_backend_buffer * buffer;
562
+
563
+ int64_t ne[GGML_MAX_DIMS]; // number of elements
564
+ size_t nb[GGML_MAX_DIMS]; // stride in bytes:
565
+ // nb[0] = ggml_type_size(type)
566
+ // nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding
567
+ // nb[i] = nb[i-1] * ne[i-1]
568
+
569
+ // compute data
570
+ enum ggml_op op;
571
+
572
+ // op params - allocated as int32_t for alignment
573
+ int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
574
+
575
+ int32_t flags;
576
+
577
+ struct ggml_tensor * grad;
578
+ struct ggml_tensor * src[GGML_MAX_SRC];
579
+
580
+ // performance
581
+ int perf_runs;
582
+ int64_t perf_cycles;
583
+ int64_t perf_time_us;
389
584
 
390
- //
391
- // operations on tensors with backpropagation
392
- //
585
+ struct ggml_tensor * view_src;
586
+ size_t view_offs;
587
+
588
+ void * data;
589
+
590
+ char name[GGML_MAX_NAME];
591
+
592
+ void * extra; // extra things e.g. for ggml-cuda.cu
593
+
594
+ char padding[8];
595
+ };
596
+
597
+ static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
598
+
599
+ // Abort callback
600
+ // If not NULL, called before ggml computation
601
+ // If it returns true, the computation is aborted
602
+ typedef bool (*ggml_abort_callback)(void * data);
603
+
604
+ // the compute plan that needs to be prepared for ggml_graph_compute()
605
+ // since https://github.com/ggerganov/ggml/issues/287
606
+ struct ggml_cplan {
607
+ size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
608
+ uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
609
+
610
+ int n_threads;
611
+
612
+ // abort ggml_graph_compute when true
613
+ ggml_abort_callback abort_callback;
614
+ void * abort_callback_data;
615
+ };
393
616
 
394
- struct ggml_tensor * ggml_dup(
395
- struct ggml_context * ctx,
396
- struct ggml_tensor * a);
617
+ enum ggml_cgraph_eval_order {
618
+ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
619
+ GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
620
+ GGML_CGRAPH_EVAL_ORDER_COUNT
621
+ };
397
622
 
398
- struct ggml_tensor * ggml_add(
399
- struct ggml_context * ctx,
400
- struct ggml_tensor * a,
401
- struct ggml_tensor * b);
623
+ struct ggml_hash_set {
624
+ size_t size;
625
+ struct ggml_tensor ** keys;
626
+ };
402
627
 
403
- struct ggml_tensor * ggml_sub(
404
- struct ggml_context * ctx,
405
- struct ggml_tensor * a,
406
- struct ggml_tensor * b);
628
+ // computation graph
629
+ struct ggml_cgraph {
630
+ int size;
631
+ int n_nodes;
632
+ int n_leafs;
407
633
 
408
- struct ggml_tensor * ggml_mul(
409
- struct ggml_context * ctx,
410
- struct ggml_tensor * a,
411
- struct ggml_tensor * b);
634
+ struct ggml_tensor ** nodes;
635
+ struct ggml_tensor ** grads;
636
+ struct ggml_tensor ** leafs;
412
637
 
413
- struct ggml_tensor * ggml_div(
414
- struct ggml_context * ctx,
415
- struct ggml_tensor * a,
416
- struct ggml_tensor * b);
638
+ struct ggml_hash_set visited_hash_table;
417
639
 
418
- struct ggml_tensor * ggml_sqr(
419
- struct ggml_context * ctx,
420
- struct ggml_tensor * a);
640
+ enum ggml_cgraph_eval_order order;
421
641
 
422
- struct ggml_tensor * ggml_sqrt(
423
- struct ggml_context * ctx,
424
- struct ggml_tensor * a);
642
+ // performance
643
+ int perf_runs;
644
+ int64_t perf_cycles;
645
+ int64_t perf_time_us;
646
+ };
425
647
 
426
- // return scalar
427
- // TODO: compute sum along rows
428
- struct ggml_tensor * ggml_sum(
429
- struct ggml_context * ctx,
430
- struct ggml_tensor * a);
648
+ // scratch buffer
649
+ struct ggml_scratch {
650
+ size_t offs;
651
+ size_t size;
652
+ void * data;
653
+ };
431
654
 
432
- // mean along rows
433
- struct ggml_tensor * ggml_mean(
434
- struct ggml_context * ctx,
435
- struct ggml_tensor * a);
655
+ struct ggml_init_params {
656
+ // memory pool
657
+ size_t mem_size; // bytes
658
+ void * mem_buffer; // if NULL, memory will be allocated internally
659
+ bool no_alloc; // don't allocate memory for the tensor data
660
+ };
436
661
 
437
- // if a is the same shape as b, and a is not parameter, return a
438
- // otherwise, return a new tensor: repeat(a) to fit in b
439
- struct ggml_tensor * ggml_repeat(
440
- struct ggml_context * ctx,
441
- struct ggml_tensor * a,
442
- struct ggml_tensor * b);
443
662
 
444
- struct ggml_tensor * ggml_abs(
445
- struct ggml_context * ctx,
446
- struct ggml_tensor * a);
663
+ // compute types
447
664
 
448
- struct ggml_tensor * ggml_sgn(
449
- struct ggml_context * ctx,
450
- struct ggml_tensor * a);
665
+ // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
666
+ // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
667
+ enum ggml_task_type {
668
+ GGML_TASK_TYPE_INIT = 0,
669
+ GGML_TASK_TYPE_COMPUTE,
670
+ GGML_TASK_TYPE_FINALIZE,
671
+ };
451
672
 
452
- struct ggml_tensor * ggml_neg(
453
- struct ggml_context * ctx,
454
- struct ggml_tensor * a);
673
+ struct ggml_compute_params {
674
+ enum ggml_task_type type;
455
675
 
456
- struct ggml_tensor * ggml_step(
457
- struct ggml_context * ctx,
458
- struct ggml_tensor * a);
676
+ // ith = thread index, nth = number of threads
677
+ int ith, nth;
678
+
679
+ // work buffer for all threads
680
+ size_t wsize;
681
+ void * wdata;
682
+ };
459
683
 
460
- struct ggml_tensor * ggml_relu(
461
- struct ggml_context * ctx,
462
- struct ggml_tensor * a);
684
+ // numa strategies
685
+ enum ggml_numa_strategy {
686
+ GGML_NUMA_STRATEGY_DISABLED = 0,
687
+ GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
688
+ GGML_NUMA_STRATEGY_ISOLATE = 2,
689
+ GGML_NUMA_STRATEGY_NUMACTL = 3,
690
+ GGML_NUMA_STRATEGY_MIRROR = 4,
691
+ GGML_NUMA_STRATEGY_COUNT
692
+ };
463
693
 
464
- // TODO: double-check this computation is correct
465
- struct ggml_tensor * ggml_gelu(
466
- struct ggml_context * ctx,
467
- struct ggml_tensor * a);
694
+ //
695
+ // GUID
696
+ //
468
697
 
469
- // normalize along rows
470
- // TODO: eps is hardcoded to 1e-5 for now
471
- struct ggml_tensor * ggml_norm(
472
- struct ggml_context * ctx,
473
- struct ggml_tensor * a);
698
+ // GUID types
699
+ typedef uint8_t ggml_guid[16];
700
+ typedef ggml_guid * ggml_guid_t;
474
701
 
475
- // A: m rows, n columns
476
- // B: p rows, n columns (i.e. we transpose it internally)
477
- // result is m columns, p rows
478
- struct ggml_tensor * ggml_mul_mat(
479
- struct ggml_context * ctx,
480
- struct ggml_tensor * a,
481
- struct ggml_tensor * b);
702
+ GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b);
482
703
 
483
- //
484
- // operations on tensors without backpropagation
485
- //
704
+ // misc
486
705
 
487
- // in-place, returns view(a)
488
- struct ggml_tensor * ggml_scale(
489
- struct ggml_context * ctx,
490
- struct ggml_tensor * a,
491
- struct ggml_tensor * b);
706
+ GGML_API void ggml_time_init(void); // call this once at the beginning of the program
707
+ GGML_API int64_t ggml_time_ms(void);
708
+ GGML_API int64_t ggml_time_us(void);
709
+ GGML_API int64_t ggml_cycles(void);
710
+ GGML_API int64_t ggml_cycles_per_ms(void);
492
711
 
493
- // a -> b, return view(b)
494
- struct ggml_tensor * ggml_cpy(
495
- struct ggml_context * ctx,
496
- struct ggml_tensor * a,
497
- struct ggml_tensor * b);
712
+ GGML_API void ggml_print_backtrace(void);
498
713
 
499
- // return view(a), b specifies the new shape
500
- // TODO: when we start computing gradient, make a copy instead of view
501
- struct ggml_tensor * ggml_reshape(
502
- struct ggml_context * ctx,
503
- struct ggml_tensor * a,
504
- struct ggml_tensor * b);
714
+ // accepts a UTF-8 path, even on Windows
715
+ GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
505
716
 
506
- // return view(a)
507
- // TODO: when we start computing gradient, make a copy instead of view
508
- struct ggml_tensor * ggml_reshape_2d(
509
- struct ggml_context * ctx,
510
- struct ggml_tensor * a,
511
- int ne0,
512
- int ne1);
717
+ GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
718
+ GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
513
719
 
514
- // return view(a)
515
- // TODO: when we start computing gradient, make a copy instead of view
516
- struct ggml_tensor * ggml_reshape_3d(
517
- struct ggml_context * ctx,
518
- struct ggml_tensor * a,
519
- int ne0,
520
- int ne1,
521
- int ne2);
720
+ GGML_API void ggml_print_object (const struct ggml_object * obj);
721
+ GGML_API void ggml_print_objects(const struct ggml_context * ctx);
522
722
 
523
- // offset in bytes
524
- struct ggml_tensor * ggml_view_1d(
525
- struct ggml_context * ctx,
526
- struct ggml_tensor * a,
527
- int ne0,
528
- size_t offset);
723
+ GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor);
724
+ GGML_API GGML_CALL int64_t ggml_nrows (const struct ggml_tensor * tensor);
725
+ GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor);
726
+ GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
529
727
 
530
- struct ggml_tensor * ggml_view_2d(
531
- struct ggml_context * ctx,
532
- struct ggml_tensor * a,
533
- int ne0,
534
- int ne1,
535
- size_t nb1, // row stride in bytes
536
- size_t offset);
728
+ GGML_API GGML_CALL int ggml_blck_size(enum ggml_type type);
729
+ GGML_API GGML_CALL size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
730
+ GGML_API GGML_CALL size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
537
731
 
538
- struct ggml_tensor * ggml_permute(
539
- struct ggml_context * ctx,
540
- struct ggml_tensor * a,
541
- int axis0,
542
- int axis1,
543
- int axis2,
544
- int axis3);
732
+ GGML_DEPRECATED(
733
+ GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
734
+ "use ggml_row_size() instead");
545
735
 
546
- // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
547
- struct ggml_tensor * ggml_transpose(
548
- struct ggml_context * ctx,
549
- struct ggml_tensor * a);
736
+ GGML_API GGML_CALL const char * ggml_type_name(enum ggml_type type);
737
+ GGML_API GGML_CALL const char * ggml_op_name (enum ggml_op op);
738
+ GGML_API const char * ggml_op_symbol(enum ggml_op op);
550
739
 
551
- struct ggml_tensor * ggml_get_rows(
552
- struct ggml_context * ctx,
553
- struct ggml_tensor * a,
554
- struct ggml_tensor * b);
740
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
741
+ GGML_API GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
555
742
 
556
- // set elements above the diagonal to -INF
557
- // in-place, returns view(a)
558
- struct ggml_tensor * ggml_diag_mask_inf(
559
- struct ggml_context * ctx,
560
- struct ggml_tensor * a,
561
- int n_past);
743
+ GGML_API GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor);
562
744
 
563
- // in-place, returns view(a)
564
- struct ggml_tensor * ggml_soft_max(
565
- struct ggml_context * ctx,
566
- struct ggml_tensor * a);
745
+ GGML_API GGML_CALL bool ggml_is_quantized(enum ggml_type type);
567
746
 
568
- // rotary position embedding
569
- // in-place, returns view(a)
570
- // if mode == 1, skip n_past elements
571
- // TODO: avoid creating a new tensor every time
572
- struct ggml_tensor * ggml_rope(
573
- struct ggml_context * ctx,
574
- struct ggml_tensor * a,
575
- int n_past,
576
- int n_dims,
577
- int mode);
578
-
579
- // padding = 1
580
- // TODO: we don't support extra parameters for now
581
- // that's why we are hard-coding the stride, padding, and dilation
582
- // not great ..
583
- struct ggml_tensor * ggml_conv_1d_1s(
584
- struct ggml_context * ctx,
585
- struct ggml_tensor * a,
586
- struct ggml_tensor * b);
747
+ // TODO: temporary until model loading of ggml examples is refactored
748
+ GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
587
749
 
588
- struct ggml_tensor * ggml_conv_1d_2s(
589
- struct ggml_context * ctx,
590
- struct ggml_tensor * a,
591
- struct ggml_tensor * b);
750
+ GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
751
+ GGML_API GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor);
752
+ GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
753
+ GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
754
+ GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
755
+ GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
756
+ GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
757
+ GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
758
+ GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
592
759
 
593
- struct ggml_tensor * ggml_flash_attn(
594
- struct ggml_context * ctx,
595
- struct ggml_tensor * q,
596
- struct ggml_tensor * k,
597
- struct ggml_tensor * v,
598
- bool masked);
760
+ GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
599
761
 
600
- struct ggml_tensor * ggml_flash_ff(
601
- struct ggml_context * ctx,
602
- struct ggml_tensor * a,
603
- struct ggml_tensor * b0,
604
- struct ggml_tensor * b1,
605
- struct ggml_tensor * c0,
606
- struct ggml_tensor * c1);
762
+ // use this to compute the memory overhead of a tensor
763
+ GGML_API size_t ggml_tensor_overhead(void);
607
764
 
608
- //
609
- // automatic differentiation
610
- //
765
+ // main
611
766
 
612
- void ggml_set_param(
613
- struct ggml_context * ctx,
614
- struct ggml_tensor * tensor);
767
+ GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
768
+ GGML_API void ggml_free(struct ggml_context * ctx);
615
769
 
616
- void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
770
+ GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
617
771
 
618
- struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
619
- struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
772
+ GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
773
+ GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
774
+ GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
620
775
 
621
- void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
622
- void ggml_graph_reset (struct ggml_cgraph * cgraph);
776
+ GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
777
+ GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);
778
+ GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
623
779
 
624
- // print info and performance information for the graph
625
- void ggml_graph_print(const struct ggml_cgraph * cgraph);
780
+ GGML_API struct ggml_tensor * ggml_new_tensor(
781
+ struct ggml_context * ctx,
782
+ enum ggml_type type,
783
+ int n_dims,
784
+ const int64_t *ne);
626
785
 
627
- // dump the graph into a file using the dot format
628
- void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
786
+ GGML_API struct ggml_tensor * ggml_new_tensor_1d(
787
+ struct ggml_context * ctx,
788
+ enum ggml_type type,
789
+ int64_t ne0);
629
790
 
630
- //
631
- // optimization
632
- //
791
+ GGML_API struct ggml_tensor * ggml_new_tensor_2d(
792
+ struct ggml_context * ctx,
793
+ enum ggml_type type,
794
+ int64_t ne0,
795
+ int64_t ne1);
633
796
 
634
- // optimization methods
635
- enum ggml_opt_type {
636
- GGML_OPT_ADAM,
637
- GGML_OPT_LBFGS,
638
- };
797
+ GGML_API struct ggml_tensor * ggml_new_tensor_3d(
798
+ struct ggml_context * ctx,
799
+ enum ggml_type type,
800
+ int64_t ne0,
801
+ int64_t ne1,
802
+ int64_t ne2);
639
803
 
640
- // linesearch methods
641
- enum ggml_linesearch {
642
- GGML_LINESEARCH_DEFAULT = 1,
804
+ GGML_API struct ggml_tensor * ggml_new_tensor_4d(
805
+ struct ggml_context * ctx,
806
+ enum ggml_type type,
807
+ int64_t ne0,
808
+ int64_t ne1,
809
+ int64_t ne2,
810
+ int64_t ne3);
643
811
 
644
- GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
645
- GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
646
- GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
647
- };
812
+ GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
813
+ GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
648
814
 
649
- // optimization return values
650
- enum ggml_opt_result {
651
- GGML_OPT_OK = 0,
652
- GGML_OPT_DID_NOT_CONVERGE,
653
- GGML_OPT_NO_CONTEXT,
654
- GGML_OPT_INVALID_WOLFE,
655
- GGML_OPT_FAIL,
815
+ GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
816
+ GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
656
817
 
657
- GGML_LINESEARCH_FAIL = -128,
658
- GGML_LINESEARCH_MINIMUM_STEP,
659
- GGML_LINESEARCH_MAXIMUM_STEP,
660
- GGML_LINESEARCH_MAXIMUM_ITERATIONS,
661
- GGML_LINESEARCH_INVALID_PARAMETERS,
662
- };
818
+ // Context tensor enumeration and lookup
819
+ GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
820
+ GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
821
+ GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
663
822
 
664
- // optimization parameters
665
- //
666
- // see ggml.c (ggml_opt_default_params) for default values
667
- //
668
- struct ggml_opt_params {
669
- enum ggml_opt_type type;
823
+ GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
824
+ GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
825
+ GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
826
+
827
+ // Converts a flat index into coordinates
828
+ GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
829
+
830
+ GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
831
+ GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
832
+
833
+ GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
834
+ GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
835
+
836
+ GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
837
+ GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
838
+
839
+ GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
840
+ GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
841
+
842
+ GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
843
+ GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
670
844
 
671
- int n_threads;
845
+ GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
846
+
847
+ GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
848
+ GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
849
+ GGML_ATTRIBUTE_FORMAT(2, 3)
850
+ GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
672
851
 
673
- // delta-based convergence test
674
852
  //
675
- // if past == 0 - disabled
676
- // if past > 0:
677
- // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
853
+ // operations on tensors with backpropagation
678
854
  //
679
- int past;
680
- float delta;
681
855
 
682
- // maximum number of iterations without improvement
856
+ GGML_API struct ggml_tensor * ggml_dup(
857
+ struct ggml_context * ctx,
858
+ struct ggml_tensor * a);
859
+
860
+ // in-place, returns view(a)
861
+ GGML_API struct ggml_tensor * ggml_dup_inplace(
862
+ struct ggml_context * ctx,
863
+ struct ggml_tensor * a);
864
+
865
+ GGML_API struct ggml_tensor * ggml_add(
866
+ struct ggml_context * ctx,
867
+ struct ggml_tensor * a,
868
+ struct ggml_tensor * b);
869
+
870
+ GGML_API struct ggml_tensor * ggml_add_inplace(
871
+ struct ggml_context * ctx,
872
+ struct ggml_tensor * a,
873
+ struct ggml_tensor * b);
874
+
875
+ GGML_API struct ggml_tensor * ggml_add_cast(
876
+ struct ggml_context * ctx,
877
+ struct ggml_tensor * a,
878
+ struct ggml_tensor * b,
879
+ enum ggml_type type);
880
+
881
+ GGML_API struct ggml_tensor * ggml_add1(
882
+ struct ggml_context * ctx,
883
+ struct ggml_tensor * a,
884
+ struct ggml_tensor * b);
885
+
886
+ GGML_API struct ggml_tensor * ggml_add1_inplace(
887
+ struct ggml_context * ctx,
888
+ struct ggml_tensor * a,
889
+ struct ggml_tensor * b);
890
+
891
+ // dst = a
892
+ // view(dst, nb1, nb2, nb3, offset) += b
893
+ // return dst
894
+ GGML_API struct ggml_tensor * ggml_acc(
895
+ struct ggml_context * ctx,
896
+ struct ggml_tensor * a,
897
+ struct ggml_tensor * b,
898
+ size_t nb1,
899
+ size_t nb2,
900
+ size_t nb3,
901
+ size_t offset);
902
+
903
+ GGML_API struct ggml_tensor * ggml_acc_inplace(
904
+ struct ggml_context * ctx,
905
+ struct ggml_tensor * a,
906
+ struct ggml_tensor * b,
907
+ size_t nb1,
908
+ size_t nb2,
909
+ size_t nb3,
910
+ size_t offset);
911
+
912
+ GGML_API struct ggml_tensor * ggml_sub(
913
+ struct ggml_context * ctx,
914
+ struct ggml_tensor * a,
915
+ struct ggml_tensor * b);
916
+
917
+ GGML_API struct ggml_tensor * ggml_sub_inplace(
918
+ struct ggml_context * ctx,
919
+ struct ggml_tensor * a,
920
+ struct ggml_tensor * b);
921
+
922
+ GGML_API struct ggml_tensor * ggml_mul(
923
+ struct ggml_context * ctx,
924
+ struct ggml_tensor * a,
925
+ struct ggml_tensor * b);
926
+
927
+ GGML_API struct ggml_tensor * ggml_mul_inplace(
928
+ struct ggml_context * ctx,
929
+ struct ggml_tensor * a,
930
+ struct ggml_tensor * b);
931
+
932
+ GGML_API struct ggml_tensor * ggml_div(
933
+ struct ggml_context * ctx,
934
+ struct ggml_tensor * a,
935
+ struct ggml_tensor * b);
936
+
937
+ GGML_API struct ggml_tensor * ggml_div_inplace(
938
+ struct ggml_context * ctx,
939
+ struct ggml_tensor * a,
940
+ struct ggml_tensor * b);
941
+
942
+ GGML_API struct ggml_tensor * ggml_sqr(
943
+ struct ggml_context * ctx,
944
+ struct ggml_tensor * a);
945
+
946
+ GGML_API struct ggml_tensor * ggml_sqr_inplace(
947
+ struct ggml_context * ctx,
948
+ struct ggml_tensor * a);
949
+
950
+ GGML_API struct ggml_tensor * ggml_sqrt(
951
+ struct ggml_context * ctx,
952
+ struct ggml_tensor * a);
953
+
954
+ GGML_API struct ggml_tensor * ggml_sqrt_inplace(
955
+ struct ggml_context * ctx,
956
+ struct ggml_tensor * a);
957
+
958
+ GGML_API struct ggml_tensor * ggml_log(
959
+ struct ggml_context * ctx,
960
+ struct ggml_tensor * a);
961
+
962
+ GGML_API struct ggml_tensor * ggml_log_inplace(
963
+ struct ggml_context * ctx,
964
+ struct ggml_tensor * a);
965
+
966
+ // return scalar
967
+ GGML_API struct ggml_tensor * ggml_sum(
968
+ struct ggml_context * ctx,
969
+ struct ggml_tensor * a);
970
+
971
+ // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
972
+ GGML_API struct ggml_tensor * ggml_sum_rows(
973
+ struct ggml_context * ctx,
974
+ struct ggml_tensor * a);
975
+
976
+ // mean along rows
977
+ GGML_API struct ggml_tensor * ggml_mean(
978
+ struct ggml_context * ctx,
979
+ struct ggml_tensor * a);
980
+
981
+ // argmax along rows
982
+ GGML_API struct ggml_tensor * ggml_argmax(
983
+ struct ggml_context * ctx,
984
+ struct ggml_tensor * a);
985
+
986
+ // if a is the same shape as b, and a is not parameter, return a
987
+ // otherwise, return a new tensor: repeat(a) to fit in b
988
+ GGML_API struct ggml_tensor * ggml_repeat(
989
+ struct ggml_context * ctx,
990
+ struct ggml_tensor * a,
991
+ struct ggml_tensor * b);
992
+
993
+ // sums repetitions in a into shape of b
994
+ GGML_API struct ggml_tensor * ggml_repeat_back(
995
+ struct ggml_context * ctx,
996
+ struct ggml_tensor * a,
997
+ struct ggml_tensor * b);
998
+
999
+ // concat a and b on dim 2
1000
+ // used in stable-diffusion
1001
+ GGML_API struct ggml_tensor * ggml_concat(
1002
+ struct ggml_context * ctx,
1003
+ struct ggml_tensor * a,
1004
+ struct ggml_tensor * b);
1005
+
1006
+ GGML_API struct ggml_tensor * ggml_abs(
1007
+ struct ggml_context * ctx,
1008
+ struct ggml_tensor * a);
1009
+
1010
+ GGML_API struct ggml_tensor * ggml_abs_inplace(
1011
+ struct ggml_context * ctx,
1012
+ struct ggml_tensor * a);
1013
+
1014
+ GGML_API struct ggml_tensor * ggml_sgn(
1015
+ struct ggml_context * ctx,
1016
+ struct ggml_tensor * a);
1017
+
1018
+ GGML_API struct ggml_tensor * ggml_sgn_inplace(
1019
+ struct ggml_context * ctx,
1020
+ struct ggml_tensor * a);
1021
+
1022
+ GGML_API struct ggml_tensor * ggml_neg(
1023
+ struct ggml_context * ctx,
1024
+ struct ggml_tensor * a);
1025
+
1026
+ GGML_API struct ggml_tensor * ggml_neg_inplace(
1027
+ struct ggml_context * ctx,
1028
+ struct ggml_tensor * a);
1029
+
1030
+ GGML_API struct ggml_tensor * ggml_step(
1031
+ struct ggml_context * ctx,
1032
+ struct ggml_tensor * a);
1033
+
1034
+ GGML_API struct ggml_tensor * ggml_step_inplace(
1035
+ struct ggml_context * ctx,
1036
+ struct ggml_tensor * a);
1037
+
1038
+ GGML_API struct ggml_tensor * ggml_tanh(
1039
+ struct ggml_context * ctx,
1040
+ struct ggml_tensor * a);
1041
+
1042
+ GGML_API struct ggml_tensor * ggml_tanh_inplace(
1043
+ struct ggml_context * ctx,
1044
+ struct ggml_tensor * a);
1045
+
1046
+ GGML_API struct ggml_tensor * ggml_elu(
1047
+ struct ggml_context * ctx,
1048
+ struct ggml_tensor * a);
1049
+
1050
+ GGML_API struct ggml_tensor * ggml_elu_inplace(
1051
+ struct ggml_context * ctx,
1052
+ struct ggml_tensor * a);
1053
+
1054
+ GGML_API struct ggml_tensor * ggml_relu(
1055
+ struct ggml_context * ctx,
1056
+ struct ggml_tensor * a);
1057
+
1058
+ GGML_API struct ggml_tensor * ggml_leaky_relu(
1059
+ struct ggml_context * ctx,
1060
+ struct ggml_tensor * a, float negative_slope, bool inplace);
1061
+
1062
+ GGML_API struct ggml_tensor * ggml_relu_inplace(
1063
+ struct ggml_context * ctx,
1064
+ struct ggml_tensor * a);
1065
+
1066
+ GGML_API struct ggml_tensor * ggml_gelu(
1067
+ struct ggml_context * ctx,
1068
+ struct ggml_tensor * a);
1069
+
1070
+ GGML_API struct ggml_tensor * ggml_gelu_inplace(
1071
+ struct ggml_context * ctx,
1072
+ struct ggml_tensor * a);
1073
+
1074
+ GGML_API struct ggml_tensor * ggml_gelu_quick(
1075
+ struct ggml_context * ctx,
1076
+ struct ggml_tensor * a);
1077
+
1078
+ GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
1079
+ struct ggml_context * ctx,
1080
+ struct ggml_tensor * a);
1081
+
1082
+ GGML_API struct ggml_tensor * ggml_silu(
1083
+ struct ggml_context * ctx,
1084
+ struct ggml_tensor * a);
1085
+
1086
+ GGML_API struct ggml_tensor * ggml_silu_inplace(
1087
+ struct ggml_context * ctx,
1088
+ struct ggml_tensor * a);
1089
+
1090
+ // a - x
1091
+ // b - dy
1092
+ GGML_API struct ggml_tensor * ggml_silu_back(
1093
+ struct ggml_context * ctx,
1094
+ struct ggml_tensor * a,
1095
+ struct ggml_tensor * b);
1096
+
1097
+ // hardswish(x) = x * relu6(x + 3) / 6
1098
+ GGML_API struct ggml_tensor * ggml_hardswish(
1099
+ struct ggml_context * ctx,
1100
+ struct ggml_tensor * a);
1101
+
1102
+ // hardsigmoid(x) = relu6(x + 3) / 6
1103
+ GGML_API struct ggml_tensor * ggml_hardsigmoid(
1104
+ struct ggml_context * ctx,
1105
+ struct ggml_tensor * a);
1106
+
1107
+ // normalize along rows
1108
+ GGML_API struct ggml_tensor * ggml_norm(
1109
+ struct ggml_context * ctx,
1110
+ struct ggml_tensor * a,
1111
+ float eps);
1112
+
1113
+ GGML_API struct ggml_tensor * ggml_norm_inplace(
1114
+ struct ggml_context * ctx,
1115
+ struct ggml_tensor * a,
1116
+ float eps);
1117
+
1118
+ GGML_API struct ggml_tensor * ggml_rms_norm(
1119
+ struct ggml_context * ctx,
1120
+ struct ggml_tensor * a,
1121
+ float eps);
1122
+
1123
+ GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
1124
+ struct ggml_context * ctx,
1125
+ struct ggml_tensor * a,
1126
+ float eps);
1127
+
1128
+ // group normalize along ne0*ne1*n_groups
1129
+ // used in stable-diffusion
1130
+ // TODO: eps is hardcoded to 1e-6 for now
1131
+ GGML_API struct ggml_tensor * ggml_group_norm(
1132
+ struct ggml_context * ctx,
1133
+ struct ggml_tensor * a,
1134
+ int n_groups);
1135
+
1136
+ GGML_API struct ggml_tensor * ggml_group_norm_inplace(
1137
+ struct ggml_context * ctx,
1138
+ struct ggml_tensor * a,
1139
+ int n_groups);
1140
+
1141
+ // a - x
1142
+ // b - dy
1143
+ GGML_API struct ggml_tensor * ggml_rms_norm_back(
1144
+ struct ggml_context * ctx,
1145
+ struct ggml_tensor * a,
1146
+ struct ggml_tensor * b,
1147
+ float eps);
1148
+
1149
+ // A: k columns, n rows => [ne03, ne02, n, k]
1150
+ // B: k columns, m rows (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]
1151
+ // result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
1152
+ GGML_API struct ggml_tensor * ggml_mul_mat(
1153
+ struct ggml_context * ctx,
1154
+ struct ggml_tensor * a,
1155
+ struct ggml_tensor * b);
1156
+
1157
+ // change the precision of a matrix multiplication
1158
+ // set to GGML_PREC_F32 for higher precision (useful for phi-2)
1159
+ GGML_API void ggml_mul_mat_set_prec(
1160
+ struct ggml_tensor * a,
1161
+ enum ggml_prec prec);
1162
+
1163
+ // indirect matrix multiplication
1164
+ // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1165
+ GGML_API struct ggml_tensor * ggml_mul_mat_id(
1166
+ struct ggml_context * ctx,
1167
+ struct ggml_tensor * as,
1168
+ struct ggml_tensor * ids,
1169
+ int id,
1170
+ struct ggml_tensor * b);
1171
+
1172
+ // A: m columns, n rows,
1173
+ // B: p columns, n rows,
1174
+ // result is m columns, p rows
1175
+ GGML_API struct ggml_tensor * ggml_out_prod(
1176
+ struct ggml_context * ctx,
1177
+ struct ggml_tensor * a,
1178
+ struct ggml_tensor * b);
1179
+
683
1180
  //
684
- // if 0 - disabled
685
- // if > 0:
686
- // assume convergence if no cost improvement in this number of iterations
1181
+ // operations on tensors without backpropagation
687
1182
  //
688
- int max_no_improvement;
689
-
690
- bool print_forward_graph;
691
- bool print_backward_graph;
692
-
693
- // ADAM parameters
694
- struct {
695
- int n_iter;
696
-
697
- float alpha; // learning rate
698
- float beta1;
699
- float beta2;
700
- float eps; // epsilon for numerical stability
701
- float eps_f; // epsilon for convergence test
702
- float eps_g; // epsilon for convergence test
703
- } adam;
704
-
705
- // LBFGS parameters
706
- struct {
707
- int m; // number of corrections to approximate the inv. Hessian
708
- int n_iter;
709
- int max_linesearch;
710
-
711
- float eps; // convergence tolerance
712
- float ftol; // line search tolerance
713
- float wolfe;
714
- float min_step;
715
- float max_step;
716
-
717
- enum ggml_linesearch linesearch;
718
- } lbfgs;
719
- };
720
-
721
- struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
722
-
723
- // optimize the function defined by the tensor f
724
- enum ggml_opt_result ggml_opt(
1183
+
1184
+ GGML_API struct ggml_tensor * ggml_scale(
1185
+ struct ggml_context * ctx,
1186
+ struct ggml_tensor * a,
1187
+ float s);
1188
+
1189
+ // in-place, returns view(a)
1190
+ GGML_API struct ggml_tensor * ggml_scale_inplace(
1191
+ struct ggml_context * ctx,
1192
+ struct ggml_tensor * a,
1193
+ float s);
1194
+
1195
+ // b -> view(a,offset,nb1,nb2,3), return modified a
1196
+ GGML_API struct ggml_tensor * ggml_set(
1197
+ struct ggml_context * ctx,
1198
+ struct ggml_tensor * a,
1199
+ struct ggml_tensor * b,
1200
+ size_t nb1,
1201
+ size_t nb2,
1202
+ size_t nb3,
1203
+ size_t offset);
1204
+
1205
+ // b -> view(a,offset,nb1,nb2,3), return view(a)
1206
+ GGML_API struct ggml_tensor * ggml_set_inplace(
1207
+ struct ggml_context * ctx,
1208
+ struct ggml_tensor * a,
1209
+ struct ggml_tensor * b,
1210
+ size_t nb1,
1211
+ size_t nb2,
1212
+ size_t nb3,
1213
+ size_t offset);
1214
+
1215
+ GGML_API struct ggml_tensor * ggml_set_1d(
1216
+ struct ggml_context * ctx,
1217
+ struct ggml_tensor * a,
1218
+ struct ggml_tensor * b,
1219
+ size_t offset);
1220
+
1221
+ GGML_API struct ggml_tensor * ggml_set_1d_inplace(
1222
+ struct ggml_context * ctx,
1223
+ struct ggml_tensor * a,
1224
+ struct ggml_tensor * b,
1225
+ size_t offset);
1226
+
1227
+ // b -> view(a,offset,nb1,nb2,3), return modified a
1228
+ GGML_API struct ggml_tensor * ggml_set_2d(
1229
+ struct ggml_context * ctx,
1230
+ struct ggml_tensor * a,
1231
+ struct ggml_tensor * b,
1232
+ size_t nb1,
1233
+ size_t offset);
1234
+
1235
+ // b -> view(a,offset,nb1,nb2,3), return view(a)
1236
+ GGML_API struct ggml_tensor * ggml_set_2d_inplace(
1237
+ struct ggml_context * ctx,
1238
+ struct ggml_tensor * a,
1239
+ struct ggml_tensor * b,
1240
+ size_t nb1,
1241
+ size_t offset);
1242
+
1243
+ // a -> b, return view(b)
1244
+ GGML_API struct ggml_tensor * ggml_cpy(
1245
+ struct ggml_context * ctx,
1246
+ struct ggml_tensor * a,
1247
+ struct ggml_tensor * b);
1248
+
1249
+ GGML_API struct ggml_tensor * ggml_cast(
1250
+ struct ggml_context * ctx,
1251
+ struct ggml_tensor * a,
1252
+ enum ggml_type type);
1253
+
1254
+ // make contiguous
1255
+ GGML_API struct ggml_tensor * ggml_cont(
1256
+ struct ggml_context * ctx,
1257
+ struct ggml_tensor * a);
1258
+
1259
+ // make contiguous, with new shape
1260
+ GGML_API struct ggml_tensor * ggml_cont_1d(
1261
+ struct ggml_context * ctx,
1262
+ struct ggml_tensor * a,
1263
+ int64_t ne0);
1264
+
1265
+ GGML_API struct ggml_tensor * ggml_cont_2d(
1266
+ struct ggml_context * ctx,
1267
+ struct ggml_tensor * a,
1268
+ int64_t ne0,
1269
+ int64_t ne1);
1270
+
1271
+ GGML_API struct ggml_tensor * ggml_cont_3d(
1272
+ struct ggml_context * ctx,
1273
+ struct ggml_tensor * a,
1274
+ int64_t ne0,
1275
+ int64_t ne1,
1276
+ int64_t ne2);
1277
+
1278
+ GGML_API struct ggml_tensor * ggml_cont_4d(
1279
+ struct ggml_context * ctx,
1280
+ struct ggml_tensor * a,
1281
+ int64_t ne0,
1282
+ int64_t ne1,
1283
+ int64_t ne2,
1284
+ int64_t ne3);
1285
+
1286
+ // return view(a), b specifies the new shape
1287
+ // TODO: when we start computing gradient, make a copy instead of view
1288
+ GGML_API struct ggml_tensor * ggml_reshape(
1289
+ struct ggml_context * ctx,
1290
+ struct ggml_tensor * a,
1291
+ struct ggml_tensor * b);
1292
+
1293
+ // return view(a)
1294
+ // TODO: when we start computing gradient, make a copy instead of view
1295
+ GGML_API struct ggml_tensor * ggml_reshape_1d(
1296
+ struct ggml_context * ctx,
1297
+ struct ggml_tensor * a,
1298
+ int64_t ne0);
1299
+
1300
+ GGML_API struct ggml_tensor * ggml_reshape_2d(
1301
+ struct ggml_context * ctx,
1302
+ struct ggml_tensor * a,
1303
+ int64_t ne0,
1304
+ int64_t ne1);
1305
+
1306
+ // return view(a)
1307
+ // TODO: when we start computing gradient, make a copy instead of view
1308
+ GGML_API struct ggml_tensor * ggml_reshape_3d(
1309
+ struct ggml_context * ctx,
1310
+ struct ggml_tensor * a,
1311
+ int64_t ne0,
1312
+ int64_t ne1,
1313
+ int64_t ne2);
1314
+
1315
+ GGML_API struct ggml_tensor * ggml_reshape_4d(
1316
+ struct ggml_context * ctx,
1317
+ struct ggml_tensor * a,
1318
+ int64_t ne0,
1319
+ int64_t ne1,
1320
+ int64_t ne2,
1321
+ int64_t ne3);
1322
+
1323
+ // offset in bytes
1324
+ GGML_API struct ggml_tensor * ggml_view_1d(
1325
+ struct ggml_context * ctx,
1326
+ struct ggml_tensor * a,
1327
+ int64_t ne0,
1328
+ size_t offset);
1329
+
1330
+ GGML_API struct ggml_tensor * ggml_view_2d(
1331
+ struct ggml_context * ctx,
1332
+ struct ggml_tensor * a,
1333
+ int64_t ne0,
1334
+ int64_t ne1,
1335
+ size_t nb1, // row stride in bytes
1336
+ size_t offset);
1337
+
1338
+ GGML_API struct ggml_tensor * ggml_view_3d(
1339
+ struct ggml_context * ctx,
1340
+ struct ggml_tensor * a,
1341
+ int64_t ne0,
1342
+ int64_t ne1,
1343
+ int64_t ne2,
1344
+ size_t nb1, // row stride in bytes
1345
+ size_t nb2, // slice stride in bytes
1346
+ size_t offset);
1347
+
1348
+ GGML_API struct ggml_tensor * ggml_view_4d(
1349
+ struct ggml_context * ctx,
1350
+ struct ggml_tensor * a,
1351
+ int64_t ne0,
1352
+ int64_t ne1,
1353
+ int64_t ne2,
1354
+ int64_t ne3,
1355
+ size_t nb1, // row stride in bytes
1356
+ size_t nb2, // slice stride in bytes
1357
+ size_t nb3,
1358
+ size_t offset);
1359
+
1360
+ GGML_API struct ggml_tensor * ggml_permute(
1361
+ struct ggml_context * ctx,
1362
+ struct ggml_tensor * a,
1363
+ int axis0,
1364
+ int axis1,
1365
+ int axis2,
1366
+ int axis3);
1367
+
1368
+ // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
1369
+ GGML_API struct ggml_tensor * ggml_transpose(
1370
+ struct ggml_context * ctx,
1371
+ struct ggml_tensor * a);
1372
+
1373
+ // supports 3D: a->ne[2] == b->ne[1]
1374
+ GGML_API struct ggml_tensor * ggml_get_rows(
1375
+ struct ggml_context * ctx,
1376
+ struct ggml_tensor * a,
1377
+ struct ggml_tensor * b);
1378
+
1379
+ GGML_API struct ggml_tensor * ggml_get_rows_back(
1380
+ struct ggml_context * ctx,
1381
+ struct ggml_tensor * a,
1382
+ struct ggml_tensor * b,
1383
+ struct ggml_tensor * c);
1384
+
1385
+ GGML_API struct ggml_tensor * ggml_diag(
1386
+ struct ggml_context * ctx,
1387
+ struct ggml_tensor * a);
1388
+
1389
+ // set elements above the diagonal to -INF
1390
+ GGML_API struct ggml_tensor * ggml_diag_mask_inf(
1391
+ struct ggml_context * ctx,
1392
+ struct ggml_tensor * a,
1393
+ int n_past);
1394
+
1395
+ // in-place, returns view(a)
1396
+ GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
1397
+ struct ggml_context * ctx,
1398
+ struct ggml_tensor * a,
1399
+ int n_past);
1400
+
1401
+ // set elements above the diagonal to 0
1402
+ GGML_API struct ggml_tensor * ggml_diag_mask_zero(
1403
+ struct ggml_context * ctx,
1404
+ struct ggml_tensor * a,
1405
+ int n_past);
1406
+
1407
+ // in-place, returns view(a)
1408
+ GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
1409
+ struct ggml_context * ctx,
1410
+ struct ggml_tensor * a,
1411
+ int n_past);
1412
+
1413
+ GGML_API struct ggml_tensor * ggml_soft_max(
1414
+ struct ggml_context * ctx,
1415
+ struct ggml_tensor * a);
1416
+
1417
+ // in-place, returns view(a)
1418
+ GGML_API struct ggml_tensor * ggml_soft_max_inplace(
1419
+ struct ggml_context * ctx,
1420
+ struct ggml_tensor * a);
1421
+
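ggml_diag_mask_inf combined with ggml_soft_max is the classic causal-attention pattern: entries above the diagonal are set to -INF so they contribute nothing after the softmax. A hedged sketch, assuming ctx exists (the score tensor here is just a placeholder):

    const int n_kv = 16, n_tokens = 16, n_past = 0;
    struct ggml_tensor * kq        = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_kv, n_tokens);
    struct ggml_tensor * kq_masked = ggml_diag_mask_inf(ctx, kq, n_past); // future positions -> -INF
    struct ggml_tensor * kq_soft   = ggml_soft_max(ctx, kq_masked);       // rows now sum to 1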
1422
+ // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
1423
+ // mask is optional
1424
+ // pos is required when max_bias > 0.0f
1425
+ // max_bias = 0.0f for no ALiBi
1426
+ GGML_API struct ggml_tensor * ggml_soft_max_ext(
1427
+ struct ggml_context * ctx,
1428
+ struct ggml_tensor * a,
1429
+ struct ggml_tensor * mask,
1430
+ struct ggml_tensor * pos,
1431
+ float scale,
1432
+ float max_bias);
1433
+
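ggml_soft_max_ext fuses the separate scale + mask + soft_max chain into one node; with max_bias == 0.0f no ALiBi is applied and pos may be NULL, per the comment above. A hedged sketch with illustrative shapes (the scale 0.125f stands in for 1/sqrt(d_head) with d_head = 64):

    const int n_kv = 16, n_tokens = 16;
    struct ggml_tensor * kq      = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_kv, n_tokens);
    struct ggml_tensor * kq_mask = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_kv, n_tokens);
    // fused soft_max(kq*0.125f + kq_mask); pos == NULL and max_bias == 0.0f -> no ALiBi
    struct ggml_tensor * probs   = ggml_soft_max_ext(ctx, kq, kq_mask, NULL, 0.125f, 0.0f);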
1434
+ GGML_API struct ggml_tensor * ggml_soft_max_back(
1435
+ struct ggml_context * ctx,
1436
+ struct ggml_tensor * a,
1437
+ struct ggml_tensor * b);
1438
+
1439
+ // in-place, returns view(a)
1440
+ GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
1441
+ struct ggml_context * ctx,
1442
+ struct ggml_tensor * a,
1443
+ struct ggml_tensor * b);
1444
+
1445
+ // rotary position embedding
1446
+ // if mode & 1 != 0, skip n_past elements (DEPRECATED)
1447
+ // if mode & 2 != 0, GPT-NeoX style
1448
+ // if mode & 4 != 0, ChatGLM style
1449
+ //
1450
+ // b is an int32 vector with size a->ne[2], it contains the positions
1451
+ GGML_API struct ggml_tensor * ggml_rope(
1452
+ struct ggml_context * ctx,
1453
+ struct ggml_tensor * a,
1454
+ struct ggml_tensor * b,
1455
+ int n_dims,
1456
+ int mode,
1457
+ int n_ctx);
1458
+
1459
+ // in-place, returns view(a)
1460
+ GGML_API struct ggml_tensor * ggml_rope_inplace(
1461
+ struct ggml_context * ctx,
1462
+ struct ggml_tensor * a,
1463
+ struct ggml_tensor * b,
1464
+ int n_dims,
1465
+ int mode,
1466
+ int n_ctx);
1467
+
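For ggml_rope the positions tensor b is an int32 vector of length a->ne[2], as stated above. A hedged sketch of plain RoPE (mode 0), assuming ctx allocates tensor data so pos->data can be filled directly:

    const int head_dim = 64, n_head = 8, n_tokens = 4, n_past = 0;
    struct ggml_tensor * a   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, head_dim, n_head, n_tokens);
    struct ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);
    for (int i = 0; i < n_tokens; i++) {
        ((int32_t *) pos->data)[i] = n_past + i;      // one position per token
    }
    struct ggml_tensor * a_roped = ggml_rope(ctx, a, pos, head_dim, 0 /* mode */, 512 /* n_ctx */);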
1468
+ // custom RoPE
1469
+ GGML_API struct ggml_tensor * ggml_rope_custom(
1470
+ struct ggml_context * ctx,
1471
+ struct ggml_tensor * a,
1472
+ struct ggml_tensor * b,
1473
+ int n_dims,
1474
+ int mode,
1475
+ int n_ctx,
1476
+ int n_orig_ctx,
1477
+ float freq_base,
1478
+ float freq_scale,
1479
+ float ext_factor,
1480
+ float attn_factor,
1481
+ float beta_fast,
1482
+ float beta_slow);
1483
+
1484
+ // in-place, returns view(a)
1485
+ GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
1486
+ struct ggml_context * ctx,
1487
+ struct ggml_tensor * a,
1488
+ struct ggml_tensor * b,
1489
+ int n_dims,
1490
+ int mode,
1491
+ int n_ctx,
1492
+ int n_orig_ctx,
1493
+ float freq_base,
1494
+ float freq_scale,
1495
+ float ext_factor,
1496
+ float attn_factor,
1497
+ float beta_fast,
1498
+ float beta_slow);
1499
+
1500
+ // compute correction dims for YaRN RoPE scaling
1501
+ GGML_CALL void ggml_rope_yarn_corr_dims(
1502
+ int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
1503
+
1504
+ // xPos RoPE, in-place, returns view(a)
1505
+ GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
1506
+ struct ggml_context * ctx,
1507
+ struct ggml_tensor * a,
1508
+ struct ggml_tensor * b,
1509
+ int n_dims,
1510
+ float base,
1511
+ bool down);
1512
+
1513
+ // rotary position embedding backward, i.e. compute dx from dy
1514
+ // a - dy
1515
+ GGML_API struct ggml_tensor * ggml_rope_back(
1516
+ struct ggml_context * ctx,
1517
+ struct ggml_tensor * a,
1518
+ struct ggml_tensor * b,
1519
+ int n_dims,
1520
+ int mode,
1521
+ int n_ctx,
1522
+ int n_orig_ctx,
1523
+ float freq_base,
1524
+ float freq_scale,
1525
+ float ext_factor,
1526
+ float attn_factor,
1527
+ float beta_fast,
1528
+ float beta_slow,
1529
+ float xpos_base,
1530
+ bool xpos_down);
1531
+
1532
+ // alibi position embedding
1533
+ // in-place, returns view(a)
1534
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
1535
+ struct ggml_context * ctx,
1536
+ struct ggml_tensor * a,
1537
+ int n_past,
1538
+ int n_head,
1539
+ float bias_max),
1540
+ "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
1541
+
1542
+ // clamp
1543
+ // in-place, returns view(a)
1544
+ GGML_API struct ggml_tensor * ggml_clamp(
1545
+ struct ggml_context * ctx,
1546
+ struct ggml_tensor * a,
1547
+ float min,
1548
+ float max);
1549
+
1550
+ GGML_API struct ggml_tensor * ggml_im2col(
1551
+ struct ggml_context * ctx,
1552
+ struct ggml_tensor * a,
1553
+ struct ggml_tensor * b,
1554
+ int s0,
1555
+ int s1,
1556
+ int p0,
1557
+ int p1,
1558
+ int d0,
1559
+ int d1,
1560
+ bool is_2D,
1561
+ enum ggml_type dst_type);
1562
+
1563
+ GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
1564
+ struct ggml_context * ctx,
1565
+ struct ggml_tensor * a,
1566
+ struct ggml_tensor * b,
1567
+ int s0,
1568
+ int s1,
1569
+ int p0,
1570
+ int p1,
1571
+ int d0,
1572
+ int d1);
1573
+
1574
+ GGML_API struct ggml_tensor * ggml_conv_1d(
1575
+ struct ggml_context * ctx,
1576
+ struct ggml_tensor * a,
1577
+ struct ggml_tensor * b,
1578
+ int s0, // stride
1579
+ int p0, // padding
1580
+ int d0); // dilation
1581
+
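A hedged sketch of ggml_conv_1d, assuming the kernel layout [K, C_in, C_out] and signal layout [L, C_in] used elsewhere in ggml (whisper.cpp keeps its conv kernels in F16); the output length follows (L + 2*p0 - d0*(K-1) - 1)/s0 + 1:

    const int K = 3, C_in = 80, C_out = 384, L = 3000;
    struct ggml_tensor * w = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, K, C_in, C_out); // kernel
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, L, C_in);        // signal
    // stride 1, "half" padding K/2, dilation 1 -> y: [L, C_out]
    struct ggml_tensor * y = ggml_conv_1d(ctx, w, x, 1, K/2, 1);

The same call with padding a->ne[0]/2 is what ggml_conv_1d_ph below expands to.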
1582
+ // conv_1d with padding = half
1583
+ // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1584
+ GGML_API struct ggml_tensor* ggml_conv_1d_ph(
1585
+ struct ggml_context * ctx,
1586
+ struct ggml_tensor * a,
1587
+ struct ggml_tensor * b,
1588
+ int s,
1589
+ int d);
1590
+
1591
+ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
1592
+ struct ggml_context * ctx,
1593
+ struct ggml_tensor * a,
1594
+ struct ggml_tensor * b,
1595
+ int s0,
1596
+ int p0,
1597
+ int d0);
1598
+
1599
+ GGML_API struct ggml_tensor * ggml_conv_2d(
1600
+ struct ggml_context * ctx,
1601
+ struct ggml_tensor * a,
1602
+ struct ggml_tensor * b,
1603
+ int s0,
1604
+ int s1,
1605
+ int p0,
1606
+ int p1,
1607
+ int d0,
1608
+ int d1);
1609
+
1610
+
1611
+ // kernel size is a->ne[0] x a->ne[1]
1612
+ // stride is equal to kernel size
1613
+ // padding is zero
1614
+ // example:
1615
+ // a: 16 16 3 768
1616
+ // b: 1024 1024 3 1
1617
+ // res: 64 64 768 1
1618
+ // used in sam
1619
+ GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
1620
+ struct ggml_context * ctx,
1621
+ struct ggml_tensor * a,
1622
+ struct ggml_tensor * b);
1623
+
1624
+ // kernel size is a->ne[0] x a->ne[1]
1625
+ // stride is 1
1626
+ // padding is half
1627
+ // example:
1628
+ // a: 3 3 256 256
1629
+ // b: 64 64 256 1
1630
+ // res: 64 64 256 1
1631
+ // used in sam
1632
+ GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
1633
+ struct ggml_context * ctx,
1634
+ struct ggml_tensor * a,
1635
+ struct ggml_tensor * b);
1636
+
1637
+ GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
1638
+ struct ggml_context * ctx,
1639
+ struct ggml_tensor * a,
1640
+ struct ggml_tensor * b,
1641
+ int stride);
1642
+
1643
+ enum ggml_op_pool {
1644
+ GGML_OP_POOL_MAX,
1645
+ GGML_OP_POOL_AVG,
1646
+ GGML_OP_POOL_COUNT,
1647
+ };
1648
+
1649
+ GGML_API struct ggml_tensor * ggml_pool_1d(
1650
+ struct ggml_context * ctx,
1651
+ struct ggml_tensor * a,
1652
+ enum ggml_op_pool op,
1653
+ int k0, // kernel size
1654
+ int s0, // stride
1655
+ int p0); // padding
1656
+
1657
+ // the result will have 2*p0 padding for the first dimension
1658
+ // and 2*p1 padding for the second dimension
1659
+ GGML_API struct ggml_tensor * ggml_pool_2d(
1660
+ struct ggml_context * ctx,
1661
+ struct ggml_tensor * a,
1662
+ enum ggml_op_pool op,
1663
+ int k0,
1664
+ int k1,
1665
+ int s0,
1666
+ int s1,
1667
+ float p0,
1668
+ float p1);
1669
+
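A hedged sketch of 2x2 max pooling with stride 2 and no padding (note that p0/p1 are declared as float here), assuming ctx exists:

    struct ggml_tensor * img = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 64, 3);
    // 2x2 max pool, stride 2, no padding -> [32, 32, 3]
    struct ggml_tensor * out = ggml_pool_2d(ctx, img, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0.0f, 0.0f);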
1670
+ // nearest-neighbor interpolation
1671
+ // used in stable-diffusion
1672
+ GGML_API struct ggml_tensor * ggml_upscale(
1673
+ struct ggml_context * ctx,
1674
+ struct ggml_tensor * a,
1675
+ int scale_factor);
1676
+
1677
+ // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1678
+ GGML_API struct ggml_tensor * ggml_pad(
1679
+ struct ggml_context * ctx,
1680
+ struct ggml_tensor * a,
1681
+ int p0,
1682
+ int p1,
1683
+ int p2,
1684
+ int p3);
1685
+
1686
+ // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1687
+ // timesteps: [N,]
1688
+ // return: [N, dim]
1689
+ GGML_API struct ggml_tensor * ggml_timestep_embedding(
1690
+ struct ggml_context * ctx,
1691
+ struct ggml_tensor * timesteps,
1692
+ int dim,
1693
+ int max_period);
1694
+
1695
+ // sort rows
1696
+ enum ggml_sort_order {
1697
+ GGML_SORT_ORDER_ASC,
1698
+ GGML_SORT_ORDER_DESC,
1699
+ };
1700
+
1701
+ GGML_API struct ggml_tensor * ggml_argsort(
1702
+ struct ggml_context * ctx,
1703
+ struct ggml_tensor * a,
1704
+ enum ggml_sort_order order);
1705
+
1706
+ GGML_API struct ggml_tensor * ggml_arange(
1707
+ struct ggml_context * ctx,
1708
+ float start,
1709
+ float stop,
1710
+ float step);
1711
+
1712
+ // top k elements per row
1713
+ GGML_API struct ggml_tensor * ggml_top_k(
1714
+ struct ggml_context * ctx,
1715
+ struct ggml_tensor * a,
1716
+ int k);
1717
+
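ggml_argsort returns, per row, the indices that would sort that row; ggml_top_k and ggml_arange follow naturally. A hedged sketch, assuming ctx exists:

    const int n_vocab = 32000;
    struct ggml_tensor * logits = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_vocab);
    struct ggml_tensor * order  = ggml_argsort(ctx, logits, GGML_SORT_ORDER_DESC); // per-row sort indices
    struct ggml_tensor * top10  = ggml_top_k(ctx, logits, 10);                     // top 10 per row
    struct ggml_tensor * ramp   = ggml_arange(ctx, 0.0f, 16.0f, 1.0f);             // [0, 1, ..., 15]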
1718
+ GGML_API struct ggml_tensor * ggml_flash_attn(
1719
+ struct ggml_context * ctx,
1720
+ struct ggml_tensor * q,
1721
+ struct ggml_tensor * k,
1722
+ struct ggml_tensor * v,
1723
+ bool masked);
1724
+
1725
+ GGML_API struct ggml_tensor * ggml_flash_attn_back(
1726
+ struct ggml_context * ctx,
1727
+ struct ggml_tensor * q,
1728
+ struct ggml_tensor * k,
1729
+ struct ggml_tensor * v,
1730
+ struct ggml_tensor * d,
1731
+ bool masked);
1732
+
1733
+ GGML_API struct ggml_tensor * ggml_flash_ff(
1734
+ struct ggml_context * ctx,
1735
+ struct ggml_tensor * a,
1736
+ struct ggml_tensor * b0,
1737
+ struct ggml_tensor * b1,
1738
+ struct ggml_tensor * c0,
1739
+ struct ggml_tensor * c1);
1740
+
1741
+ GGML_API struct ggml_tensor * ggml_ssm_conv(
1742
+ struct ggml_context * ctx,
1743
+ struct ggml_tensor * s,
1744
+ struct ggml_tensor * x,
1745
+ struct ggml_tensor * c,
1746
+ struct ggml_tensor * sq);
1747
+
1748
+ GGML_API struct ggml_tensor * ggml_ssm_scan(
1749
+ struct ggml_context * ctx,
1750
+ struct ggml_tensor * s,
1751
+ struct ggml_tensor * x,
1752
+ struct ggml_tensor * dt,
1753
+ struct ggml_tensor * A,
1754
+ struct ggml_tensor * B,
1755
+ struct ggml_tensor * C,
1756
+ struct ggml_tensor * sq);
1757
+
1758
+ // partition into non-overlapping windows with padding if needed
1759
+ // example:
1760
+ // a: 768 64 64 1
1761
+ // w: 14
1762
+ // res: 768 14 14 25
1763
+ // used in sam
1764
+ GGML_API struct ggml_tensor * ggml_win_part(
1765
+ struct ggml_context * ctx,
1766
+ struct ggml_tensor * a,
1767
+ int w);
1768
+
1769
+ // reverse of ggml_win_part
1770
+ // used in sam
1771
+ GGML_API struct ggml_tensor * ggml_win_unpart(
1772
+ struct ggml_context * ctx,
1773
+ struct ggml_tensor * a,
1774
+ int w0,
1775
+ int h0,
1776
+ int w);
1777
+
1778
+ GGML_API struct ggml_tensor * ggml_unary(
1779
+ struct ggml_context * ctx,
1780
+ struct ggml_tensor * a,
1781
+ enum ggml_unary_op op);
1782
+
1783
+ GGML_API struct ggml_tensor * ggml_unary_inplace(
725
1784
  struct ggml_context * ctx,
726
- struct ggml_opt_params params,
727
- struct ggml_tensor * f);
1785
+ struct ggml_tensor * a,
1786
+ enum ggml_unary_op op);
1787
+
1788
+ // used in sam
1789
+ GGML_API struct ggml_tensor * ggml_get_rel_pos(
1790
+ struct ggml_context * ctx,
1791
+ struct ggml_tensor * a,
1792
+ int qh,
1793
+ int kh);
1794
+
1795
+ // used in sam
1796
+ GGML_API struct ggml_tensor * ggml_add_rel_pos(
1797
+ struct ggml_context * ctx,
1798
+ struct ggml_tensor * a,
1799
+ struct ggml_tensor * pw,
1800
+ struct ggml_tensor * ph);
1801
+
1802
+ GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
1803
+ struct ggml_context * ctx,
1804
+ struct ggml_tensor * a,
1805
+ struct ggml_tensor * pw,
1806
+ struct ggml_tensor * ph);
1807
+
1808
+ // custom operators
1809
+
1810
+ typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
1811
+ typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1812
+
1813
+ typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
1814
+ typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1815
+ typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1816
+
1817
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
1818
+ struct ggml_context * ctx,
1819
+ struct ggml_tensor * a,
1820
+ ggml_unary_op_f32_t fun),
1821
+ "use ggml_map_custom1 instead");
1822
+
1823
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
1824
+ struct ggml_context * ctx,
1825
+ struct ggml_tensor * a,
1826
+ ggml_unary_op_f32_t fun),
1827
+ "use ggml_map_custom1_inplace instead");
1828
+
1829
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
1830
+ struct ggml_context * ctx,
1831
+ struct ggml_tensor * a,
1832
+ struct ggml_tensor * b,
1833
+ ggml_binary_op_f32_t fun),
1834
+ "use ggml_map_custom2 instead");
1835
+
1836
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
1837
+ struct ggml_context * ctx,
1838
+ struct ggml_tensor * a,
1839
+ struct ggml_tensor * b,
1840
+ ggml_binary_op_f32_t fun),
1841
+ "use ggml_map_custom2_inplace instead");
1842
+
1843
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
1844
+ struct ggml_context * ctx,
1845
+ struct ggml_tensor * a,
1846
+ ggml_custom1_op_f32_t fun),
1847
+ "use ggml_map_custom1 instead");
1848
+
1849
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
1850
+ struct ggml_context * ctx,
1851
+ struct ggml_tensor * a,
1852
+ ggml_custom1_op_f32_t fun),
1853
+ "use ggml_map_custom1_inplace instead");
1854
+
1855
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
1856
+ struct ggml_context * ctx,
1857
+ struct ggml_tensor * a,
1858
+ struct ggml_tensor * b,
1859
+ ggml_custom2_op_f32_t fun),
1860
+ "use ggml_map_custom2 instead");
1861
+
1862
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
1863
+ struct ggml_context * ctx,
1864
+ struct ggml_tensor * a,
1865
+ struct ggml_tensor * b,
1866
+ ggml_custom2_op_f32_t fun),
1867
+ "use ggml_map_custom2_inplace instead");
1868
+
1869
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
1870
+ struct ggml_context * ctx,
1871
+ struct ggml_tensor * a,
1872
+ struct ggml_tensor * b,
1873
+ struct ggml_tensor * c,
1874
+ ggml_custom3_op_f32_t fun),
1875
+ "use ggml_map_custom3 instead");
1876
+
1877
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
1878
+ struct ggml_context * ctx,
1879
+ struct ggml_tensor * a,
1880
+ struct ggml_tensor * b,
1881
+ struct ggml_tensor * c,
1882
+ ggml_custom3_op_f32_t fun),
1883
+ "use ggml_map_custom3_inplace instead");
1884
+
1885
+ // custom operators v2
1886
+
1887
+ typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
1888
+ typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
1889
+ typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
1890
+
1891
+ #define GGML_N_TASKS_MAX -1
1892
+
1893
+ GGML_API struct ggml_tensor * ggml_map_custom1(
1894
+ struct ggml_context * ctx,
1895
+ struct ggml_tensor * a,
1896
+ ggml_custom1_op_t fun,
1897
+ int n_tasks,
1898
+ void * userdata);
1899
+
1900
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
1901
+ struct ggml_context * ctx,
1902
+ struct ggml_tensor * a,
1903
+ ggml_custom1_op_t fun,
1904
+ int n_tasks,
1905
+ void * userdata);
1906
+
1907
+ GGML_API struct ggml_tensor * ggml_map_custom2(
1908
+ struct ggml_context * ctx,
1909
+ struct ggml_tensor * a,
1910
+ struct ggml_tensor * b,
1911
+ ggml_custom2_op_t fun,
1912
+ int n_tasks,
1913
+ void * userdata);
1914
+
1915
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
1916
+ struct ggml_context * ctx,
1917
+ struct ggml_tensor * a,
1918
+ struct ggml_tensor * b,
1919
+ ggml_custom2_op_t fun,
1920
+ int n_tasks,
1921
+ void * userdata);
1922
+
1923
+ GGML_API struct ggml_tensor * ggml_map_custom3(
1924
+ struct ggml_context * ctx,
1925
+ struct ggml_tensor * a,
1926
+ struct ggml_tensor * b,
1927
+ struct ggml_tensor * c,
1928
+ ggml_custom3_op_t fun,
1929
+ int n_tasks,
1930
+ void * userdata);
1931
+
1932
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
1933
+ struct ggml_context * ctx,
1934
+ struct ggml_tensor * a,
1935
+ struct ggml_tensor * b,
1936
+ struct ggml_tensor * c,
1937
+ ggml_custom3_op_t fun,
1938
+ int n_tasks,
1939
+ void * userdata);
1940
+
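The v2 custom-op interface runs the callback on nth threads; each call receives its thread index ith and must touch only its own share of dst. A hedged sketch of an element-wise op, assuming contiguous F32 tensors whose rows can be addressed as ir*nb[1]:

    // dst = a*a, rows split across threads
    static void square_f32(struct ggml_tensor * dst, const struct ggml_tensor * a, int ith, int nth, void * userdata) {
        (void) userdata;
        const int64_t nr  = dst->ne[1]*dst->ne[2]*dst->ne[3];  // total number of rows
        const int64_t dr  = (nr + nth - 1)/nth;                // rows per thread
        const int64_t ir0 = dr*ith;
        const int64_t ir1 = ir0 + dr < nr ? ir0 + dr : nr;
        for (int64_t ir = ir0; ir < ir1; ir++) {
            const float * src = (const float *) ((const char *) a->data   + ir*a->nb[1]);
                  float * out = (      float *) ((      char *) dst->data + ir*dst->nb[1]);
            for (int64_t i = 0; i < dst->ne[0]; i++) {
                out[i] = src[i]*src[i];
            }
        }
    }

    // later, while building the graph:
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 16);
    struct ggml_tensor * y = ggml_map_custom1(ctx, x, square_f32, GGML_N_TASKS_MAX, NULL);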
1941
+ // loss function
1942
+
1943
+ GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
1944
+ struct ggml_context * ctx,
1945
+ struct ggml_tensor * a,
1946
+ struct ggml_tensor * b);
1947
+
1948
+ GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
1949
+ struct ggml_context * ctx,
1950
+ struct ggml_tensor * a,
1951
+ struct ggml_tensor * b,
1952
+ struct ggml_tensor * c);
728
1953
 
729
- //
730
- // system info
731
- //
1954
+ //
1955
+ // automatic differentiation
1956
+ //
732
1957
 
733
- int ggml_cpu_has_avx(void);
734
- int ggml_cpu_has_avx2(void);
735
- int ggml_cpu_has_avx512(void);
736
- int ggml_cpu_has_fma(void);
737
- int ggml_cpu_has_neon(void);
738
- int ggml_cpu_has_arm_fma(void);
739
- int ggml_cpu_has_f16c(void);
740
- int ggml_cpu_has_fp16_va(void);
741
- int ggml_cpu_has_wasm_simd(void);
742
- int ggml_cpu_has_blas(void);
743
- int ggml_cpu_has_sse3(void);
744
- int ggml_cpu_has_vsx(void);
1958
+ GGML_API void ggml_set_param(
1959
+ struct ggml_context * ctx,
1960
+ struct ggml_tensor * tensor);
1961
+
1962
+
1963
+ GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1964
+ GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
1965
+
1966
+ // graph allocation in a context
1967
+ GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
1968
+ GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
1969
+ GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1970
+ GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
1971
+ GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
1972
+ GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
1973
+ GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
1974
+
1975
+ GGML_API size_t ggml_graph_overhead(void);
1976
+ GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
1977
+
1978
+ // ggml_graph_plan() has to be called before ggml_graph_compute()
1979
+ // when plan.work_size > 0, caller must allocate memory for plan.work_data
1980
+ GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
1981
+ GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
1982
+ // same as ggml_graph_compute() but the work data is allocated as a part of the context
1983
+ // note: the drawback of this API is that you must ensure that the context has enough memory for the work data
1984
+ GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
1985
+
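When the work buffer is managed by the caller rather than by ggml_graph_compute_with_ctx, the plan is built first and plan.work_size / plan.work_data are honored, as the comment above says. A hedged sketch (f is assumed to be a tensor built in ctx; malloc/free come from <stdlib.h>, and the work buffer is assumed to be byte-addressable):

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, f);

    uint8_t * work = NULL;
    struct ggml_cplan plan = ggml_graph_plan(gf, 4 /* n_threads */);
    if (plan.work_size > 0) {
        work = (uint8_t *) malloc(plan.work_size);
        plan.work_data = work;
    }
    enum ggml_status status = ggml_graph_compute(gf, &plan);
    free(work);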
1986
+ GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
1987
+
1988
+ GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
1989
+ GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
1990
+
1991
+ // print info and performance information for the graph
1992
+ GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
1993
+
1994
+ // dump the graph into a file using the dot format
1995
+ GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
1996
+
1997
+ // build gradient checkpointing backward graph gb for gf using provided checkpoints
1998
+ // gb_tmp will contain the original backward graph with rewritten backward process nodes,
1999
+ // but without the second forward pass nodes.
2000
+ GGML_API void ggml_build_backward_gradient_checkpointing(
2001
+ struct ggml_context * ctx,
2002
+ struct ggml_cgraph * gf,
2003
+ struct ggml_cgraph * gb,
2004
+ struct ggml_cgraph * gb_tmp,
2005
+ struct ggml_tensor * * checkpoints,
2006
+ int n_checkpoints);
2007
+ //
2008
+ // optimization
2009
+ //
2010
+
2011
+ // optimization methods
2012
+ enum ggml_opt_type {
2013
+ GGML_OPT_TYPE_ADAM,
2014
+ GGML_OPT_TYPE_LBFGS,
2015
+ };
2016
+
2017
+ // linesearch methods
2018
+ enum ggml_linesearch {
2019
+ GGML_LINESEARCH_DEFAULT = 1,
2020
+
2021
+ GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
2022
+ GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
2023
+ GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
2024
+ };
2025
+
2026
+ // optimization return values
2027
+ enum ggml_opt_result {
2028
+ GGML_OPT_RESULT_OK = 0,
2029
+ GGML_OPT_RESULT_DID_NOT_CONVERGE,
2030
+ GGML_OPT_RESULT_NO_CONTEXT,
2031
+ GGML_OPT_RESULT_INVALID_WOLFE,
2032
+ GGML_OPT_RESULT_FAIL,
2033
+ GGML_OPT_RESULT_CANCEL,
2034
+
2035
+ GGML_LINESEARCH_FAIL = -128,
2036
+ GGML_LINESEARCH_MINIMUM_STEP,
2037
+ GGML_LINESEARCH_MAXIMUM_STEP,
2038
+ GGML_LINESEARCH_MAXIMUM_ITERATIONS,
2039
+ GGML_LINESEARCH_INVALID_PARAMETERS,
2040
+ };
2041
+
2042
+ typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
2043
+ typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
2044
+
2045
+ // optimization parameters
2046
+ //
2047
+ // see ggml.c (ggml_opt_default_params) for default values
2048
+ //
2049
+ struct ggml_opt_params {
2050
+ enum ggml_opt_type type;
2051
+
2052
+ size_t graph_size;
2053
+
2054
+ int n_threads;
2055
+
2056
+ // delta-based convergence test
2057
+ //
2058
+ // if past == 0 - disabled
2059
+ // if past > 0:
2060
+ // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
2061
+ //
2062
+ int past;
2063
+ float delta;
2064
+
2065
+ // maximum number of iterations without improvement
2066
+ //
2067
+ // if 0 - disabled
2068
+ // if > 0:
2069
+ // assume convergence if no cost improvement in this number of iterations
2070
+ //
2071
+ int max_no_improvement;
2072
+
2073
+ bool print_forward_graph;
2074
+ bool print_backward_graph;
2075
+
2076
+ int n_gradient_accumulation;
2077
+
2078
+ // ADAM parameters
2079
+ struct {
2080
+ int n_iter;
2081
+
2082
+ float sched; // schedule multiplier (fixed, decay or warmup)
2083
+ float decay; // weight decay for AdamW, use 0.0f to disable
2084
+ int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
2085
+ float alpha; // learning rate
2086
+ float beta1;
2087
+ float beta2;
2088
+ float eps; // epsilon for numerical stability
2089
+ float eps_f; // epsilon for convergence test
2090
+ float eps_g; // epsilon for convergence test
2091
+ float gclip; // gradient clipping
2092
+ } adam;
2093
+
2094
+ // LBFGS parameters
2095
+ struct {
2096
+ int m; // number of corrections to approximate the inv. Hessian
2097
+ int n_iter;
2098
+ int max_linesearch;
2099
+
2100
+ float eps; // convergence tolerance
2101
+ float ftol; // line search tolerance
2102
+ float wolfe;
2103
+ float min_step;
2104
+ float max_step;
2105
+
2106
+ enum ggml_linesearch linesearch;
2107
+ } lbfgs;
2108
+ };
2109
+
2110
+ struct ggml_opt_context {
2111
+ struct ggml_context * ctx;
2112
+ struct ggml_opt_params params;
2113
+
2114
+ int iter;
2115
+ int64_t nx; // number of parameter elements
2116
+
2117
+ bool just_initialized;
2118
+
2119
+ float loss_before;
2120
+ float loss_after;
2121
+
2122
+ struct {
2123
+ struct ggml_tensor * g; // current gradient
2124
+ struct ggml_tensor * m; // first moment
2125
+ struct ggml_tensor * v; // second moment
2126
+ struct ggml_tensor * pf; // past function values
2127
+ float fx_best;
2128
+ float fx_prev;
2129
+ int n_no_improvement;
2130
+ } adam;
2131
+
2132
+ struct {
2133
+ struct ggml_tensor * x; // current parameters
2134
+ struct ggml_tensor * xp; // previous parameters
2135
+ struct ggml_tensor * g; // current gradient
2136
+ struct ggml_tensor * gp; // previous gradient
2137
+ struct ggml_tensor * d; // search direction
2138
+ struct ggml_tensor * pf; // past function values
2139
+ struct ggml_tensor * lmal; // the L-BFGS memory alpha
2140
+ struct ggml_tensor * lmys; // the L-BFGS memory ys
2141
+ struct ggml_tensor * lms; // the L-BFGS memory s
2142
+ struct ggml_tensor * lmy; // the L-BFGS memory y
2143
+ float fx_best;
2144
+ float step;
2145
+ int j;
2146
+ int k;
2147
+ int end;
2148
+ int n_no_improvement;
2149
+ } lbfgs;
2150
+ };
2151
+
2152
+ GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
2153
+
2154
+ // optimize the function defined by the tensor f
2155
+ GGML_API enum ggml_opt_result ggml_opt(
2156
+ struct ggml_context * ctx,
2157
+ struct ggml_opt_params params,
2158
+ struct ggml_tensor * f);
2159
+
2160
+ // initialize optimizer context
2161
+ GGML_API void ggml_opt_init(
2162
+ struct ggml_context * ctx,
2163
+ struct ggml_opt_context * opt,
2164
+ struct ggml_opt_params params,
2165
+ int64_t nx);
2166
+
2167
+ // continue optimizing the function defined by the tensor f
2168
+ GGML_API enum ggml_opt_result ggml_opt_resume(
2169
+ struct ggml_context * ctx,
2170
+ struct ggml_opt_context * opt,
2171
+ struct ggml_tensor * f);
2172
+
2173
+ // continue optimizing the function defined by the tensor f
2174
+ GGML_API enum ggml_opt_result ggml_opt_resume_g(
2175
+ struct ggml_context * ctx,
2176
+ struct ggml_opt_context * opt,
2177
+ struct ggml_tensor * f,
2178
+ struct ggml_cgraph * gf,
2179
+ struct ggml_cgraph * gb,
2180
+ ggml_opt_callback callback,
2181
+ void * callback_data);
2182
+
2183
+ //
2184
+ // tensor flags
2185
+ //
2186
+ GGML_API void ggml_set_input(struct ggml_tensor * tensor);
2187
+ GGML_API void ggml_set_output(struct ggml_tensor * tensor);
2188
+
2189
+ //
2190
+ // quantization
2191
+ //
2192
+
2193
+ // - ggml_quantize_init can be called multiple times with the same type
2194
+ // it will only initialize the quantization tables for the first call or after ggml_quantize_free
2195
+ // automatically called by ggml_quantize_chunk for convenience
2196
+ //
2197
+ // - ggml_quantize_free will free any memory allocated by ggml_quantize_init
2198
+ // call this at the end of the program to avoid memory leaks
2199
+ //
2200
+ // note: these are thread-safe
2201
+ //
2202
+ GGML_API void ggml_quantize_init(enum ggml_type type);
2203
+ GGML_API void ggml_quantize_free(void);
2204
+
2205
+ // some quantization types cannot be used without an importance matrix
2206
+ GGML_API bool ggml_quantize_requires_imatrix(enum ggml_type type);
2207
+
2208
+ // calls ggml_quantize_init internally (i.e. can allocate memory)
2209
+ GGML_API size_t ggml_quantize_chunk(
2210
+ enum ggml_type type,
2211
+ const float * src,
2212
+ void * dst,
2213
+ int64_t start,
2214
+ int64_t nrows,
2215
+ int64_t n_per_row,
2216
+ const float * imatrix);
2217
+
2218
+ //
2219
+ // gguf
2220
+ //
2221
+
2222
+ enum gguf_type {
2223
+ GGUF_TYPE_UINT8 = 0,
2224
+ GGUF_TYPE_INT8 = 1,
2225
+ GGUF_TYPE_UINT16 = 2,
2226
+ GGUF_TYPE_INT16 = 3,
2227
+ GGUF_TYPE_UINT32 = 4,
2228
+ GGUF_TYPE_INT32 = 5,
2229
+ GGUF_TYPE_FLOAT32 = 6,
2230
+ GGUF_TYPE_BOOL = 7,
2231
+ GGUF_TYPE_STRING = 8,
2232
+ GGUF_TYPE_ARRAY = 9,
2233
+ GGUF_TYPE_UINT64 = 10,
2234
+ GGUF_TYPE_INT64 = 11,
2235
+ GGUF_TYPE_FLOAT64 = 12,
2236
+ GGUF_TYPE_COUNT, // marks the end of the enum
2237
+ };
2238
+
2239
+ struct gguf_context;
2240
+
2241
+ struct gguf_init_params {
2242
+ bool no_alloc;
2243
+
2244
+ // if not NULL, create a ggml_context and allocate the tensor data in it
2245
+ struct ggml_context ** ctx;
2246
+ };
2247
+
2248
+ GGML_API struct gguf_context * gguf_init_empty(void);
2249
+ GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
2250
+ //GGML_API struct gguf_context * gguf_init_from_buffer(..);
2251
+
2252
+ GGML_API void gguf_free(struct gguf_context * ctx);
2253
+
2254
+ GGML_API const char * gguf_type_name(enum gguf_type type);
2255
+
2256
+ GGML_API int gguf_get_version (const struct gguf_context * ctx);
2257
+ GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
2258
+ GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
2259
+ GGML_API void * gguf_get_data (const struct gguf_context * ctx);
2260
+
2261
+ GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
2262
+ GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
2263
+ GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
2264
+
2265
+ GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
2266
+ GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
2267
+
2268
+ // will abort if the wrong type is used for the key
2269
+ GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
2270
+ GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
2271
+ GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
2272
+ GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
2273
+ GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
2274
+ GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
2275
+ GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
2276
+ GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
2277
+ GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
2278
+ GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
2279
+ GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
2280
+ GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
2281
+ GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
2282
+ GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
2283
+ GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
2284
+ GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
2285
+
2286
+ GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
2287
+ GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
2288
+ GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2289
+ GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2290
+ GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
2291
+
2292
+ // overrides existing values or adds a new one
2293
+ GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
2294
+ GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
2295
+ GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
2296
+ GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
2297
+ GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
2298
+ GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
2299
+ GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
2300
+ GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
2301
+ GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
2302
+ GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
2303
+ GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
2304
+ GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
2305
+ GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
2306
+ GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
2307
+
2308
+ // set or add KV pairs from another context
2309
+ GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
2310
+
2311
+ // manage tensor info
2312
+ GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
2313
+ GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
2314
+ GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
2315
+
2316
+ // writing gguf files can be done in 2 ways:
2317
+ //
2318
+ // - write the entire gguf_context to a binary file in a single pass:
2319
+ //
2320
+ // gguf_write_to_file(ctx, fname);
2321
+ //
2322
+ // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
2323
+ //
2324
+ // FILE * f = fopen(fname, "wb");
2325
+ // fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
2326
+ // fwrite(f, ...);
2327
+ // void * data = gguf_meta_get_meta_data(ctx);
2328
+ // fseek(f, 0, SEEK_SET);
2329
+ // fwrite(f, data, gguf_get_meta_size(ctx));
2330
+ // free(data);
2331
+ // fclose(f);
2332
+ //
2333
+
2334
+ // write the entire context to a binary file
2335
+ GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
2336
+
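A hedged sketch of the single-pass writing path from the comment above; the key, the tensor t, and the file name are illustrative only (t is assumed to have been created and named earlier, e.g. via ggml_set_name declared earlier in this header):

    struct gguf_context * g = gguf_init_empty();

    gguf_set_val_str(g, "general.name", "demo");              // hypothetical KV pair
    gguf_add_tensor(g, t);                                    // record t's name, shape, type and data
    gguf_write_to_file(g, "demo.gguf", /*only_meta=*/false);  // hypothetical file name

    gguf_free(g);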
2337
+ // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
2338
+ GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
2339
+ GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
2340
+
2341
+ //
2342
+ // system info
2343
+ //
2344
+
2345
+ GGML_API int ggml_cpu_has_avx (void);
2346
+ GGML_API int ggml_cpu_has_avx_vnni (void);
2347
+ GGML_API int ggml_cpu_has_avx2 (void);
2348
+ GGML_API int ggml_cpu_has_avx512 (void);
2349
+ GGML_API int ggml_cpu_has_avx512_vbmi(void);
2350
+ GGML_API int ggml_cpu_has_avx512_vnni(void);
2351
+ GGML_API int ggml_cpu_has_fma (void);
2352
+ GGML_API int ggml_cpu_has_neon (void);
2353
+ GGML_API int ggml_cpu_has_arm_fma (void);
2354
+ GGML_API int ggml_cpu_has_metal (void);
2355
+ GGML_API int ggml_cpu_has_f16c (void);
2356
+ GGML_API int ggml_cpu_has_fp16_va (void);
2357
+ GGML_API int ggml_cpu_has_wasm_simd (void);
2358
+ GGML_API int ggml_cpu_has_blas (void);
2359
+ GGML_API int ggml_cpu_has_cuda (void);
2360
+ GGML_API int ggml_cpu_has_clblast (void);
2361
+ GGML_API int ggml_cpu_has_vulkan (void);
2362
+ GGML_API int ggml_cpu_has_kompute (void);
2363
+ GGML_API int ggml_cpu_has_gpublas (void);
2364
+ GGML_API int ggml_cpu_has_sse3 (void);
2365
+ GGML_API int ggml_cpu_has_ssse3 (void);
2366
+ GGML_API int ggml_cpu_has_sycl (void);
2367
+ GGML_API int ggml_cpu_has_vsx (void);
2368
+ GGML_API int ggml_cpu_has_matmul_int8(void);
2369
+
2370
+ //
2371
+ // Internal types and functions exposed for tests and benchmarks
2372
+ //
2373
+
2374
+ #ifdef __cplusplus
2375
+ // restrict not standard in C++
2376
+ #define GGML_RESTRICT
2377
+ #else
2378
+ #define GGML_RESTRICT restrict
2379
+ #endif
2380
+ typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
2381
+ typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
2382
+ typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
2383
+ const void * GGML_RESTRICT y, size_t by, int nrc);
2384
+
2385
+ typedef struct {
2386
+ const char * type_name;
2387
+ int blck_size;
2388
+ size_t type_size;
2389
+ bool is_quantized;
2390
+ ggml_to_float_t to_float;
2391
+ ggml_from_float_t from_float;
2392
+ ggml_from_float_t from_float_reference;
2393
+ ggml_vec_dot_t vec_dot;
2394
+ enum ggml_type vec_dot_type;
2395
+ int64_t nrows; // number of rows to process simultaneously
2396
+ } ggml_type_traits_t;
2397
+
2398
+ GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
745
2399
 
746
2400
  #ifdef __cplusplus
747
2401
  }