whispercpp 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- checksums.yaml +4 -4
- data/.gitignore +5 -0
- data/LICENSE +1 -1
- data/README.md +165 -434
- data/Rakefile +60 -11
- data/ext/.gitignore +13 -0
- data/ext/cpu.mk +9 -0
- data/ext/{dr_wav.h → examples/dr_wav.h} +3560 -1179
- data/ext/extconf.rb +185 -16
- data/ext/ggml/include/ggml-alloc.h +76 -0
- data/ext/ggml/include/ggml-backend.h +352 -0
- data/ext/ggml/include/ggml-blas.h +25 -0
- data/ext/ggml/include/ggml-cann.h +123 -0
- data/ext/ggml/include/ggml-cpp.h +38 -0
- data/ext/ggml/include/ggml-cpu.h +135 -0
- data/ext/ggml/include/ggml-cuda.h +47 -0
- data/ext/ggml/include/ggml-kompute.h +50 -0
- data/ext/ggml/include/ggml-metal.h +66 -0
- data/ext/ggml/include/ggml-opencl.h +26 -0
- data/ext/ggml/include/ggml-opt.h +216 -0
- data/ext/ggml/include/ggml-rpc.h +28 -0
- data/ext/ggml/include/ggml-sycl.h +49 -0
- data/ext/ggml/include/ggml-vulkan.h +31 -0
- data/ext/{ggml.h → ggml/include/ggml.h} +479 -596
- data/ext/ggml/src/ggml-alloc.c +1037 -0
- data/ext/ggml/src/ggml-amx/common.h +94 -0
- data/ext/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- data/ext/ggml/src/ggml-amx/mmq.cpp +2510 -0
- data/ext/ggml/src/ggml-amx/mmq.h +17 -0
- data/ext/ggml/src/ggml-backend-impl.h +256 -0
- data/ext/ggml/src/ggml-backend-reg.cpp +552 -0
- data/ext/ggml/src/ggml-backend.cpp +1999 -0
- data/ext/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.cpp +175 -0
- data/ext/ggml/src/ggml-cann/acl_tensor.h +258 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.cpp +3427 -0
- data/ext/ggml/src/ggml-cann/aclnn_ops.h +592 -0
- data/ext/ggml/src/ggml-cann/common.h +286 -0
- data/ext/ggml/src/ggml-cann/ggml-cann.cpp +2188 -0
- data/ext/ggml/src/ggml-cann/kernels/ascendc_kernels.h +19 -0
- data/ext/ggml/src/ggml-cann/kernels/dup.cpp +236 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f16.cpp +197 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_f32.cpp +190 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +204 -0
- data/ext/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +218 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +216 -0
- data/ext/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +295 -0
- data/ext/ggml/src/ggml-common.h +1853 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- data/ext/ggml/src/ggml-cpu/amx/amx.h +8 -0
- data/ext/ggml/src/ggml-cpu/amx/common.h +91 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- data/ext/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- data/ext/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +4262 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-impl.h +386 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- data/ext/ggml/src/ggml-cpu/ggml-cpu.cpp +622 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1884 -0
- data/ext/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- data/ext/ggml/src/ggml-cuda/vendors/hip.h +186 -0
- data/ext/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- data/ext/ggml/src/ggml-impl.h +556 -0
- data/ext/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
- data/ext/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.m +4884 -0
- data/ext/ggml/src/ggml-metal/ggml-metal.metal +6732 -0
- data/ext/ggml/src/ggml-opt.cpp +854 -0
- data/ext/ggml/src/ggml-quants.c +5238 -0
- data/ext/ggml/src/ggml-quants.h +100 -0
- data/ext/ggml/src/ggml-rpc/ggml-rpc.cpp +1406 -0
- data/ext/ggml/src/ggml-sycl/common.cpp +95 -0
- data/ext/ggml/src/ggml-sycl/concat.cpp +196 -0
- data/ext/ggml/src/ggml-sycl/conv.cpp +99 -0
- data/ext/ggml/src/ggml-sycl/convert.cpp +547 -0
- data/ext/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
- data/ext/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- data/ext/ggml/src/ggml-sycl/ggml-sycl.cpp +4729 -0
- data/ext/ggml/src/ggml-sycl/im2col.cpp +126 -0
- data/ext/ggml/src/ggml-sycl/mmq.cpp +3031 -0
- data/ext/ggml/src/ggml-sycl/mmvq.cpp +1015 -0
- data/ext/ggml/src/ggml-sycl/norm.cpp +378 -0
- data/ext/ggml/src/ggml-sycl/outprod.cpp +56 -0
- data/ext/ggml/src/ggml-sycl/rope.cpp +276 -0
- data/ext/ggml/src/ggml-sycl/softmax.cpp +251 -0
- data/ext/ggml/src/ggml-sycl/tsembd.cpp +72 -0
- data/ext/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- data/ext/ggml/src/ggml-threading.cpp +12 -0
- data/ext/ggml/src/ggml-threading.h +14 -0
- data/ext/ggml/src/ggml-vulkan/ggml-vulkan.cpp +8657 -0
- data/ext/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- data/ext/ggml/src/ggml.c +7694 -0
- data/ext/{whisper.h → include/whisper.h} +23 -22
- data/ext/metal-embed.mk +17 -0
- data/ext/metal.mk +6 -0
- data/ext/ruby_whisper.cpp +1492 -9
- data/ext/ruby_whisper.h +10 -0
- data/ext/scripts/get-flags.mk +38 -0
- data/ext/src/coreml/whisper-decoder-impl.h +146 -0
- data/ext/src/coreml/whisper-decoder-impl.m +201 -0
- data/ext/src/coreml/whisper-encoder-impl.h +142 -0
- data/ext/src/coreml/whisper-encoder-impl.m +197 -0
- data/ext/src/coreml/whisper-encoder.h +26 -0
- data/ext/src/openvino/whisper-openvino-encoder.cpp +108 -0
- data/ext/src/openvino/whisper-openvino-encoder.h +31 -0
- data/ext/{whisper.cpp → src/whisper.cpp} +661 -492
- data/extsources.rb +6 -0
- data/lib/whisper/model/uri.rb +157 -0
- data/lib/whisper.rb +2 -0
- data/tests/helper.rb +7 -0
- data/tests/jfk_reader/.gitignore +5 -0
- data/tests/jfk_reader/extconf.rb +3 -0
- data/tests/jfk_reader/jfk_reader.c +68 -0
- data/tests/test_callback.rb +160 -0
- data/tests/test_error.rb +20 -0
- data/tests/test_model.rb +71 -0
- data/tests/test_package.rb +31 -0
- data/tests/test_params.rb +160 -0
- data/tests/test_segment.rb +83 -0
- data/tests/test_whisper.rb +211 -123
- data/whispercpp.gemspec +36 -0
- metadata +137 -11
- data/ext/ggml.c +0 -21755
data/ext/{ggml.h → ggml/include/ggml.h}

```diff
@@ -176,25 +176,15 @@
 #ifdef GGML_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef GGML_BUILD
-#            define GGML_API __declspec(dllexport)
+#            define GGML_API __declspec(dllexport) extern
 #        else
-#            define GGML_API __declspec(dllimport)
+#            define GGML_API __declspec(dllimport) extern
 #        endif
 #    else
-#        define GGML_API __attribute__ ((visibility ("default")))
+#        define GGML_API __attribute__ ((visibility ("default"))) extern
 #    endif
 #else
-#    define GGML_API
-#endif
-
-#ifdef GGML_MULTIPLATFORM
-#    if defined(_WIN32)
-#        define GGML_CALL
-#    else
-#        define GGML_CALL __attribute__((__ms_abi__))
-#    endif
-#else
-#    define GGML_CALL
+#    define GGML_API extern
 #endif
 
 // TODO: support for clang
@@ -220,21 +210,24 @@
 #include <stdio.h>
 
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2
 
 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
 
 #define GGML_MAX_DIMS           4
 #define GGML_MAX_PARAMS         2048
-#define GGML_MAX_CONTEXTS       64
 #define GGML_MAX_SRC            10
+#define GGML_MAX_N_THREADS      512
+#define GGML_MAX_OP_PARAMS      64
+
 #ifndef GGML_MAX_NAME
-#define GGML_MAX_NAME           64
+#   define GGML_MAX_NAME        64
 #endif
-
+
 #define GGML_DEFAULT_N_THREADS  4
 #define GGML_DEFAULT_GRAPH_SIZE 2048
+
 #if UINTPTR_MAX == 0xFFFFFFFF
     #define GGML_MEM_ALIGN 4
 #else
@@ -244,6 +237,10 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1
 
+#define GGML_ROPE_TYPE_NEOX   2
+#define GGML_ROPE_TYPE_MROPE  8
+#define GGML_ROPE_TYPE_VISION 24
+
 #define GGUF_MAGIC "GGUF"
 
 #define GGUF_VERSION 3
@@ -254,26 +251,27 @@
 
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
 
-#define GGML_ASSERT(x) \
-    do { \
-        if (!(x)) { \
-            fflush(stdout); \
-            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
-            ggml_print_backtrace(); \
-            abort(); \
-        } \
-    } while (0)
-
 #ifndef NDEBUG
-#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
+#   define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
 #elif defined(__GNUC__)
-#define GGML_UNREACHABLE() __builtin_unreachable()
+#   define GGML_UNREACHABLE() __builtin_unreachable()
 #elif defined(_MSC_VER)
-#define GGML_UNREACHABLE() __assume(0)
+#   define GGML_UNREACHABLE() __assume(0)
 #else
-#define GGML_UNREACHABLE() ((void) 0)
+#   define GGML_UNREACHABLE() ((void) 0)
 #endif
 
+#ifdef __cplusplus
+#   define GGML_NORETURN [[noreturn]]
+#elif defined(_MSC_VER)
+#   define GGML_NORETURN __declspec(noreturn)
+#else
+#   define GGML_NORETURN _Noreturn
+#endif
+
+#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
+#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x)
+
 // used to copy the number of elements and stride in bytes of tensors into local variables.
 // main purpose is to reduce code duplication and improve readability.
 //
@@ -312,10 +310,19 @@
     GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
     GGML_TENSOR_LOCALS(size_t,  nb, dst, nb)
 
+#define GGML_TENSOR_BINARY_OP_LOCALS01 \
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+    GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
+    GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
+
     enum ggml_status {
         GGML_STATUS_ALLOC_FAILED = -2,
         GGML_STATUS_FAILED = -1,
@@ -324,19 +331,27 @@ extern "C" {
     };
 
     // get ggml_status name string
-    GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
+    GGML_API const char * ggml_status_to_string(enum ggml_status status);
 
+    // ieee 754-2008 half-precision float16
+    // todo: make this not an integral type
     typedef uint16_t ggml_fp16_t;
-
-    // convert FP16 <-> FP32
-    GGML_API float       ggml_fp16_to_fp32(ggml_fp16_t x);
-    GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
-
-    GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
-    GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
+    GGML_API float       ggml_fp16_to_fp32(ggml_fp16_t);
+    GGML_API ggml_fp16_t ggml_fp32_to_fp16(float);
+    GGML_API void        ggml_fp16_to_fp32_row(const ggml_fp16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_fp16_row(const float *, ggml_fp16_t *, int64_t);
+
+    // google brain half-precision bfloat16
+    typedef struct { uint16_t bits; } ggml_bf16_t;
+    GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
+    GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
+    GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
 
     struct ggml_object;
     struct ggml_context;
+    struct ggml_cgraph;
 
     // NOTE: always add types at the end of the enum to keep backward compatibility
     enum ggml_type {
@@ -370,7 +385,16 @@ extern "C" {
         GGML_TYPE_I64     = 27,
         GGML_TYPE_F64     = 28,
         GGML_TYPE_IQ1_M   = 29,
-        GGML_TYPE_COUNT,
+        GGML_TYPE_BF16    = 30,
+        // GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
+        // GGML_TYPE_Q4_0_4_8 = 32,
+        // GGML_TYPE_Q4_0_8_8 = 33,
+        GGML_TYPE_TQ1_0   = 34,
+        GGML_TYPE_TQ2_0   = 35,
+        // GGML_TYPE_IQ4_NL_4_4 = 36,
+        // GGML_TYPE_IQ4_NL_4_8 = 37,
+        // GGML_TYPE_IQ4_NL_8_8 = 38,
+        GGML_TYPE_COUNT   = 39,
     };
 
     // precision
@@ -410,6 +434,7 @@ extern "C" {
         GGML_FTYPE_MOSTLY_IQ2_S  = 21, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
         GGML_FTYPE_MOSTLY_IQ1_M  = 23, // except 1d tensors
+        GGML_FTYPE_MOSTLY_BF16   = 24, // except 1d tensors
     };
 
     // available tensor operations:
@@ -426,10 +451,13 @@ extern "C" {
         GGML_OP_SQR,
         GGML_OP_SQRT,
         GGML_OP_LOG,
+        GGML_OP_SIN,
+        GGML_OP_COS,
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_ARGMAX,
+        GGML_OP_COUNT_EQUAL,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
         GGML_OP_CONCAT,
@@ -460,22 +488,23 @@ extern "C" {
         GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
-        GGML_OP_ALIBI,
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
+        GGML_OP_IM2COL_BACK,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
+        GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
+        GGML_OP_PAD_REFLECT_1D,
         GGML_OP_ARANGE,
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
 
-        GGML_OP_FLASH_ATTN,
-        GGML_OP_FLASH_FF,
+        GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
         GGML_OP_SSM_CONV,
         GGML_OP_SSM_SCAN,
@@ -483,6 +512,7 @@ extern "C" {
         GGML_OP_WIN_UNPART,
         GGML_OP_GET_REL_POS,
         GGML_OP_ADD_REL_POS,
+        GGML_OP_RWKV_WKV6,
 
         GGML_OP_UNARY,
 
@@ -499,6 +529,7 @@ extern "C" {
 
         GGML_OP_CROSS_ENTROPY_LOSS,
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+        GGML_OP_OPT_STEP_ADAMW,
 
         GGML_OP_COUNT,
     };
@@ -511,11 +542,13 @@ extern "C" {
         GGML_UNARY_OP_TANH,
         GGML_UNARY_OP_ELU,
         GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_SIGMOID,
         GGML_UNARY_OP_GELU,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
         GGML_UNARY_OP_HARDSWISH,
         GGML_UNARY_OP_HARDSIGMOID,
+        GGML_UNARY_OP_EXP,
 
         GGML_UNARY_OP_COUNT,
     };
@@ -527,36 +560,34 @@ extern "C" {
     };
 
     enum ggml_log_level {
-        GGML_LOG_LEVEL_ERROR = 2,
+        GGML_LOG_LEVEL_NONE  = 0,
+        GGML_LOG_LEVEL_DEBUG = 1,
+        GGML_LOG_LEVEL_INFO  = 2,
         GGML_LOG_LEVEL_WARN  = 3,
-        GGML_LOG_LEVEL_INFO  = 4,
-        GGML_LOG_LEVEL_DEBUG = 5
+        GGML_LOG_LEVEL_ERROR = 4,
+        GGML_LOG_LEVEL_CONT  = 5, // continue previous log
     };
 
+    // this tensor...
     enum ggml_tensor_flag {
-        GGML_TENSOR_FLAG_INPUT  = 1,
-        GGML_TENSOR_FLAG_OUTPUT = 2,
-        GGML_TENSOR_FLAG_PARAM  = 4,
+        GGML_TENSOR_FLAG_INPUT  =  1, // ...is an input for the GGML compute graph
+        GGML_TENSOR_FLAG_OUTPUT =  2, // ...is an output for the GGML compute graph
+        GGML_TENSOR_FLAG_PARAM  =  4, // ...contains trainable parameters
+        GGML_TENSOR_FLAG_LOSS   =  8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };
 
-    // ggml object
-    struct ggml_object {
-        size_t offs;
-        size_t size;
-
-        struct ggml_object * next;
-
-        enum ggml_object_type type;
-
-        char padding[4];
+    struct ggml_init_params {
+        // memory pool
+        size_t mem_size;   // bytes
+        void * mem_buffer; // if NULL, memory will be allocated internally
+        bool   no_alloc;   // don't allocate memory for the tensor data
     };
 
-    static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
-
     // n-dimensional tensor
     struct ggml_tensor {
-        enum ggml_type         type;
-        enum ggml_backend_type backend;
+        enum ggml_type type;
+
+        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
         struct ggml_backend_buffer * buffer;
 
@@ -574,14 +605,9 @@ extern "C" {
 
         int32_t flags;
 
-        struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];
 
-        // performance
-        int     perf_runs;
-        int64_t perf_cycles;
-        int64_t perf_time_us;
-
+        // source tensor and offset for views
         struct ggml_tensor * view_src;
         size_t               view_offs;
 
@@ -601,95 +627,6 @@ extern "C" {
|
|
601
627
|
// If it returns true, the computation is aborted
|
602
628
|
typedef bool (*ggml_abort_callback)(void * data);
|
603
629
|
|
604
|
-
// the compute plan that needs to be prepared for ggml_graph_compute()
|
605
|
-
// since https://github.com/ggerganov/ggml/issues/287
|
606
|
-
struct ggml_cplan {
|
607
|
-
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
|
608
|
-
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
|
609
|
-
|
610
|
-
int n_threads;
|
611
|
-
|
612
|
-
// abort ggml_graph_compute when true
|
613
|
-
ggml_abort_callback abort_callback;
|
614
|
-
void * abort_callback_data;
|
615
|
-
};
|
616
|
-
|
617
|
-
enum ggml_cgraph_eval_order {
|
618
|
-
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
|
619
|
-
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
|
620
|
-
GGML_CGRAPH_EVAL_ORDER_COUNT
|
621
|
-
};
|
622
|
-
|
623
|
-
struct ggml_hash_set {
|
624
|
-
size_t size;
|
625
|
-
struct ggml_tensor ** keys;
|
626
|
-
};
|
627
|
-
|
628
|
-
// computation graph
|
629
|
-
struct ggml_cgraph {
|
630
|
-
int size;
|
631
|
-
int n_nodes;
|
632
|
-
int n_leafs;
|
633
|
-
|
634
|
-
struct ggml_tensor ** nodes;
|
635
|
-
struct ggml_tensor ** grads;
|
636
|
-
struct ggml_tensor ** leafs;
|
637
|
-
|
638
|
-
struct ggml_hash_set visited_hash_table;
|
639
|
-
|
640
|
-
enum ggml_cgraph_eval_order order;
|
641
|
-
|
642
|
-
// performance
|
643
|
-
int perf_runs;
|
644
|
-
int64_t perf_cycles;
|
645
|
-
int64_t perf_time_us;
|
646
|
-
};
|
647
|
-
|
648
|
-
// scratch buffer
|
649
|
-
struct ggml_scratch {
|
650
|
-
size_t offs;
|
651
|
-
size_t size;
|
652
|
-
void * data;
|
653
|
-
};
|
654
|
-
|
655
|
-
struct ggml_init_params {
|
656
|
-
// memory pool
|
657
|
-
size_t mem_size; // bytes
|
658
|
-
void * mem_buffer; // if NULL, memory will be allocated internally
|
659
|
-
bool no_alloc; // don't allocate memory for the tensor data
|
660
|
-
};
|
661
|
-
|
662
|
-
|
663
|
-
// compute types
|
664
|
-
|
665
|
-
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
666
|
-
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
667
|
-
enum ggml_task_type {
|
668
|
-
GGML_TASK_TYPE_INIT = 0,
|
669
|
-
GGML_TASK_TYPE_COMPUTE,
|
670
|
-
GGML_TASK_TYPE_FINALIZE,
|
671
|
-
};
|
672
|
-
|
673
|
-
struct ggml_compute_params {
|
674
|
-
enum ggml_task_type type;
|
675
|
-
|
676
|
-
// ith = thread index, nth = number of threads
|
677
|
-
int ith, nth;
|
678
|
-
|
679
|
-
// work buffer for all threads
|
680
|
-
size_t wsize;
|
681
|
-
void * wdata;
|
682
|
-
};
|
683
|
-
|
684
|
-
// numa strategies
|
685
|
-
enum ggml_numa_strategy {
|
686
|
-
GGML_NUMA_STRATEGY_DISABLED = 0,
|
687
|
-
GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
|
688
|
-
GGML_NUMA_STRATEGY_ISOLATE = 2,
|
689
|
-
GGML_NUMA_STRATEGY_NUMACTL = 3,
|
690
|
-
GGML_NUMA_STRATEGY_MIRROR = 4,
|
691
|
-
GGML_NUMA_STRATEGY_COUNT
|
692
|
-
};
|
693
630
|
|
694
631
|
//
|
695
632
|
// GUID
|
@@ -709,67 +646,71 @@ extern "C" {
|
|
709
646
|
GGML_API int64_t ggml_cycles(void);
|
710
647
|
GGML_API int64_t ggml_cycles_per_ms(void);
|
711
648
|
|
712
|
-
GGML_API void ggml_print_backtrace(void);
|
713
|
-
|
714
649
|
// accepts a UTF-8 path, even on Windows
|
715
650
|
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
|
716
651
|
|
717
|
-
GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
|
718
|
-
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
719
|
-
|
720
652
|
GGML_API void ggml_print_object (const struct ggml_object * obj);
|
721
653
|
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
|
722
654
|
|
723
|
-
GGML_API
|
724
|
-
GGML_API
|
725
|
-
GGML_API
|
726
|
-
GGML_API
|
655
|
+
GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
|
656
|
+
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
|
657
|
+
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
658
|
+
GGML_API size_t ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
|
727
659
|
|
728
|
-
GGML_API
|
729
|
-
GGML_API
|
730
|
-
GGML_API
|
660
|
+
GGML_API int64_t ggml_blck_size(enum ggml_type type);
|
661
|
+
GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
|
662
|
+
GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
|
731
663
|
|
732
664
|
GGML_DEPRECATED(
|
733
665
|
GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
|
734
666
|
"use ggml_row_size() instead");
|
735
667
|
|
736
|
-
GGML_API
|
737
|
-
GGML_API
|
738
|
-
GGML_API
|
668
|
+
GGML_API const char * ggml_type_name(enum ggml_type type);
|
669
|
+
GGML_API const char * ggml_op_name (enum ggml_op op);
|
670
|
+
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
739
671
|
|
740
|
-
GGML_API
|
741
|
-
GGML_API
|
672
|
+
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
673
|
+
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
742
674
|
|
743
|
-
GGML_API
|
675
|
+
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
744
676
|
|
745
|
-
GGML_API
|
677
|
+
GGML_API bool ggml_is_quantized(enum ggml_type type);
|
746
678
|
|
747
679
|
// TODO: temporary until model loading of ggml examples is refactored
|
748
680
|
GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
|
749
681
|
|
750
|
-
GGML_API
|
751
|
-
GGML_API
|
752
|
-
GGML_API
|
753
|
-
GGML_API
|
754
|
-
GGML_API
|
755
|
-
GGML_API
|
756
|
-
GGML_API
|
757
|
-
GGML_API
|
758
|
-
|
682
|
+
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
683
|
+
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
684
|
+
GGML_API bool ggml_is_empty (const struct ggml_tensor * tensor);
|
685
|
+
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
686
|
+
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
|
687
|
+
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
|
688
|
+
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
689
|
+
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
690
|
+
|
691
|
+
GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
|
692
|
+
GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
|
693
|
+
GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
|
694
|
+
GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
|
759
695
|
|
760
|
-
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
696
|
+
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
697
|
+
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
698
|
+
|
699
|
+
GGML_API bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
761
700
|
|
762
701
|
// use this to compute the memory overhead of a tensor
|
763
702
|
GGML_API size_t ggml_tensor_overhead(void);
|
764
703
|
|
704
|
+
GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);
|
705
|
+
|
765
706
|
// main
|
766
707
|
|
767
|
-
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
|
768
|
-
GGML_API void
|
708
|
+
GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
|
709
|
+
GGML_API void ggml_reset(struct ggml_context * ctx);
|
710
|
+
GGML_API void ggml_free (struct ggml_context * ctx);
|
769
711
|
|
770
712
|
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
|
771
713
|
|
772
|
-
GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
|
773
714
|
GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
|
774
715
|
GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
|
775
716
|
|
@@ -809,8 +750,7 @@ extern "C" {
|
|
809
750
|
int64_t ne2,
|
810
751
|
int64_t ne3);
|
811
752
|
|
812
|
-
GGML_API
|
813
|
-
GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
753
|
+
GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);
|
814
754
|
|
815
755
|
GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
|
816
756
|
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
|
@@ -820,35 +760,25 @@ extern "C" {
|
|
820
760
|
GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
|
821
761
|
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
822
762
|
|
823
|
-
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
824
|
-
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
825
|
-
GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
826
|
-
|
827
763
|
// Converts a flat index into coordinates
|
828
|
-
GGML_API void
|
764
|
+
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
829
765
|
|
830
|
-
GGML_API
|
831
|
-
GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
832
|
-
|
833
|
-
GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
834
|
-
GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
|
835
|
-
|
836
|
-
GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
|
837
|
-
GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
838
|
-
|
839
|
-
GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
840
|
-
GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
766
|
+
GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
841
767
|
|
842
768
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
843
769
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
844
770
|
|
845
|
-
GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
|
846
|
-
|
847
771
|
GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
|
848
772
|
GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
|
849
773
|
GGML_ATTRIBUTE_FORMAT(2, 3)
|
850
774
|
GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
|
851
775
|
|
776
|
+
// Tensor flags
|
777
|
+
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
|
778
|
+
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
|
779
|
+
GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
|
780
|
+
GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
|
781
|
+
|
852
782
|
//
|
853
783
|
// operations on tensors with backpropagation
|
854
784
|
//
|
@@ -963,6 +893,22 @@ extern "C" {
|
|
963
893
|
struct ggml_context * ctx,
|
964
894
|
struct ggml_tensor * a);
|
965
895
|
|
896
|
+
GGML_API struct ggml_tensor * ggml_sin(
|
897
|
+
struct ggml_context * ctx,
|
898
|
+
struct ggml_tensor * a);
|
899
|
+
|
900
|
+
GGML_API struct ggml_tensor * ggml_sin_inplace(
|
901
|
+
struct ggml_context * ctx,
|
902
|
+
struct ggml_tensor * a);
|
903
|
+
|
904
|
+
GGML_API struct ggml_tensor * ggml_cos(
|
905
|
+
struct ggml_context * ctx,
|
906
|
+
struct ggml_tensor * a);
|
907
|
+
|
908
|
+
GGML_API struct ggml_tensor * ggml_cos_inplace(
|
909
|
+
struct ggml_context * ctx,
|
910
|
+
struct ggml_tensor * a);
|
911
|
+
|
966
912
|
// return scalar
|
967
913
|
GGML_API struct ggml_tensor * ggml_sum(
|
968
914
|
struct ggml_context * ctx,
|
@@ -983,6 +929,12 @@ extern "C" {
|
|
983
929
|
struct ggml_context * ctx,
|
984
930
|
struct ggml_tensor * a);
|
985
931
|
|
932
|
+
// count number of equal elements in a and b
|
933
|
+
GGML_API struct ggml_tensor * ggml_count_equal(
|
934
|
+
struct ggml_context * ctx,
|
935
|
+
struct ggml_tensor * a,
|
936
|
+
struct ggml_tensor * b);
|
937
|
+
|
986
938
|
// if a is the same shape as b, and a is not parameter, return a
|
987
939
|
// otherwise, return a new tensor: repeat(a) to fit in b
|
988
940
|
GGML_API struct ggml_tensor * ggml_repeat(
|
@@ -996,12 +948,13 @@ extern "C" {
|
|
996
948
|
struct ggml_tensor * a,
|
997
949
|
struct ggml_tensor * b);
|
998
950
|
|
999
|
-
// concat a and b
|
951
|
+
// concat a and b along dim
|
1000
952
|
// used in stable-diffusion
|
1001
953
|
GGML_API struct ggml_tensor * ggml_concat(
|
1002
954
|
struct ggml_context * ctx,
|
1003
955
|
struct ggml_tensor * a,
|
1004
|
-
struct ggml_tensor * b
|
956
|
+
struct ggml_tensor * b,
|
957
|
+
int dim);
|
1005
958
|
|
1006
959
|
GGML_API struct ggml_tensor * ggml_abs(
|
1007
960
|
struct ggml_context * ctx,
|
@@ -1063,6 +1016,14 @@ extern "C" {
|
|
1063
1016
|
struct ggml_context * ctx,
|
1064
1017
|
struct ggml_tensor * a);
|
1065
1018
|
|
1019
|
+
GGML_API struct ggml_tensor * ggml_sigmoid(
|
1020
|
+
struct ggml_context * ctx,
|
1021
|
+
struct ggml_tensor * a);
|
1022
|
+
|
1023
|
+
GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
|
1024
|
+
struct ggml_context * ctx,
|
1025
|
+
struct ggml_tensor * a);
|
1026
|
+
|
1066
1027
|
GGML_API struct ggml_tensor * ggml_gelu(
|
1067
1028
|
struct ggml_context * ctx,
|
1068
1029
|
struct ggml_tensor * a);
|
@@ -1104,6 +1065,14 @@ extern "C" {
|
|
1104
1065
|
struct ggml_context * ctx,
|
1105
1066
|
struct ggml_tensor * a);
|
1106
1067
|
|
1068
|
+
GGML_API struct ggml_tensor * ggml_exp(
|
1069
|
+
struct ggml_context * ctx,
|
1070
|
+
struct ggml_tensor * a);
|
1071
|
+
|
1072
|
+
GGML_API struct ggml_tensor * ggml_exp_inplace(
|
1073
|
+
struct ggml_context * ctx,
|
1074
|
+
struct ggml_tensor * a);
|
1075
|
+
|
1107
1076
|
// normalize along rows
|
1108
1077
|
GGML_API struct ggml_tensor * ggml_norm(
|
1109
1078
|
struct ggml_context * ctx,
|
@@ -1127,16 +1096,17 @@ extern "C" {
|
|
1127
1096
|
|
1128
1097
|
// group normalize along ne0*ne1*n_groups
|
1129
1098
|
// used in stable-diffusion
|
1130
|
-
// TODO: eps is hardcoded to 1e-6 for now
|
1131
1099
|
GGML_API struct ggml_tensor * ggml_group_norm(
|
1132
1100
|
struct ggml_context * ctx,
|
1133
1101
|
struct ggml_tensor * a,
|
1134
|
-
int n_groups
|
1102
|
+
int n_groups,
|
1103
|
+
float eps);
|
1135
1104
|
|
1136
1105
|
GGML_API struct ggml_tensor * ggml_group_norm_inplace(
|
1137
1106
|
struct ggml_context * ctx,
|
1138
1107
|
struct ggml_tensor * a,
|
1139
|
-
int n_groups
|
1108
|
+
int n_groups,
|
1109
|
+
float eps);
|
1140
1110
|
|
1141
1111
|
// a - x
|
1142
1112
|
// b - dy
|
@@ -1161,13 +1131,11 @@ extern "C" {
|
|
1161
1131
|
enum ggml_prec prec);
|
1162
1132
|
|
1163
1133
|
// indirect matrix multiplication
|
1164
|
-
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1165
1134
|
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
1166
1135
|
struct ggml_context * ctx,
|
1167
1136
|
struct ggml_tensor * as,
|
1168
|
-
struct ggml_tensor *
|
1169
|
-
|
1170
|
-
struct ggml_tensor * b);
|
1137
|
+
struct ggml_tensor * b,
|
1138
|
+
struct ggml_tensor * ids);
|
1171
1139
|
|
1172
1140
|
// A: m columns, n rows,
|
1173
1141
|
// B: p columns, n rows,
|
@@ -1200,7 +1168,7 @@ extern "C" {
|
|
1200
1168
|
size_t nb1,
|
1201
1169
|
size_t nb2,
|
1202
1170
|
size_t nb3,
|
1203
|
-
size_t offset);
|
1171
|
+
size_t offset); // in bytes
|
1204
1172
|
|
1205
1173
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
1206
1174
|
GGML_API struct ggml_tensor * ggml_set_inplace(
|
@@ -1210,19 +1178,19 @@ extern "C" {
|
|
1210
1178
|
size_t nb1,
|
1211
1179
|
size_t nb2,
|
1212
1180
|
size_t nb3,
|
1213
|
-
size_t offset);
|
1181
|
+
size_t offset); // in bytes
|
1214
1182
|
|
1215
1183
|
GGML_API struct ggml_tensor * ggml_set_1d(
|
1216
1184
|
struct ggml_context * ctx,
|
1217
1185
|
struct ggml_tensor * a,
|
1218
1186
|
struct ggml_tensor * b,
|
1219
|
-
size_t offset);
|
1187
|
+
size_t offset); // in bytes
|
1220
1188
|
|
1221
1189
|
GGML_API struct ggml_tensor * ggml_set_1d_inplace(
|
1222
1190
|
struct ggml_context * ctx,
|
1223
1191
|
struct ggml_tensor * a,
|
1224
1192
|
struct ggml_tensor * b,
|
1225
|
-
size_t offset);
|
1193
|
+
size_t offset); // in bytes
|
1226
1194
|
|
1227
1195
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
1228
1196
|
GGML_API struct ggml_tensor * ggml_set_2d(
|
@@ -1230,7 +1198,7 @@ extern "C" {
|
|
1230
1198
|
struct ggml_tensor * a,
|
1231
1199
|
struct ggml_tensor * b,
|
1232
1200
|
size_t nb1,
|
1233
|
-
size_t offset);
|
1201
|
+
size_t offset); // in bytes
|
1234
1202
|
|
1235
1203
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
1236
1204
|
GGML_API struct ggml_tensor * ggml_set_2d_inplace(
|
@@ -1238,7 +1206,7 @@ extern "C" {
|
|
1238
1206
|
struct ggml_tensor * a,
|
1239
1207
|
struct ggml_tensor * b,
|
1240
1208
|
size_t nb1,
|
1241
|
-
size_t offset);
|
1209
|
+
size_t offset); // in bytes
|
1242
1210
|
|
1243
1211
|
// a -> b, return view(b)
|
1244
1212
|
GGML_API struct ggml_tensor * ggml_cpy(
|
@@ -1373,14 +1341,14 @@ extern "C" {
|
|
1373
1341
|
// supports 3D: a->ne[2] == b->ne[1]
|
1374
1342
|
GGML_API struct ggml_tensor * ggml_get_rows(
|
1375
1343
|
struct ggml_context * ctx,
|
1376
|
-
struct ggml_tensor * a,
|
1377
|
-
struct ggml_tensor * b);
|
1344
|
+
struct ggml_tensor * a, // data
|
1345
|
+
struct ggml_tensor * b); // row indices
|
1378
1346
|
|
1379
1347
|
GGML_API struct ggml_tensor * ggml_get_rows_back(
|
1380
1348
|
struct ggml_context * ctx,
|
1381
|
-
struct ggml_tensor * a,
|
1382
|
-
struct ggml_tensor * b,
|
1383
|
-
struct ggml_tensor * c);
|
1349
|
+
struct ggml_tensor * a, // gradients of ggml_get_rows result
|
1350
|
+
struct ggml_tensor * b, // row indices
|
1351
|
+
struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
|
1384
1352
|
|
1385
1353
|
GGML_API struct ggml_tensor * ggml_diag(
|
1386
1354
|
struct ggml_context * ctx,
|
@@ -1419,15 +1387,13 @@ extern "C" {
|
|
1419
1387
|
struct ggml_context * ctx,
|
1420
1388
|
struct ggml_tensor * a);
|
1421
1389
|
|
1422
|
-
// fused soft_max(a*scale + mask
|
1390
|
+
// fused soft_max(a*scale + mask*(ALiBi slope))
|
1423
1391
|
// mask is optional
|
1424
|
-
// pos is required when max_bias > 0.0f
|
1425
1392
|
// max_bias = 0.0f for no ALiBi
|
1426
1393
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
1427
1394
|
struct ggml_context * ctx,
|
1428
1395
|
struct ggml_tensor * a,
|
1429
1396
|
struct ggml_tensor * mask,
|
1430
|
-
struct ggml_tensor * pos,
|
1431
1397
|
float scale,
|
1432
1398
|
float max_bias);
|
1433
1399
|
|
@@ -1443,9 +1409,8 @@ extern "C" {
|
|
1443
1409
|
struct ggml_tensor * b);
|
1444
1410
|
|
1445
1411
|
// rotary position embedding
|
1446
|
-
// if mode & 1
|
1447
|
-
// if mode &
|
1448
|
-
// if mode & 4 == 1, ChatGLM style
|
1412
|
+
// if (mode & 1) - skip n_past elements (NOT SUPPORTED)
|
1413
|
+
// if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
|
1449
1414
|
//
|
1450
1415
|
// b is an int32 vector with size a->ne[2], it contains the positions
|
1451
1416
|
GGML_API struct ggml_tensor * ggml_rope(
|
@@ -1453,8 +1418,7 @@ extern "C" {
|
|
1453
1418
|
struct ggml_tensor * a,
|
1454
1419
|
struct ggml_tensor * b,
|
1455
1420
|
int n_dims,
|
1456
|
-
int mode
|
1457
|
-
int n_ctx);
|
1421
|
+
int mode);
|
1458
1422
|
|
1459
1423
|
// in-place, returns view(a)
|
1460
1424
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
@@ -1462,18 +1426,18 @@ extern "C" {
|
|
1462
1426
|
struct ggml_tensor * a,
|
1463
1427
|
struct ggml_tensor * b,
|
1464
1428
|
int n_dims,
|
1465
|
-
int mode
|
1466
|
-
int n_ctx);
|
1429
|
+
int mode);
|
1467
1430
|
|
1468
1431
|
// custom RoPE
|
1469
|
-
|
1432
|
+
// c is freq factors (e.g. phi3-128k), (optional)
|
1433
|
+
GGML_API struct ggml_tensor * ggml_rope_ext(
|
1470
1434
|
struct ggml_context * ctx,
|
1471
1435
|
struct ggml_tensor * a,
|
1472
1436
|
struct ggml_tensor * b,
|
1437
|
+
struct ggml_tensor * c,
|
1473
1438
|
int n_dims,
|
1474
1439
|
int mode,
|
1475
|
-
int
|
1476
|
-
int n_orig_ctx,
|
1440
|
+
int n_ctx_orig,
|
1477
1441
|
float freq_base,
|
1478
1442
|
float freq_scale,
|
1479
1443
|
float ext_factor,
|
@@ -1481,15 +1445,15 @@ extern "C" {
|
|
1481
1445
|
float beta_fast,
|
1482
1446
|
float beta_slow);
|
1483
1447
|
|
1484
|
-
|
1485
|
-
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
1448
|
+
GGML_API struct ggml_tensor * ggml_rope_multi(
|
1486
1449
|
struct ggml_context * ctx,
|
1487
1450
|
struct ggml_tensor * a,
|
1488
1451
|
struct ggml_tensor * b,
|
1452
|
+
struct ggml_tensor * c,
|
1489
1453
|
int n_dims,
|
1454
|
+
int sections[4],
|
1490
1455
|
int mode,
|
1491
|
-
int
|
1492
|
-
int n_orig_ctx,
|
1456
|
+
int n_ctx_orig,
|
1493
1457
|
float freq_base,
|
1494
1458
|
float freq_scale,
|
1495
1459
|
float ext_factor,
|
@@ -1497,47 +1461,72 @@ extern "C" {
|
|
1497
1461
|
float beta_fast,
|
1498
1462
|
float beta_slow);
|
1499
1463
|
|
1500
|
-
//
|
1501
|
-
|
1502
|
-
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
|
1503
|
-
|
1504
|
-
// xPos RoPE, in-place, returns view(a)
|
1505
|
-
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
|
1464
|
+
// in-place, returns view(a)
|
1465
|
+
GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
|
1506
1466
|
struct ggml_context * ctx,
|
1507
1467
|
struct ggml_tensor * a,
|
1508
1468
|
struct ggml_tensor * b,
|
1469
|
+
struct ggml_tensor * c,
|
1509
1470
|
int n_dims,
|
1510
|
-
|
1511
|
-
|
1471
|
+
int mode,
|
1472
|
+
int n_ctx_orig,
|
1473
|
+
float freq_base,
|
1474
|
+
float freq_scale,
|
1475
|
+
float ext_factor,
|
1476
|
+
float attn_factor,
|
1477
|
+
float beta_fast,
|
1478
|
+
float beta_slow);
|
1512
1479
|
|
1513
|
-
|
1514
|
-
// a - dy
|
1515
|
-
GGML_API struct ggml_tensor * ggml_rope_back(
|
1480
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
|
1516
1481
|
struct ggml_context * ctx,
|
1517
1482
|
struct ggml_tensor * a,
|
1518
1483
|
struct ggml_tensor * b,
|
1519
1484
|
int n_dims,
|
1520
1485
|
int mode,
|
1521
|
-
int
|
1522
|
-
int n_orig_ctx,
|
1486
|
+
int n_ctx_orig,
|
1523
1487
|
float freq_base,
|
1524
1488
|
float freq_scale,
|
1525
1489
|
float ext_factor,
|
1526
1490
|
float attn_factor,
|
1527
1491
|
float beta_fast,
|
1528
|
-
float beta_slow,
|
1529
|
-
|
1530
|
-
bool xpos_down);
|
1492
|
+
float beta_slow),
|
1493
|
+
"use ggml_rope_ext instead");
|
1531
1494
|
|
1532
|
-
|
1533
|
-
// in-place, returns view(a)
|
1534
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
|
1495
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
1535
1496
|
struct ggml_context * ctx,
|
1536
1497
|
struct ggml_tensor * a,
|
1537
|
-
|
1538
|
-
int
|
1539
|
-
|
1540
|
-
|
1498
|
+
struct ggml_tensor * b,
|
1499
|
+
int n_dims,
|
1500
|
+
int mode,
|
1501
|
+
int n_ctx_orig,
|
1502
|
+
float freq_base,
|
1503
|
+
float freq_scale,
|
1504
|
+
float ext_factor,
|
1505
|
+
float attn_factor,
|
1506
|
+
float beta_fast,
|
1507
|
+
float beta_slow),
|
1508
|
+
"use ggml_rope_ext_inplace instead");
|
1509
|
+
|
1510
|
+
// compute correction dims for YaRN RoPE scaling
|
1511
|
+
GGML_API void ggml_rope_yarn_corr_dims(
|
1512
|
+
int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
|
1513
|
+
|
1514
|
+
// rotary position embedding backward, i.e compute dx from dy
|
1515
|
+
// a - dy
|
1516
|
+
GGML_API struct ggml_tensor * ggml_rope_back(
|
1517
|
+
struct ggml_context * ctx,
|
1518
|
+
struct ggml_tensor * a, // gradients of ggml_rope result
|
1519
|
+
struct ggml_tensor * b, // positions
|
1520
|
+
struct ggml_tensor * c, // freq factors
|
1521
|
+
int n_dims,
|
1522
|
+
int mode,
|
1523
|
+
int n_ctx_orig,
|
1524
|
+
float freq_base,
|
1525
|
+
float freq_scale,
|
1526
|
+
float ext_factor,
|
1527
|
+
float attn_factor,
|
1528
|
+
float beta_fast,
|
1529
|
+
float beta_slow);
|
1541
1530
|
|
1542
1531
|
// clamp
|
1543
1532
|
// in-place, returns view(a)
|
@@ -1547,34 +1536,49 @@ extern "C" {
|
|
1547
1536
|
float min,
|
1548
1537
|
float max);
|
1549
1538
|
|
1539
|
+
// im2col
|
1540
|
+
// converts data into a format that effectively results in a convolution when combined with matrix multiplication
|
1550
1541
|
GGML_API struct ggml_tensor * ggml_im2col(
|
1551
1542
|
struct ggml_context * ctx,
|
1552
|
-
struct ggml_tensor * a,
|
1553
|
-
struct ggml_tensor * b,
|
1554
|
-
int
|
1555
|
-
int
|
1556
|
-
int
|
1557
|
-
int
|
1558
|
-
int
|
1559
|
-
int
|
1560
|
-
bool
|
1561
|
-
enum ggml_type
|
1543
|
+
struct ggml_tensor * a, // convolution kernel
|
1544
|
+
struct ggml_tensor * b, // data
|
1545
|
+
int s0, // stride dimension 0
|
1546
|
+
int s1, // stride dimension 1
|
1547
|
+
int p0, // padding dimension 0
|
1548
|
+
int p1, // padding dimension 1
|
1549
|
+
int d0, // dilation dimension 0
|
1550
|
+
int d1, // dilation dimension 1
|
1551
|
+
bool is_2D,
|
1552
|
+
enum ggml_type dst_type);
|
1553
|
+
|
1554
|
+
GGML_API struct ggml_tensor * ggml_im2col_back(
|
1555
|
+
struct ggml_context * ctx,
|
1556
|
+
struct ggml_tensor * a, // convolution kernel
|
1557
|
+
struct ggml_tensor * b, // gradient of im2col output
|
1558
|
+
int64_t * ne, // shape of im2col input
|
1559
|
+
int s0, // stride dimension 0
|
1560
|
+
int s1, // stride dimension 1
|
1561
|
+
int p0, // padding dimension 0
|
1562
|
+
int p1, // padding dimension 1
|
1563
|
+
int d0, // dilation dimension 0
|
1564
|
+
int d1, // dilation dimension 1
|
1565
|
+
bool is_2D);
|
1562
1566
|
|
1563
1567
|
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
|
1564
1568
|
struct ggml_context * ctx,
|
1565
|
-
struct ggml_tensor * a,
|
1566
|
-
struct ggml_tensor * b,
|
1567
|
-
int s0,
|
1568
|
-
int s1,
|
1569
|
-
int p0,
|
1570
|
-
int p1,
|
1571
|
-
int d0,
|
1572
|
-
int d1);
|
1569
|
+
struct ggml_tensor * a, // convolution kernel
|
1570
|
+
struct ggml_tensor * b, // data
|
1571
|
+
int s0, // stride dimension 0
|
1572
|
+
int s1, // stride dimension 1
|
1573
|
+
int p0, // padding dimension 0
|
1574
|
+
int p1, // padding dimension 1
|
1575
|
+
int d0, // dilation dimension 0
|
1576
|
+
int d1); // dilation dimension 1
|
1573
1577
|
|
1574
1578
|
GGML_API struct ggml_tensor * ggml_conv_1d(
|
1575
1579
|
struct ggml_context * ctx,
|
1576
|
-
struct ggml_tensor * a,
|
1577
|
-
struct ggml_tensor * b,
|
1580
|
+
struct ggml_tensor * a, // convolution kernel
|
1581
|
+
struct ggml_tensor * b, // data
|
1578
1582
|
int s0, // stride
|
1579
1583
|
int p0, // padding
|
1580
1584
|
int d0); // dilation
|
@@ -1583,29 +1587,29 @@ extern "C" {
|
|
1583
1587
|
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
1584
1588
|
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
1585
1589
|
struct ggml_context * ctx,
|
1586
|
-
struct ggml_tensor * a,
|
1587
|
-
struct ggml_tensor * b,
|
1588
|
-
int s,
|
1589
|
-
int d);
|
1590
|
+
struct ggml_tensor * a, // convolution kernel
|
1591
|
+
struct ggml_tensor * b, // data
|
1592
|
+
int s, // stride
|
1593
|
+
int d); // dilation
|
1590
1594
|
|
1591
1595
|
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
1592
1596
|
struct ggml_context * ctx,
|
1593
|
-
struct ggml_tensor * a,
|
1594
|
-
struct ggml_tensor * b,
|
1595
|
-
int s0,
|
1596
|
-
int p0,
|
1597
|
-
int d0);
|
1597
|
+
struct ggml_tensor * a, // convolution kernel
|
1598
|
+
struct ggml_tensor * b, // data
|
1599
|
+
int s0, // stride
|
1600
|
+
int p0, // padding
|
1601
|
+
int d0); // dilation
|
1598
1602
|
|
1599
1603
|
GGML_API struct ggml_tensor * ggml_conv_2d(
|
1600
1604
|
struct ggml_context * ctx,
|
1601
|
-
struct ggml_tensor * a,
|
1602
|
-
struct ggml_tensor * b,
|
1603
|
-
int s0,
|
1604
|
-
int s1,
|
1605
|
-
int p0,
|
1606
|
-
int p1,
|
1607
|
-
int d0,
|
1608
|
-
int d1);
|
1605
|
+
struct ggml_tensor * a, // convolution kernel
|
1606
|
+
struct ggml_tensor * b, // data
|
1607
|
+
int s0, // stride dimension 0
|
1608
|
+
int s1, // stride dimension 1
|
1609
|
+
int p0, // padding dimension 0
|
1610
|
+
int p1, // padding dimension 1
|
1611
|
+
int d0, // dilation dimension 0
|
1612
|
+
int d1); // dilation dimension 1
|
1609
1613
|
|
1610
1614
|
|
1611
1615
|
// kernel size is a->ne[0] x a->ne[1]
|
@@ -1667,13 +1671,37 @@ extern "C" {
|
|
1667
1671
|
float p0,
|
1668
1672
|
float p1);
|
1669
1673
|
|
1674
|
+
GGML_API struct ggml_tensor * ggml_pool_2d_back(
|
1675
|
+
struct ggml_context * ctx,
|
1676
|
+
struct ggml_tensor * a,
|
1677
|
+
struct ggml_tensor * af, // "a"/input used in forward pass
|
1678
|
+
enum ggml_op_pool op,
|
1679
|
+
int k0,
|
1680
|
+
int k1,
|
1681
|
+
int s0,
|
1682
|
+
int s1,
|
1683
|
+
float p0,
|
1684
|
+
float p1);
|
1685
|
+
|
1670
1686
|
// nearest interpolate
|
1687
|
+
// multiplies ne0 and ne1 by scale factor
|
1671
1688
|
// used in stable-diffusion
|
1672
1689
|
GGML_API struct ggml_tensor * ggml_upscale(
|
1673
1690
|
struct ggml_context * ctx,
|
1674
1691
|
struct ggml_tensor * a,
|
1675
1692
|
int scale_factor);
|
1676
1693
|
|
1694
|
+
// nearest interpolate
|
1695
|
+
// nearest interpolate to specified dimensions
|
1696
|
+
// used in tortoise.cpp
|
1697
|
+
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
1698
|
+
struct ggml_context * ctx,
|
1699
|
+
struct ggml_tensor * a,
|
1700
|
+
int ne0,
|
1701
|
+
int ne1,
|
1702
|
+
int ne2,
|
1703
|
+
int ne3);
|
1704
|
+
|
1677
1705
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
1678
1706
|
GGML_API struct ggml_tensor * ggml_pad(
|
1679
1707
|
struct ggml_context * ctx,
|
@@ -1683,6 +1711,13 @@ extern "C" {
|
|
1683
1711
|
int p2,
|
1684
1712
|
int p3);
|
1685
1713
|
|
1714
|
+
// pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
|
1715
|
+
GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
|
1716
|
+
struct ggml_context * ctx,
|
1717
|
+
struct ggml_tensor * a,
|
1718
|
+
int p0,
|
1719
|
+
int p1);
|
1720
|
+
|
1686
1721
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
1687
1722
|
// timesteps: [N,]
|
1688
1723
|
// return: [N, dim]
|
@@ -1715,13 +1750,31 @@ extern "C" {
|
|
1715
1750
|
struct ggml_tensor * a,
|
1716
1751
|
int k);
|
1717
1752
|
|
1718
|
-
|
1753
|
+
#define GGML_KQ_MASK_PAD 32
|
1754
|
+
|
1755
|
+
// q: [n_embd, n_batch, n_head, 1]
|
1756
|
+
// k: [n_embd, n_kv, n_head_kv, 1]
|
1757
|
+
// v: [n_embd, n_kv, n_head_kv, 1] !! not transposed !!
|
1758
|
+
// mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
|
1759
|
+
// res: [n_embd, n_head, n_batch, 1] !! permuted !!
|
1760
|
+
GGML_API struct ggml_tensor * ggml_flash_attn_ext(
|
1719
1761
|
struct ggml_context * ctx,
|
1720
1762
|
struct ggml_tensor * q,
|
1721
1763
|
struct ggml_tensor * k,
|
1722
1764
|
struct ggml_tensor * v,
|
1723
|
-
|
1765
|
+
struct ggml_tensor * mask,
|
1766
|
+
float scale,
|
1767
|
+
float max_bias,
|
1768
|
+
float logit_softcap);
|
1769
|
+
|
1770
|
+
GGML_API void ggml_flash_attn_ext_set_prec(
|
1771
|
+
struct ggml_tensor * a,
|
1772
|
+
enum ggml_prec prec);
|
1724
1773
|
|
1774
|
+
GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
|
1775
|
+
const struct ggml_tensor * a);
|
1776
|
+
|
1777
|
+
// TODO: needs to be adapted to ggml_flash_attn_ext
|
1725
1778
|
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
1726
1779
|
struct ggml_context * ctx,
|
1727
1780
|
struct ggml_tensor * q,
|
@@ -1730,20 +1783,10 @@ extern "C" {
|
|
1730
1783
|
struct ggml_tensor * d,
|
1731
1784
|
bool masked);
|
1732
1785
|
|
1733
|
-
GGML_API struct ggml_tensor * ggml_flash_ff(
|
1734
|
-
struct ggml_context * ctx,
|
1735
|
-
struct ggml_tensor * a,
|
1736
|
-
struct ggml_tensor * b0,
|
1737
|
-
struct ggml_tensor * b1,
|
1738
|
-
struct ggml_tensor * c0,
|
1739
|
-
struct ggml_tensor * c1);
|
1740
|
-
|
1741
1786
|
GGML_API struct ggml_tensor * ggml_ssm_conv(
|
1742
1787
|
struct ggml_context * ctx,
|
1743
|
-
struct ggml_tensor *
|
1744
|
-
struct ggml_tensor *
|
1745
|
-
struct ggml_tensor * c,
|
1746
|
-
struct ggml_tensor * sq);
|
1788
|
+
struct ggml_tensor * sx,
|
1789
|
+
struct ggml_tensor * c);
|
1747
1790
|
|
1748
1791
|
GGML_API struct ggml_tensor * ggml_ssm_scan(
|
1749
1792
|
struct ggml_context * ctx,
|
@@ -1752,8 +1795,7 @@ extern "C" {
|
|
1752
1795
|
struct ggml_tensor * dt,
|
1753
1796
|
struct ggml_tensor * A,
|
1754
1797
|
struct ggml_tensor * B,
|
1755
|
-
struct ggml_tensor * C
|
1756
|
-
struct ggml_tensor * sq);
|
1798
|
+
struct ggml_tensor * C);
|
1757
1799
|
|
1758
1800
|
// partition into non-overlapping windows with padding if needed
|
1759
1801
|
// example:
|
@@ -1805,6 +1847,15 @@ extern "C" {
|
|
1805
1847
|
struct ggml_tensor * pw,
|
1806
1848
|
struct ggml_tensor * ph);
|
1807
1849
|
|
1850
|
+
GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
|
1851
|
+
struct ggml_context * ctx,
|
1852
|
+
struct ggml_tensor * k,
|
1853
|
+
struct ggml_tensor * v,
|
1854
|
+
struct ggml_tensor * r,
|
1855
|
+
struct ggml_tensor * tf,
|
1856
|
+
struct ggml_tensor * td,
|
1857
|
+
struct ggml_tensor * state);
|
1858
|
+
|
1808
1859
|
// custom operators
|
1809
1860
|
|
1810
1861
|
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
@@ -1888,7 +1939,8 @@ extern "C" {
|
|
1888
1939
|
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
1889
1940
|
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
|
1890
1941
|
|
1891
|
-
|
1942
|
+
#define GGML_N_TASKS_MAX (-1)
|
1943
|
+
// n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
|
1892
1944
|
|
1893
1945
|
GGML_API struct ggml_tensor * ggml_map_custom1(
|
1894
1946
|
struct ggml_context * ctx,
|
@@ -1941,49 +1993,59 @@ extern "C" {
|
|
1941
1993
|
// loss function
|
1942
1994
|
|
1943
1995
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|
1944
|
-
struct ggml_context
|
1945
|
-
struct ggml_tensor
|
1946
|
-
struct ggml_tensor
|
1996
|
+
struct ggml_context * ctx,
|
1997
|
+
struct ggml_tensor * a, // logits
|
1998
|
+
struct ggml_tensor * b); // labels
|
1947
1999
|
|
1948
2000
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
|
1949
|
-
struct ggml_context
|
1950
|
-
struct ggml_tensor
|
1951
|
-
struct ggml_tensor
|
1952
|
-
struct ggml_tensor
|
2001
|
+
struct ggml_context * ctx,
|
2002
|
+
+            struct ggml_tensor  * a, // logits
+            struct ggml_tensor  * b, // labels
+            struct ggml_tensor  * c); // gradients of cross_entropy_loss result
+
+    // AdamW optimizer step
+    // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
+    // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
+    GGML_API struct ggml_tensor * ggml_opt_step_adamw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * grad,
+            struct ggml_tensor  * m,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * adamw_params); // parameters such as the learning rate
 
     //
     // automatic differentiation
     //
 
-    GGML_API void
-
-
+    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(
+        struct ggml_context * ctx_static,  // context for static gradients (loss + gradient accumulation)
+        struct ggml_context * ctx_compute, // context for gradient computation
+        struct ggml_cgraph  * cgraph,
+        bool                  accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
 
+    // graph allocation in a context
+    GGML_API struct ggml_cgraph * ggml_new_graph       (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
+    GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
+    GGML_API struct ggml_cgraph * ggml_graph_dup       (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+    GGML_API void                 ggml_graph_cpy       (struct ggml_cgraph * src, struct ggml_cgraph * dst);
+    GGML_API void                 ggml_graph_reset     (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
+    GGML_API void                 ggml_graph_clear     (struct ggml_cgraph * cgraph);
 
-    GGML_API
-    GGML_API
+    GGML_API int                   ggml_graph_size   (struct ggml_cgraph * cgraph);
+    GGML_API struct ggml_tensor *  ggml_graph_node   (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    GGML_API struct ggml_tensor ** ggml_graph_nodes  (struct ggml_cgraph * cgraph);
+    GGML_API int                   ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
 
-
-    GGML_API struct ggml_cgraph * ggml_new_graph         (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
-    GGML_API struct ggml_cgraph * ggml_new_graph_custom  (struct ggml_context * ctx, size_t size, bool grads);
-    GGML_API struct ggml_cgraph * ggml_graph_dup         (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API struct ggml_cgraph   ggml_graph_view        (struct ggml_cgraph * cgraph, int i0, int i1);
-    GGML_API void                 ggml_graph_cpy         (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-    GGML_API void                 ggml_graph_reset       (struct ggml_cgraph * cgraph); // zero grads
-    GGML_API void                 ggml_graph_clear       (struct ggml_cgraph * cgraph);
+    GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
 
     GGML_API size_t ggml_graph_overhead(void);
     GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
 
-
-
-    GGML_API struct
-    GGML_API enum ggml_status ggml_graph_compute (      struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
-    // same as ggml_graph_compute() but the work data is allocated as a part of the context
-    // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
-    GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
-
-    GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_tensor  (const struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
     GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
     GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
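The hunk above replaces the monolithic optimizer entry points with graph-level primitives: gradients are wired in with the new `ggml_build_backward_expand`, and the AdamW update itself becomes a graph node via `ggml_opt_step_adamw`. Below is a minimal sketch of how these declarations compose, assuming a single shared context for both static and compute tensors, a 7-element `adamw_params` tensor (the layout is not spelled out in this header), and `ggml_set_param`/`ggml_sqr`/`ggml_sum` declared elsewhere in `ggml.h`:

```c
#include "ggml.h"

// Sketch only: hand-wiring the new training primitives for a toy loss sum(w^2).
static void train_step_sketch(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 64*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // trainable parameter and loss
    struct ggml_tensor * w    = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_param(ctx, w);
    struct ggml_tensor * loss = ggml_sum(ctx, ggml_sqr(ctx, w));

    // forward graph with gradient storage enabled
    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
    ggml_build_forward_expand(gf, loss);

    // one shared context keeps the sketch short; ctx_static and ctx_compute may differ
    ggml_build_backward_expand(ctx, ctx, gf, /*accumulate =*/ false);
    ggml_graph_reset(gf); // grads + momenta to 0, loss grad to 1

    // optimizer state; the 7-float adamw_params layout is an assumption here
    struct ggml_tensor * grad   = ggml_graph_get_grad(gf, w);
    struct ggml_tensor * m      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * v      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * params = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 7);
    ggml_build_forward_expand(gf, ggml_opt_step_adamw(ctx, w, grad, m, v, params));

    // ... evaluate the graph with a backend, then free ...
    ggml_free(ctx);
}
```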
@@ -1994,197 +2056,14 @@ extern "C" {
     // dump the graph into a file using the dot format
     GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
 
-    //
-    // gb_tmp will contain original backward graph with rewritten backward process nodes,
-    // but without the second forward pass nodes.
-    GGML_API void ggml_build_backward_gradient_checkpointing(
-            struct ggml_context   * ctx,
-            struct ggml_cgraph    * gf,
-            struct ggml_cgraph    * gb,
-            struct ggml_cgraph    * gb_tmp,
-            struct ggml_tensor  * * checkpoints,
-            int                     n_checkpoints);
-    //
-    // optimization
-    //
-
-    // optimization methods
-    enum ggml_opt_type {
-        GGML_OPT_TYPE_ADAM,
-        GGML_OPT_TYPE_LBFGS,
-    };
-
-    // linesearch methods
-    enum ggml_linesearch {
-        GGML_LINESEARCH_DEFAULT = 1,
-
-        GGML_LINESEARCH_BACKTRACKING_ARMIJO       = 0,
-        GGML_LINESEARCH_BACKTRACKING_WOLFE        = 1,
-        GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
-    };
-
-    // optimization return values
-    enum ggml_opt_result {
-        GGML_OPT_RESULT_OK = 0,
-        GGML_OPT_RESULT_DID_NOT_CONVERGE,
-        GGML_OPT_RESULT_NO_CONTEXT,
-        GGML_OPT_RESULT_INVALID_WOLFE,
-        GGML_OPT_RESULT_FAIL,
-        GGML_OPT_RESULT_CANCEL,
-
-        GGML_LINESEARCH_FAIL = -128,
-        GGML_LINESEARCH_MINIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_ITERATIONS,
-        GGML_LINESEARCH_INVALID_PARAMETERS,
-    };
-
-    typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
+    // TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
     typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
 
-    //
-    //
-
-    //
-    struct ggml_opt_params {
-        enum ggml_opt_type type;
-
-        size_t graph_size;
-
-        int n_threads;
-
-        // delta-based convergence test
-        //
-        // if past == 0 - disabled
-        // if past > 0:
-        //   stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
-        //
-        int past;
-        float delta;
-
-        // maximum number of iterations without improvement
-        //
-        // if 0 - disabled
-        // if > 0:
-        //   assume convergence if no cost improvement in this number of iterations
-        //
-        int max_no_improvement;
-
-        bool print_forward_graph;
-        bool print_backward_graph;
-
-        int n_gradient_accumulation;
-
-        // ADAM parameters
-        struct {
-            int n_iter;
-
-            float sched; // schedule multiplier (fixed, decay or warmup)
-            float decay; // weight decay for AdamW, use 0.0f to disable
-            int   decay_min_ndim; // minimum number of tensor dimension to apply weight decay
-            float alpha; // learning rate
-            float beta1;
-            float beta2;
-            float eps;   // epsilon for numerical stability
-            float eps_f; // epsilon for convergence test
-            float eps_g; // epsilon for convergence test
-            float gclip; // gradient clipping
-        } adam;
-
-        // LBFGS parameters
-        struct {
-            int m; // number of corrections to approximate the inv. Hessian
-            int n_iter;
-            int max_linesearch;
-
-            float eps;      // convergence tolerance
-            float ftol;     // line search tolerance
-            float wolfe;
-            float min_step;
-            float max_step;
-
-            enum ggml_linesearch linesearch;
-        } lbfgs;
-    };
-
-    struct ggml_opt_context {
-        struct ggml_context * ctx;
-        struct ggml_opt_params params;
-
-        int iter;
-        int64_t nx; // number of parameter elements
-
-        bool just_initialized;
-
-        float loss_before;
-        float loss_after;
-
-        struct {
-            struct ggml_tensor * g;  // current gradient
-            struct ggml_tensor * m;  // first moment
-            struct ggml_tensor * v;  // second moment
-            struct ggml_tensor * pf; // past function values
-            float fx_best;
-            float fx_prev;
-            int n_no_improvement;
-        } adam;
-
-        struct {
-            struct ggml_tensor * x;    // current parameters
-            struct ggml_tensor * xp;   // previous parameters
-            struct ggml_tensor * g;    // current gradient
-            struct ggml_tensor * gp;   // previous gradient
-            struct ggml_tensor * d;    // search direction
-            struct ggml_tensor * pf;   // past function values
-            struct ggml_tensor * lmal; // the L-BFGS memory alpha
-            struct ggml_tensor * lmys; // the L-BFGS memory ys
-            struct ggml_tensor * lms;  // the L-BFGS memory s
-            struct ggml_tensor * lmy;  // the L-BFGS memory y
-            float fx_best;
-            float step;
-            int j;
-            int k;
-            int end;
-            int n_no_improvement;
-        } lbfgs;
-    };
-
-    GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
-
-    // optimize the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt(
-            struct ggml_context * ctx,
-            struct ggml_opt_params params,
-            struct ggml_tensor * f);
-
-    // initialize optimizer context
-    GGML_API void ggml_opt_init(
-            struct ggml_context     * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_opt_params    params,
-            int64_t                   nx);
-
-    // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume(
-            struct ggml_context * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor * f);
-
-    // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume_g(
-            struct ggml_context * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor * f,
-            struct ggml_cgraph * gf,
-            struct ggml_cgraph * gb,
-            ggml_opt_callback callback,
-            void * callback_data);
+    // Set callback for all future logging events.
+    // If this is not called, or NULL is supplied, everything is output on stderr.
+    GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
 
-
-    // tensor flags
-    //
-    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
-    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
 
     //
     // quantization
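With the old optimizer deleted wholesale, the logging hooks now stand on their own: `ggml_log_set` installs a process-wide callback matching the `ggml_log_callback` typedef kept above. A small sketch follows; the `GGML_LOG_LEVEL_WARN` threshold is an assumed member of `enum ggml_log_level`, which is declared elsewhere in this header, and the filtering policy is purely illustrative:

```c
#include <stdio.h>
#include "ggml.h"

// Forward WARN-and-above messages to the FILE handle passed via user_data;
// quieter levels are dropped.
static void log_to_file(enum ggml_log_level level, const char * text, void * user_data) {
    if (level >= GGML_LOG_LEVEL_WARN) {
        fputs(text, (FILE *) user_data);
    }
}

int main(void) {
    ggml_log_set(log_to_file, stderr); // passing NULL restores the stderr default
    // ... run whisper/ggml work here; library logs now flow through log_to_file ...
    return 0;
}
```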
@@ -2289,6 +2168,9 @@ extern "C" {
     GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
     GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
 
+    // removes key if it exists
+    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
+
     // overrides existing values or adds a new one
     GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
     GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
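`gguf_remove_key` rounds out the GGUF key-value API: metadata could previously only be added or overwritten, now it can also be dropped before re-serializing. A sketch using only declarations visible in this hunk (the key names are hypothetical):

```c
#include "ggml.h" // gguf_* declarations live alongside ggml in this header

// Drop one metadata key and overwrite (or add) another.
static void rewrite_metadata(struct gguf_context * ctx) {
    gguf_remove_key(ctx, "general.source_url");     // no-op if the key is absent
    gguf_set_val_u8(ctx, "general.custom_flag", 1); // overrides existing value or adds a new one
}
```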
@@ -2338,64 +2220,65 @@ extern "C" {
     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
     GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
 
-
-    //
-
-
-
-
-
-
-
-
-
-    GGML_API int ggml_cpu_has_neon       (void);
-    GGML_API int ggml_cpu_has_arm_fma    (void);
-    GGML_API int ggml_cpu_has_metal      (void);
-    GGML_API int ggml_cpu_has_f16c       (void);
-    GGML_API int ggml_cpu_has_fp16_va    (void);
-    GGML_API int ggml_cpu_has_wasm_simd  (void);
-    GGML_API int ggml_cpu_has_blas       (void);
-    GGML_API int ggml_cpu_has_cuda       (void);
-    GGML_API int ggml_cpu_has_clblast    (void);
-    GGML_API int ggml_cpu_has_vulkan     (void);
-    GGML_API int ggml_cpu_has_kompute    (void);
-    GGML_API int ggml_cpu_has_gpublas    (void);
-    GGML_API int ggml_cpu_has_sse3       (void);
-    GGML_API int ggml_cpu_has_ssse3      (void);
-    GGML_API int ggml_cpu_has_sycl       (void);
-    GGML_API int ggml_cpu_has_vsx        (void);
-    GGML_API int ggml_cpu_has_matmul_int8(void);
-
-    //
-    // Internal types and functions exposed for tests and benchmarks
-    //
-
-#ifdef __cplusplus
-// restrict not standard in C++
-#define GGML_RESTRICT
+#ifdef __cplusplus
+    // restrict not standard in C++
+#    if defined(__GNUC__)
+#        define GGML_RESTRICT __restrict__
+#    elif defined(__clang__)
+#        define GGML_RESTRICT __restrict
+#    elif defined(_MSC_VER)
+#        define GGML_RESTRICT __restrict
+#    else
+#        define GGML_RESTRICT
+#    endif
 #else
-#define GGML_RESTRICT restrict
+#    define GGML_RESTRICT restrict
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    struct ggml_type_traits {
+        const char      * type_name;
+        int64_t           blck_size;
+        int64_t           blck_size_interleave; // interleave elements in blocks
+        size_t            type_size;
+        bool              is_quantized;
+        ggml_to_float_t   to_float;
+        ggml_from_float_t from_float_ref;
+    };
+
+    GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
+
+    // ggml threadpool
+    // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
+    // the goal should be to create an API that other backends can use and move everything to the ggml base
+
+    // scheduling priorities
+    enum ggml_sched_priority {
+        GGML_SCHED_PRIO_NORMAL,
+        GGML_SCHED_PRIO_MEDIUM,
+        GGML_SCHED_PRIO_HIGH,
+        GGML_SCHED_PRIO_REALTIME
+    };
+
+    // threadpool params
+    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
+    struct ggml_threadpool_params {
+        bool                     cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int                      n_threads;                   // number of threads
+        enum ggml_sched_priority prio;                        // thread priority
+        uint32_t                 poll;                        // polling level (0 - no polling, 100 - aggressive polling)
+        bool                     strict_cpu;                  // strict cpu placement
+        bool                     paused;                      // start in paused state
+    };
+
+    struct ggml_threadpool; // forward declaration, see ggml.c
+
+    typedef struct ggml_threadpool * ggml_threadpool_t;
+
+    GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
+    GGML_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
+    GGML_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
 
 #ifdef __cplusplus
 }