whisper.rn 0.3.0-rc.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/android/src/main/jni/whisper/Whisper.mk +11 -8
- package/cpp/ggml.c +4627 -1594
- package/cpp/ggml.h +427 -25
- package/cpp/whisper.cpp +226 -102
- package/cpp/whisper.h +30 -6
- package/package.json +1 -1
- package/whisper-rn.podspec +10 -6
package/cpp/ggml.h
CHANGED
|
@@ -198,8 +198,11 @@
|
|
|
198
198
|
#define GGML_MAX_PARAMS 256
|
|
199
199
|
#define GGML_MAX_CONTEXTS 64
|
|
200
200
|
#define GGML_MAX_OPT 4
|
|
201
|
+
#define GGML_MAX_NAME 48
|
|
201
202
|
#define GGML_DEFAULT_N_THREADS 4
|
|
202
203
|
|
|
204
|
+
#define GGML_UNUSED(x) (void)(x)
|
|
205
|
+
|
|
203
206
|
#define GGML_ASSERT(x) \
|
|
204
207
|
do { \
|
|
205
208
|
if (!(x)) { \
|
|
@@ -208,6 +211,30 @@
|
|
|
208
211
|
} \
|
|
209
212
|
} while (0)
|
|
210
213
|
|
|
214
|
+
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
|
215
|
+
// main purpose is to reduce code duplication and improve readability.
|
|
216
|
+
//
|
|
217
|
+
// example:
|
|
218
|
+
//
|
|
219
|
+
// GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
|
|
220
|
+
// GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
|
|
221
|
+
//
|
|
222
|
+
#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
|
|
223
|
+
const type prefix##0 = (pointer)->array[0]; \
|
|
224
|
+
GGML_UNUSED(prefix##0);
|
|
225
|
+
#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
|
|
226
|
+
GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
|
|
227
|
+
const type prefix##1 = (pointer)->array[1]; \
|
|
228
|
+
GGML_UNUSED(prefix##1);
|
|
229
|
+
#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
|
|
230
|
+
GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
|
|
231
|
+
const type prefix##2 = (pointer)->array[2]; \
|
|
232
|
+
GGML_UNUSED(prefix##2);
|
|
233
|
+
#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
|
|
234
|
+
GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
|
|
235
|
+
const type prefix##3 = (pointer)->array[3]; \
|
|
236
|
+
GGML_UNUSED(prefix##3);
|
|
237
|
+
|
|
211
238
|
#ifdef __cplusplus
|
|
212
239
|
extern "C" {
|
|
213
240
|
#endif
|
|
@@ -240,6 +267,13 @@ extern "C" {
|
|
|
240
267
|
GGML_TYPE_Q5_1 = 7,
|
|
241
268
|
GGML_TYPE_Q8_0 = 8,
|
|
242
269
|
GGML_TYPE_Q8_1 = 9,
|
|
270
|
+
// k-quantizations
|
|
271
|
+
GGML_TYPE_Q2_K = 10,
|
|
272
|
+
GGML_TYPE_Q3_K = 11,
|
|
273
|
+
GGML_TYPE_Q4_K = 12,
|
|
274
|
+
GGML_TYPE_Q5_K = 13,
|
|
275
|
+
GGML_TYPE_Q6_K = 14,
|
|
276
|
+
GGML_TYPE_Q8_K = 15,
|
|
243
277
|
GGML_TYPE_I8,
|
|
244
278
|
GGML_TYPE_I16,
|
|
245
279
|
GGML_TYPE_I32,
|
|
@@ -248,7 +282,8 @@ extern "C" {
|
|
|
248
282
|
|
|
249
283
|
enum ggml_backend {
|
|
250
284
|
GGML_BACKEND_CPU = 0,
|
|
251
|
-
|
|
285
|
+
GGML_BACKEND_GPU = 10,
|
|
286
|
+
GGML_BACKEND_GPU_SPLIT = 20,
|
|
252
287
|
};
|
|
253
288
|
|
|
254
289
|
// model file types
|
|
@@ -262,6 +297,11 @@ extern "C" {
|
|
|
262
297
|
GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
|
263
298
|
GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
|
|
264
299
|
GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
|
|
300
|
+
GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
|
|
301
|
+
GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
|
|
302
|
+
GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
|
|
303
|
+
GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
|
|
304
|
+
GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
|
|
265
305
|
};
|
|
266
306
|
|
|
267
307
|
// available tensor operations:
|
|
@@ -281,13 +321,18 @@ extern "C" {
|
|
|
281
321
|
GGML_OP_SUM,
|
|
282
322
|
GGML_OP_SUM_ROWS,
|
|
283
323
|
GGML_OP_MEAN,
|
|
324
|
+
GGML_OP_ARGMAX,
|
|
284
325
|
GGML_OP_REPEAT,
|
|
326
|
+
GGML_OP_REPEAT_BACK,
|
|
285
327
|
GGML_OP_ABS,
|
|
286
328
|
GGML_OP_SGN,
|
|
287
329
|
GGML_OP_NEG,
|
|
288
330
|
GGML_OP_STEP,
|
|
331
|
+
GGML_OP_TANH,
|
|
332
|
+
GGML_OP_ELU,
|
|
289
333
|
GGML_OP_RELU,
|
|
290
334
|
GGML_OP_GELU,
|
|
335
|
+
GGML_OP_GELU_QUICK,
|
|
291
336
|
GGML_OP_SILU,
|
|
292
337
|
GGML_OP_SILU_BACK,
|
|
293
338
|
GGML_OP_NORM, // normalize
|
|
@@ -295,6 +340,7 @@ extern "C" {
|
|
|
295
340
|
GGML_OP_RMS_NORM_BACK,
|
|
296
341
|
|
|
297
342
|
GGML_OP_MUL_MAT,
|
|
343
|
+
GGML_OP_OUT_PROD,
|
|
298
344
|
|
|
299
345
|
GGML_OP_SCALE,
|
|
300
346
|
GGML_OP_SET,
|
|
@@ -310,19 +356,30 @@ extern "C" {
|
|
|
310
356
|
GGML_OP_DIAG_MASK_INF,
|
|
311
357
|
GGML_OP_DIAG_MASK_ZERO,
|
|
312
358
|
GGML_OP_SOFT_MAX,
|
|
359
|
+
GGML_OP_SOFT_MAX_BACK,
|
|
313
360
|
GGML_OP_ROPE,
|
|
314
361
|
GGML_OP_ROPE_BACK,
|
|
315
362
|
GGML_OP_ALIBI,
|
|
316
363
|
GGML_OP_CLAMP,
|
|
317
|
-
|
|
318
|
-
|
|
364
|
+
GGML_OP_CONV_1D,
|
|
365
|
+
GGML_OP_CONV_2D,
|
|
319
366
|
|
|
320
367
|
GGML_OP_FLASH_ATTN,
|
|
321
368
|
GGML_OP_FLASH_FF,
|
|
369
|
+
GGML_OP_FLASH_ATTN_BACK,
|
|
370
|
+
GGML_OP_WIN_PART,
|
|
371
|
+
GGML_OP_WIN_UNPART,
|
|
322
372
|
|
|
323
373
|
GGML_OP_MAP_UNARY,
|
|
324
374
|
GGML_OP_MAP_BINARY,
|
|
325
375
|
|
|
376
|
+
GGML_OP_MAP_CUSTOM1,
|
|
377
|
+
GGML_OP_MAP_CUSTOM2,
|
|
378
|
+
GGML_OP_MAP_CUSTOM3,
|
|
379
|
+
|
|
380
|
+
GGML_OP_CROSS_ENTROPY_LOSS,
|
|
381
|
+
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
|
382
|
+
|
|
326
383
|
GGML_OP_COUNT,
|
|
327
384
|
};
|
|
328
385
|
|
|
@@ -371,11 +428,15 @@ extern "C" {
|
|
|
371
428
|
|
|
372
429
|
void * data;
|
|
373
430
|
|
|
374
|
-
char name[
|
|
431
|
+
char name[GGML_MAX_NAME];
|
|
432
|
+
|
|
433
|
+
void * extra; // extra things e.g. for ggml-cuda.cu
|
|
375
434
|
|
|
376
|
-
char padding[
|
|
435
|
+
char padding[4];
|
|
377
436
|
};
|
|
378
437
|
|
|
438
|
+
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
|
439
|
+
|
|
379
440
|
// computation graph
|
|
380
441
|
struct ggml_cgraph {
|
|
381
442
|
int n_nodes;
|
|
@@ -409,6 +470,28 @@ extern "C" {
|
|
|
409
470
|
bool no_alloc; // don't allocate memory for the tensor data
|
|
410
471
|
};
|
|
411
472
|
|
|
473
|
+
|
|
474
|
+
// compute types
|
|
475
|
+
|
|
476
|
+
// NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
|
|
477
|
+
// This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
|
|
478
|
+
enum ggml_task_type {
|
|
479
|
+
GGML_TASK_INIT = 0,
|
|
480
|
+
GGML_TASK_COMPUTE,
|
|
481
|
+
GGML_TASK_FINALIZE,
|
|
482
|
+
};
|
|
483
|
+
|
|
484
|
+
struct ggml_compute_params {
|
|
485
|
+
enum ggml_task_type type;
|
|
486
|
+
|
|
487
|
+
// ith = thread index, nth = number of threads
|
|
488
|
+
int ith, nth;
|
|
489
|
+
|
|
490
|
+
// work buffer for all threads
|
|
491
|
+
size_t wsize;
|
|
492
|
+
void * wdata;
|
|
493
|
+
};
|
|
494
|
+
|
|
412
495
|
// misc
|
|
413
496
|
|
|
414
497
|
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
|
@@ -417,17 +500,23 @@ extern "C" {
|
|
|
417
500
|
GGML_API int64_t ggml_cycles(void);
|
|
418
501
|
GGML_API int64_t ggml_cycles_per_ms(void);
|
|
419
502
|
|
|
503
|
+
GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
|
|
504
|
+
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
|
505
|
+
|
|
420
506
|
GGML_API void ggml_print_object (const struct ggml_object * obj);
|
|
421
507
|
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
|
|
422
508
|
|
|
423
|
-
GGML_API int64_t ggml_nelements(const struct ggml_tensor * tensor);
|
|
424
|
-
GGML_API
|
|
509
|
+
GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
|
|
510
|
+
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
|
|
511
|
+
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
|
512
|
+
GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
|
|
425
513
|
|
|
426
514
|
GGML_API int ggml_blck_size (enum ggml_type type);
|
|
427
515
|
GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
|
|
428
516
|
GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
|
|
429
517
|
|
|
430
518
|
GGML_API const char * ggml_type_name(enum ggml_type type);
|
|
519
|
+
GGML_API const char * ggml_op_name (enum ggml_op op);
|
|
431
520
|
|
|
432
521
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
|
433
522
|
|
|
@@ -436,14 +525,26 @@ extern "C" {
|
|
|
436
525
|
// TODO: temporary until model loading of ggml examples is refactored
|
|
437
526
|
GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
|
|
438
527
|
|
|
528
|
+
GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
|
529
|
+
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
|
530
|
+
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
|
531
|
+
|
|
532
|
+
// use this to compute the memory overhead of a tensor
|
|
533
|
+
GGML_API size_t ggml_tensor_overhead(void);
|
|
534
|
+
|
|
439
535
|
// main
|
|
440
536
|
|
|
441
537
|
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
|
|
442
|
-
GGML_API void
|
|
538
|
+
GGML_API void ggml_free(struct ggml_context * ctx);
|
|
443
539
|
|
|
444
540
|
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
|
|
445
541
|
|
|
446
|
-
GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
|
542
|
+
GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
|
|
543
|
+
GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
|
|
544
|
+
|
|
545
|
+
GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
|
|
546
|
+
GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);
|
|
547
|
+
GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
|
|
447
548
|
|
|
448
549
|
GGML_API struct ggml_tensor * ggml_new_tensor(
|
|
449
550
|
struct ggml_context * ctx,
|
|
@@ -483,6 +584,8 @@ extern "C" {
|
|
|
483
584
|
GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
|
|
484
585
|
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
|
|
485
586
|
|
|
587
|
+
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
|
588
|
+
|
|
486
589
|
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
|
487
590
|
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
|
488
591
|
GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
|
@@ -496,8 +599,9 @@ extern "C" {
|
|
|
496
599
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
|
497
600
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
|
498
601
|
|
|
499
|
-
GGML_API const char *
|
|
500
|
-
GGML_API
|
|
602
|
+
GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
|
|
603
|
+
GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
|
|
604
|
+
GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
|
|
501
605
|
|
|
502
606
|
//
|
|
503
607
|
// operations on tensors with backpropagation
|
|
@@ -522,6 +626,11 @@ extern "C" {
|
|
|
522
626
|
struct ggml_tensor * a,
|
|
523
627
|
struct ggml_tensor * b);
|
|
524
628
|
|
|
629
|
+
GGML_API struct ggml_tensor * ggml_add1_inplace(
|
|
630
|
+
struct ggml_context * ctx,
|
|
631
|
+
struct ggml_tensor * a,
|
|
632
|
+
struct ggml_tensor * b);
|
|
633
|
+
|
|
525
634
|
GGML_API struct ggml_tensor * ggml_acc(
|
|
526
635
|
struct ggml_context * ctx,
|
|
527
636
|
struct ggml_tensor * a,
|
|
@@ -545,24 +654,47 @@ extern "C" {
|
|
|
545
654
|
struct ggml_tensor * a,
|
|
546
655
|
struct ggml_tensor * b);
|
|
547
656
|
|
|
657
|
+
GGML_API struct ggml_tensor * ggml_sub_inplace(
|
|
658
|
+
struct ggml_context * ctx,
|
|
659
|
+
struct ggml_tensor * a,
|
|
660
|
+
struct ggml_tensor * b);
|
|
661
|
+
|
|
548
662
|
GGML_API struct ggml_tensor * ggml_mul(
|
|
549
663
|
struct ggml_context * ctx,
|
|
550
664
|
struct ggml_tensor * a,
|
|
551
665
|
struct ggml_tensor * b);
|
|
552
666
|
|
|
667
|
+
GGML_API struct ggml_tensor * ggml_mul_inplace(
|
|
668
|
+
struct ggml_context * ctx,
|
|
669
|
+
struct ggml_tensor * a,
|
|
670
|
+
struct ggml_tensor * b);
|
|
671
|
+
|
|
553
672
|
GGML_API struct ggml_tensor * ggml_div(
|
|
554
673
|
struct ggml_context * ctx,
|
|
555
674
|
struct ggml_tensor * a,
|
|
556
675
|
struct ggml_tensor * b);
|
|
557
676
|
|
|
677
|
+
GGML_API struct ggml_tensor * ggml_div_inplace(
|
|
678
|
+
struct ggml_context * ctx,
|
|
679
|
+
struct ggml_tensor * a,
|
|
680
|
+
struct ggml_tensor * b);
|
|
681
|
+
|
|
558
682
|
GGML_API struct ggml_tensor * ggml_sqr(
|
|
559
683
|
struct ggml_context * ctx,
|
|
560
684
|
struct ggml_tensor * a);
|
|
561
685
|
|
|
686
|
+
GGML_API struct ggml_tensor * ggml_sqr_inplace(
|
|
687
|
+
struct ggml_context * ctx,
|
|
688
|
+
struct ggml_tensor * a);
|
|
689
|
+
|
|
562
690
|
GGML_API struct ggml_tensor * ggml_sqrt(
|
|
563
691
|
struct ggml_context * ctx,
|
|
564
692
|
struct ggml_tensor * a);
|
|
565
693
|
|
|
694
|
+
GGML_API struct ggml_tensor * ggml_sqrt_inplace(
|
|
695
|
+
struct ggml_context * ctx,
|
|
696
|
+
struct ggml_tensor * a);
|
|
697
|
+
|
|
566
698
|
GGML_API struct ggml_tensor * ggml_log(
|
|
567
699
|
struct ggml_context * ctx,
|
|
568
700
|
struct ggml_tensor * a);
|
|
@@ -586,6 +718,11 @@ extern "C" {
|
|
|
586
718
|
struct ggml_context * ctx,
|
|
587
719
|
struct ggml_tensor * a);
|
|
588
720
|
|
|
721
|
+
// argmax along rows
|
|
722
|
+
GGML_API struct ggml_tensor * ggml_argmax(
|
|
723
|
+
struct ggml_context * ctx,
|
|
724
|
+
struct ggml_tensor * a);
|
|
725
|
+
|
|
589
726
|
// if a is the same shape as b, and a is not parameter, return a
|
|
590
727
|
// otherwise, return a new tensor: repeat(a) to fit in b
|
|
591
728
|
GGML_API struct ggml_tensor * ggml_repeat(
|
|
@@ -593,35 +730,92 @@ extern "C" {
|
|
|
593
730
|
struct ggml_tensor * a,
|
|
594
731
|
struct ggml_tensor * b);
|
|
595
732
|
|
|
733
|
+
GGML_API struct ggml_tensor * ggml_repeat_back(
|
|
734
|
+
struct ggml_context * ctx,
|
|
735
|
+
struct ggml_tensor * a,
|
|
736
|
+
struct ggml_tensor * b);
|
|
737
|
+
|
|
596
738
|
GGML_API struct ggml_tensor * ggml_abs(
|
|
597
739
|
struct ggml_context * ctx,
|
|
598
740
|
struct ggml_tensor * a);
|
|
599
741
|
|
|
742
|
+
GGML_API struct ggml_tensor * ggml_abs_inplace(
|
|
743
|
+
struct ggml_context * ctx,
|
|
744
|
+
struct ggml_tensor * a);
|
|
745
|
+
|
|
600
746
|
GGML_API struct ggml_tensor * ggml_sgn(
|
|
601
747
|
struct ggml_context * ctx,
|
|
602
748
|
struct ggml_tensor * a);
|
|
603
749
|
|
|
750
|
+
GGML_API struct ggml_tensor * ggml_sgn_inplace(
|
|
751
|
+
struct ggml_context * ctx,
|
|
752
|
+
struct ggml_tensor * a);
|
|
753
|
+
|
|
604
754
|
GGML_API struct ggml_tensor * ggml_neg(
|
|
605
755
|
struct ggml_context * ctx,
|
|
606
756
|
struct ggml_tensor * a);
|
|
607
757
|
|
|
758
|
+
GGML_API struct ggml_tensor * ggml_neg_inplace(
|
|
759
|
+
struct ggml_context * ctx,
|
|
760
|
+
struct ggml_tensor * a);
|
|
761
|
+
|
|
608
762
|
GGML_API struct ggml_tensor * ggml_step(
|
|
609
763
|
struct ggml_context * ctx,
|
|
610
764
|
struct ggml_tensor * a);
|
|
611
765
|
|
|
766
|
+
GGML_API struct ggml_tensor * ggml_step_inplace(
|
|
767
|
+
struct ggml_context * ctx,
|
|
768
|
+
struct ggml_tensor * a);
|
|
769
|
+
|
|
770
|
+
GGML_API struct ggml_tensor * ggml_tanh(
|
|
771
|
+
struct ggml_context * ctx,
|
|
772
|
+
struct ggml_tensor * a);
|
|
773
|
+
|
|
774
|
+
GGML_API struct ggml_tensor * ggml_tanh_inplace(
|
|
775
|
+
struct ggml_context * ctx,
|
|
776
|
+
struct ggml_tensor * a);
|
|
777
|
+
|
|
778
|
+
GGML_API struct ggml_tensor * ggml_elu(
|
|
779
|
+
struct ggml_context * ctx,
|
|
780
|
+
struct ggml_tensor * a);
|
|
781
|
+
|
|
782
|
+
GGML_API struct ggml_tensor * ggml_elu_inplace(
|
|
783
|
+
struct ggml_context * ctx,
|
|
784
|
+
struct ggml_tensor * a);
|
|
785
|
+
|
|
612
786
|
GGML_API struct ggml_tensor * ggml_relu(
|
|
613
787
|
struct ggml_context * ctx,
|
|
614
788
|
struct ggml_tensor * a);
|
|
615
789
|
|
|
790
|
+
GGML_API struct ggml_tensor * ggml_relu_inplace(
|
|
791
|
+
struct ggml_context * ctx,
|
|
792
|
+
struct ggml_tensor * a);
|
|
793
|
+
|
|
616
794
|
// TODO: double-check this computation is correct
|
|
617
795
|
GGML_API struct ggml_tensor * ggml_gelu(
|
|
618
796
|
struct ggml_context * ctx,
|
|
619
797
|
struct ggml_tensor * a);
|
|
620
798
|
|
|
799
|
+
GGML_API struct ggml_tensor * ggml_gelu_inplace(
|
|
800
|
+
struct ggml_context * ctx,
|
|
801
|
+
struct ggml_tensor * a);
|
|
802
|
+
|
|
803
|
+
GGML_API struct ggml_tensor * ggml_gelu_quick(
|
|
804
|
+
struct ggml_context * ctx,
|
|
805
|
+
struct ggml_tensor * a);
|
|
806
|
+
|
|
807
|
+
GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
|
|
808
|
+
struct ggml_context * ctx,
|
|
809
|
+
struct ggml_tensor * a);
|
|
810
|
+
|
|
621
811
|
GGML_API struct ggml_tensor * ggml_silu(
|
|
622
812
|
struct ggml_context * ctx,
|
|
623
813
|
struct ggml_tensor * a);
|
|
624
814
|
|
|
815
|
+
GGML_API struct ggml_tensor * ggml_silu_inplace(
|
|
816
|
+
struct ggml_context * ctx,
|
|
817
|
+
struct ggml_tensor * a);
|
|
818
|
+
|
|
625
819
|
// a - x
|
|
626
820
|
// b - dy
|
|
627
821
|
GGML_API struct ggml_tensor * ggml_silu_back(
|
|
@@ -635,10 +829,18 @@ extern "C" {
|
|
|
635
829
|
struct ggml_context * ctx,
|
|
636
830
|
struct ggml_tensor * a);
|
|
637
831
|
|
|
832
|
+
GGML_API struct ggml_tensor * ggml_norm_inplace(
|
|
833
|
+
struct ggml_context * ctx,
|
|
834
|
+
struct ggml_tensor * a);
|
|
835
|
+
|
|
638
836
|
GGML_API struct ggml_tensor * ggml_rms_norm(
|
|
639
837
|
struct ggml_context * ctx,
|
|
640
838
|
struct ggml_tensor * a);
|
|
641
839
|
|
|
840
|
+
GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
|
|
841
|
+
struct ggml_context * ctx,
|
|
842
|
+
struct ggml_tensor * a);
|
|
843
|
+
|
|
642
844
|
// a - x
|
|
643
845
|
// b - dy
|
|
644
846
|
GGML_API struct ggml_tensor * ggml_rms_norm_back(
|
|
@@ -646,14 +848,22 @@ extern "C" {
|
|
|
646
848
|
struct ggml_tensor * a,
|
|
647
849
|
struct ggml_tensor * b);
|
|
648
850
|
|
|
649
|
-
// A:
|
|
650
|
-
// B:
|
|
851
|
+
// A: n columns, m rows
|
|
852
|
+
// B: n columns, p rows (i.e. we transpose it internally)
|
|
651
853
|
// result is m columns, p rows
|
|
652
854
|
GGML_API struct ggml_tensor * ggml_mul_mat(
|
|
653
855
|
struct ggml_context * ctx,
|
|
654
856
|
struct ggml_tensor * a,
|
|
655
857
|
struct ggml_tensor * b);
|
|
656
858
|
|
|
859
|
+
// A: m columns, n rows,
|
|
860
|
+
// B: p columns, n rows,
|
|
861
|
+
// result is m columns, p rows
|
|
862
|
+
GGML_API struct ggml_tensor * ggml_out_prod(
|
|
863
|
+
struct ggml_context * ctx,
|
|
864
|
+
struct ggml_tensor * a,
|
|
865
|
+
struct ggml_tensor * b);
|
|
866
|
+
|
|
657
867
|
//
|
|
658
868
|
// operations on tensors without backpropagation
|
|
659
869
|
//
|
|
@@ -864,16 +1074,29 @@ extern "C" {
|
|
|
864
1074
|
struct ggml_context * ctx,
|
|
865
1075
|
struct ggml_tensor * a);
|
|
866
1076
|
|
|
1077
|
+
GGML_API struct ggml_tensor * ggml_soft_max_back(
|
|
1078
|
+
struct ggml_context * ctx,
|
|
1079
|
+
struct ggml_tensor * a,
|
|
1080
|
+
struct ggml_tensor * b);
|
|
1081
|
+
|
|
1082
|
+
// in-place, returns view(a)
|
|
1083
|
+
GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
|
|
1084
|
+
struct ggml_context * ctx,
|
|
1085
|
+
struct ggml_tensor * a,
|
|
1086
|
+
struct ggml_tensor * b);
|
|
1087
|
+
|
|
867
1088
|
// rotary position embedding
|
|
868
1089
|
// if mode & 1 == 1, skip n_past elements
|
|
869
1090
|
// if mode & 2 == 1, GPT-NeoX style
|
|
1091
|
+
// if mode & 4 == 1, ChatGLM style
|
|
870
1092
|
// TODO: avoid creating a new tensor every time
|
|
871
1093
|
GGML_API struct ggml_tensor * ggml_rope(
|
|
872
1094
|
struct ggml_context * ctx,
|
|
873
1095
|
struct ggml_tensor * a,
|
|
874
1096
|
int n_past,
|
|
875
1097
|
int n_dims,
|
|
876
|
-
int mode
|
|
1098
|
+
int mode,
|
|
1099
|
+
int n_ctx);
|
|
877
1100
|
|
|
878
1101
|
// in-place, returns view(a)
|
|
879
1102
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
|
@@ -881,7 +1104,8 @@ extern "C" {
|
|
|
881
1104
|
struct ggml_tensor * a,
|
|
882
1105
|
int n_past,
|
|
883
1106
|
int n_dims,
|
|
884
|
-
int mode
|
|
1107
|
+
int mode,
|
|
1108
|
+
int n_ctx);
|
|
885
1109
|
|
|
886
1110
|
// rotary position embedding backward, i.e compute dx from dy
|
|
887
1111
|
// a - dy
|
|
@@ -909,19 +1133,33 @@ extern "C" {
|
|
|
909
1133
|
float min,
|
|
910
1134
|
float max);
|
|
911
1135
|
|
|
912
|
-
|
|
913
|
-
// TODO: we don't support extra parameters for now
|
|
914
|
-
// that's why we are hard-coding the stride, padding, and dilation
|
|
915
|
-
// not great ..
|
|
916
|
-
GGML_API struct ggml_tensor * ggml_conv_1d_1s(
|
|
1136
|
+
GGML_API struct ggml_tensor * ggml_conv_1d(
|
|
917
1137
|
struct ggml_context * ctx,
|
|
918
1138
|
struct ggml_tensor * a,
|
|
919
|
-
struct ggml_tensor * b
|
|
1139
|
+
struct ggml_tensor * b,
|
|
1140
|
+
int s0, // stride
|
|
1141
|
+
int p0, // padding
|
|
1142
|
+
int d0); // dilation
|
|
920
1143
|
|
|
921
|
-
GGML_API struct ggml_tensor *
|
|
1144
|
+
GGML_API struct ggml_tensor * ggml_conv_2d(
|
|
922
1145
|
struct ggml_context * ctx,
|
|
923
1146
|
struct ggml_tensor * a,
|
|
924
|
-
struct ggml_tensor * b
|
|
1147
|
+
struct ggml_tensor * b,
|
|
1148
|
+
int s0,
|
|
1149
|
+
int s1,
|
|
1150
|
+
int p0,
|
|
1151
|
+
int p1,
|
|
1152
|
+
int d0,
|
|
1153
|
+
int d1);
|
|
1154
|
+
|
|
1155
|
+
// conv_1d with padding = half
|
|
1156
|
+
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
|
1157
|
+
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
|
1158
|
+
struct ggml_context * ctx,
|
|
1159
|
+
struct ggml_tensor * a,
|
|
1160
|
+
struct ggml_tensor * b,
|
|
1161
|
+
int s,
|
|
1162
|
+
int d);
|
|
925
1163
|
|
|
926
1164
|
GGML_API struct ggml_tensor * ggml_flash_attn(
|
|
927
1165
|
struct ggml_context * ctx,
|
|
@@ -930,6 +1168,14 @@ extern "C" {
|
|
|
930
1168
|
struct ggml_tensor * v,
|
|
931
1169
|
bool masked);
|
|
932
1170
|
|
|
1171
|
+
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
|
1172
|
+
struct ggml_context * ctx,
|
|
1173
|
+
struct ggml_tensor * q,
|
|
1174
|
+
struct ggml_tensor * k,
|
|
1175
|
+
struct ggml_tensor * v,
|
|
1176
|
+
struct ggml_tensor * d,
|
|
1177
|
+
bool masked);
|
|
1178
|
+
|
|
933
1179
|
GGML_API struct ggml_tensor * ggml_flash_ff(
|
|
934
1180
|
struct ggml_context * ctx,
|
|
935
1181
|
struct ggml_tensor * a,
|
|
@@ -938,21 +1184,106 @@ extern "C" {
|
|
|
938
1184
|
struct ggml_tensor * c0,
|
|
939
1185
|
struct ggml_tensor * c1);
|
|
940
1186
|
|
|
941
|
-
//
|
|
942
|
-
|
|
1187
|
+
// partition into non-overlapping windows with padding if needed
|
|
1188
|
+
// example:
|
|
1189
|
+
// a: 768 64 64 1
|
|
1190
|
+
// w: 14
|
|
1191
|
+
// res: 768 14 14 25
|
|
1192
|
+
// used in sam
|
|
1193
|
+
GGML_API struct ggml_tensor * ggml_win_part(
|
|
1194
|
+
struct ggml_context * ctx,
|
|
1195
|
+
struct ggml_tensor * a,
|
|
1196
|
+
int w);
|
|
1197
|
+
|
|
1198
|
+
// reverse of ggml_win_part
|
|
1199
|
+
// used in sam
|
|
1200
|
+
GGML_API struct ggml_tensor * ggml_win_unpart(
|
|
1201
|
+
struct ggml_context * ctx,
|
|
1202
|
+
struct ggml_tensor * a,
|
|
1203
|
+
int w0,
|
|
1204
|
+
int h0,
|
|
1205
|
+
int w);
|
|
1206
|
+
|
|
1207
|
+
// custom operators
|
|
1208
|
+
|
|
1209
|
+
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
|
943
1210
|
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
|
944
1211
|
|
|
1212
|
+
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
|
1213
|
+
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
|
1214
|
+
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
|
1215
|
+
|
|
945
1216
|
GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
|
946
1217
|
struct ggml_context * ctx,
|
|
947
1218
|
struct ggml_tensor * a,
|
|
948
1219
|
ggml_unary_op_f32_t fun);
|
|
949
1220
|
|
|
1221
|
+
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
1222
|
+
struct ggml_context * ctx,
|
|
1223
|
+
struct ggml_tensor * a,
|
|
1224
|
+
ggml_unary_op_f32_t fun);
|
|
1225
|
+
|
|
950
1226
|
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
|
951
1227
|
struct ggml_context * ctx,
|
|
952
1228
|
struct ggml_tensor * a,
|
|
953
1229
|
struct ggml_tensor * b,
|
|
954
1230
|
ggml_binary_op_f32_t fun);
|
|
955
1231
|
|
|
1232
|
+
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
1233
|
+
struct ggml_context * ctx,
|
|
1234
|
+
struct ggml_tensor * a,
|
|
1235
|
+
struct ggml_tensor * b,
|
|
1236
|
+
ggml_binary_op_f32_t fun);
|
|
1237
|
+
|
|
1238
|
+
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
|
1239
|
+
struct ggml_context * ctx,
|
|
1240
|
+
struct ggml_tensor * a,
|
|
1241
|
+
ggml_custom1_op_f32_t fun);
|
|
1242
|
+
|
|
1243
|
+
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
1244
|
+
struct ggml_context * ctx,
|
|
1245
|
+
struct ggml_tensor * a,
|
|
1246
|
+
ggml_custom1_op_f32_t fun);
|
|
1247
|
+
|
|
1248
|
+
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
|
1249
|
+
struct ggml_context * ctx,
|
|
1250
|
+
struct ggml_tensor * a,
|
|
1251
|
+
struct ggml_tensor * b,
|
|
1252
|
+
ggml_custom2_op_f32_t fun);
|
|
1253
|
+
|
|
1254
|
+
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
1255
|
+
struct ggml_context * ctx,
|
|
1256
|
+
struct ggml_tensor * a,
|
|
1257
|
+
struct ggml_tensor * b,
|
|
1258
|
+
ggml_custom2_op_f32_t fun);
|
|
1259
|
+
|
|
1260
|
+
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
|
1261
|
+
struct ggml_context * ctx,
|
|
1262
|
+
struct ggml_tensor * a,
|
|
1263
|
+
struct ggml_tensor * b,
|
|
1264
|
+
struct ggml_tensor * c,
|
|
1265
|
+
ggml_custom3_op_f32_t fun);
|
|
1266
|
+
|
|
1267
|
+
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
|
1268
|
+
struct ggml_context * ctx,
|
|
1269
|
+
struct ggml_tensor * a,
|
|
1270
|
+
struct ggml_tensor * b,
|
|
1271
|
+
struct ggml_tensor * c,
|
|
1272
|
+
ggml_custom3_op_f32_t fun);
|
|
1273
|
+
|
|
1274
|
+
// loss function
|
|
1275
|
+
|
|
1276
|
+
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|
|
1277
|
+
struct ggml_context * ctx,
|
|
1278
|
+
struct ggml_tensor * a,
|
|
1279
|
+
struct ggml_tensor * b);
|
|
1280
|
+
|
|
1281
|
+
GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
|
|
1282
|
+
struct ggml_context * ctx,
|
|
1283
|
+
struct ggml_tensor * a,
|
|
1284
|
+
struct ggml_tensor * b,
|
|
1285
|
+
struct ggml_tensor * c);
|
|
1286
|
+
|
|
956
1287
|
//
|
|
957
1288
|
// automatic differentiation
|
|
958
1289
|
//
|
|
@@ -969,6 +1300,11 @@ extern "C" {
|
|
|
969
1300
|
GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
|
970
1301
|
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
|
|
971
1302
|
|
|
1303
|
+
GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
|
|
1304
|
+
|
|
1305
|
+
GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
|
|
1306
|
+
GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
|
|
1307
|
+
|
|
972
1308
|
// print info and performance information for the graph
|
|
973
1309
|
GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
|
|
974
1310
|
|
|
@@ -1042,6 +1378,8 @@ extern "C" {
|
|
|
1042
1378
|
struct {
|
|
1043
1379
|
int n_iter;
|
|
1044
1380
|
|
|
1381
|
+
float sched; // schedule multiplier (fixed, decay or warmup)
|
|
1382
|
+
float decay; // weight decay for AdamW, use 0.0f to disable
|
|
1045
1383
|
float alpha; // learning rate
|
|
1046
1384
|
float beta1;
|
|
1047
1385
|
float beta2;
|
|
@@ -1066,6 +1404,49 @@ extern "C" {
|
|
|
1066
1404
|
} lbfgs;
|
|
1067
1405
|
};
|
|
1068
1406
|
|
|
1407
|
+
struct ggml_opt_context {
|
|
1408
|
+
struct ggml_context * ctx;
|
|
1409
|
+
struct ggml_opt_params params;
|
|
1410
|
+
|
|
1411
|
+
int iter;
|
|
1412
|
+
int64_t nx; // number of parameter elements
|
|
1413
|
+
|
|
1414
|
+
bool just_initialized;
|
|
1415
|
+
|
|
1416
|
+
struct {
|
|
1417
|
+
struct ggml_tensor * x; // view of the parameters
|
|
1418
|
+
struct ggml_tensor * g1; // gradient
|
|
1419
|
+
struct ggml_tensor * g2; // gradient squared
|
|
1420
|
+
struct ggml_tensor * m; // first moment
|
|
1421
|
+
struct ggml_tensor * v; // second moment
|
|
1422
|
+
struct ggml_tensor * mh; // first moment hat
|
|
1423
|
+
struct ggml_tensor * vh; // second moment hat
|
|
1424
|
+
struct ggml_tensor * pf; // past function values
|
|
1425
|
+
float fx_best;
|
|
1426
|
+
float fx_prev;
|
|
1427
|
+
int n_no_improvement;
|
|
1428
|
+
} adam;
|
|
1429
|
+
|
|
1430
|
+
struct {
|
|
1431
|
+
struct ggml_tensor * x; // current parameters
|
|
1432
|
+
struct ggml_tensor * xp; // previous parameters
|
|
1433
|
+
struct ggml_tensor * g; // current gradient
|
|
1434
|
+
struct ggml_tensor * gp; // previous gradient
|
|
1435
|
+
struct ggml_tensor * d; // search direction
|
|
1436
|
+
struct ggml_tensor * pf; // past function values
|
|
1437
|
+
struct ggml_tensor * lmal; // the L-BFGS memory alpha
|
|
1438
|
+
struct ggml_tensor * lmys; // the L-BFGS memory ys
|
|
1439
|
+
struct ggml_tensor * lms; // the L-BFGS memory s
|
|
1440
|
+
struct ggml_tensor * lmy; // the L-BFGS memory y
|
|
1441
|
+
float fx_best;
|
|
1442
|
+
float step;
|
|
1443
|
+
int j;
|
|
1444
|
+
int k;
|
|
1445
|
+
int end;
|
|
1446
|
+
int n_no_improvement;
|
|
1447
|
+
} lbfgs;
|
|
1448
|
+
};
|
|
1449
|
+
|
|
1069
1450
|
GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
|
|
1070
1451
|
|
|
1071
1452
|
// optimize the function defined by the tensor f
|
|
@@ -1074,6 +1455,27 @@ extern "C" {
|
|
|
1074
1455
|
struct ggml_opt_params params,
|
|
1075
1456
|
struct ggml_tensor * f);
|
|
1076
1457
|
|
|
1458
|
+
// initialize optimizer context
|
|
1459
|
+
GGML_API void ggml_opt_init(
|
|
1460
|
+
struct ggml_context * ctx,
|
|
1461
|
+
struct ggml_opt_context * opt,
|
|
1462
|
+
struct ggml_opt_params params,
|
|
1463
|
+
int64_t nx);
|
|
1464
|
+
|
|
1465
|
+
// continue optimizing the function defined by the tensor f
|
|
1466
|
+
GGML_API enum ggml_opt_result ggml_opt_resume(
|
|
1467
|
+
struct ggml_context * ctx,
|
|
1468
|
+
struct ggml_opt_context * opt,
|
|
1469
|
+
struct ggml_tensor * f);
|
|
1470
|
+
|
|
1471
|
+
// continue optimizing the function defined by the tensor f
|
|
1472
|
+
GGML_API enum ggml_opt_result ggml_opt_resume_g(
|
|
1473
|
+
struct ggml_context * ctx,
|
|
1474
|
+
struct ggml_opt_context * opt,
|
|
1475
|
+
struct ggml_tensor * f,
|
|
1476
|
+
struct ggml_cgraph * gf,
|
|
1477
|
+
struct ggml_cgraph * gb);
|
|
1478
|
+
|
|
1077
1479
|
//
|
|
1078
1480
|
// quantization
|
|
1079
1481
|
//
|