whisper.rn 0.3.0-rc.6 → 0.3.0

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
package/cpp/ggml.h CHANGED
@@ -198,8 +198,11 @@
  #define GGML_MAX_PARAMS 256
  #define GGML_MAX_CONTEXTS 64
  #define GGML_MAX_OPT 4
+ #define GGML_MAX_NAME 48
  #define GGML_DEFAULT_N_THREADS 4

+ #define GGML_UNUSED(x) (void)(x)
+
  #define GGML_ASSERT(x) \
  do { \
  if (!(x)) { \
@@ -208,6 +211,30 @@
  } \
  } while (0)

+ // used to copy the number of elements and stride in bytes of tensors into local variables.
+ // main purpose is to reduce code duplication and improve readability.
+ //
+ // example:
+ //
+ // GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+ // GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
+ //
+ #define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
+ const type prefix##0 = (pointer)->array[0]; \
+ GGML_UNUSED(prefix##0);
+ #define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
+ GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
+ const type prefix##1 = (pointer)->array[1]; \
+ GGML_UNUSED(prefix##1);
+ #define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
+ GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
+ const type prefix##2 = (pointer)->array[2]; \
+ GGML_UNUSED(prefix##2);
+ #define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
+ GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
+ const type prefix##3 = (pointer)->array[3]; \
+ GGML_UNUSED(prefix##3);
+
  #ifdef __cplusplus
  extern "C" {
  #endif
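Editor's note: for reference, a sketch of what the new GGML_TENSOR_LOCALS helper expands to, derived directly from the macros added above; the tensor name src1 is just the placeholder used in the header's own example.

    // Illustrative expansion only (not part of the diff): with a tensor pointer src1,
    // GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) produces four const locals, each
    // silenced with GGML_UNUSED so the unused ones do not trigger compiler warnings.
    const int64_t ne10 = (src1)->ne[0]; (void)(ne10);
    const int64_t ne11 = (src1)->ne[1]; (void)(ne11);
    const int64_t ne12 = (src1)->ne[2]; (void)(ne12);
    const int64_t ne13 = (src1)->ne[3]; (void)(ne13);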
@@ -240,6 +267,13 @@ extern "C" {
  GGML_TYPE_Q5_1 = 7,
  GGML_TYPE_Q8_0 = 8,
  GGML_TYPE_Q8_1 = 9,
+ // k-quantizations
+ GGML_TYPE_Q2_K = 10,
+ GGML_TYPE_Q3_K = 11,
+ GGML_TYPE_Q4_K = 12,
+ GGML_TYPE_Q5_K = 13,
+ GGML_TYPE_Q6_K = 14,
+ GGML_TYPE_Q8_K = 15,
  GGML_TYPE_I8,
  GGML_TYPE_I16,
  GGML_TYPE_I32,
@@ -248,7 +282,8 @@ extern "C" {

  enum ggml_backend {
  GGML_BACKEND_CPU = 0,
- GGML_BACKEND_CUDA = 1,
+ GGML_BACKEND_GPU = 10,
+ GGML_BACKEND_GPU_SPLIT = 20,
  };

  // model file types
@@ -262,6 +297,11 @@ extern "C" {
  GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
  GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
  GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
+ GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
+ GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
+ GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
+ GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
+ GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
  };

  // available tensor operations:
@@ -281,13 +321,18 @@ extern "C" {
  GGML_OP_SUM,
  GGML_OP_SUM_ROWS,
  GGML_OP_MEAN,
+ GGML_OP_ARGMAX,
  GGML_OP_REPEAT,
+ GGML_OP_REPEAT_BACK,
  GGML_OP_ABS,
  GGML_OP_SGN,
  GGML_OP_NEG,
  GGML_OP_STEP,
+ GGML_OP_TANH,
+ GGML_OP_ELU,
  GGML_OP_RELU,
  GGML_OP_GELU,
+ GGML_OP_GELU_QUICK,
  GGML_OP_SILU,
  GGML_OP_SILU_BACK,
  GGML_OP_NORM, // normalize
@@ -295,6 +340,7 @@ extern "C" {
  GGML_OP_RMS_NORM_BACK,

  GGML_OP_MUL_MAT,
+ GGML_OP_OUT_PROD,

  GGML_OP_SCALE,
  GGML_OP_SET,
@@ -310,19 +356,30 @@ extern "C" {
  GGML_OP_DIAG_MASK_INF,
  GGML_OP_DIAG_MASK_ZERO,
  GGML_OP_SOFT_MAX,
+ GGML_OP_SOFT_MAX_BACK,
  GGML_OP_ROPE,
  GGML_OP_ROPE_BACK,
  GGML_OP_ALIBI,
  GGML_OP_CLAMP,
- GGML_OP_CONV_1D_1S,
- GGML_OP_CONV_1D_2S,
+ GGML_OP_CONV_1D,
+ GGML_OP_CONV_2D,

  GGML_OP_FLASH_ATTN,
  GGML_OP_FLASH_FF,
+ GGML_OP_FLASH_ATTN_BACK,
+ GGML_OP_WIN_PART,
+ GGML_OP_WIN_UNPART,

  GGML_OP_MAP_UNARY,
  GGML_OP_MAP_BINARY,

+ GGML_OP_MAP_CUSTOM1,
+ GGML_OP_MAP_CUSTOM2,
+ GGML_OP_MAP_CUSTOM3,
+
+ GGML_OP_CROSS_ENTROPY_LOSS,
+ GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+
  GGML_OP_COUNT,
  };

@@ -371,11 +428,15 @@ extern "C" {

  void * data;

- char name[32];
+ char name[GGML_MAX_NAME];
+
+ void * extra; // extra things e.g. for ggml-cuda.cu

- char padding[16];
+ char padding[4];
  };

+ static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
+
  // computation graph
  struct ggml_cgraph {
  int n_nodes;
@@ -409,6 +470,28 @@ extern "C" {
  bool no_alloc; // don't allocate memory for the tensor data
  };

+
+ // compute types
+
+ // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
+ // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
+ enum ggml_task_type {
+ GGML_TASK_INIT = 0,
+ GGML_TASK_COMPUTE,
+ GGML_TASK_FINALIZE,
+ };
+
+ struct ggml_compute_params {
+ enum ggml_task_type type;
+
+ // ith = thread index, nth = number of threads
+ int ith, nth;
+
+ // work buffer for all threads
+ size_t wsize;
+ void * wdata;
+ };
+
  // misc

  GGML_API void ggml_time_init(void); // call this once at the beginning of the program
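Editor's note: a hedged sketch of how an op kernel typically uses the ith/nth fields of the newly exposed ggml_compute_params to split work across threads. This is an illustrative pattern, not code from this package; the function name and row count are hypothetical.

    // Illustrative only: partition nr rows evenly across params->nth threads and
    // process this thread's slice during the COMPUTE pass.
    static void example_compute_rows(const struct ggml_compute_params * params, int64_t nr) {
        if (params->type != GGML_TASK_COMPUTE) {
            return; // INIT/FINALIZE are only scheduled when explicitly enabled (see note above)
        }
        const int64_t dr  = (nr + params->nth - 1) / params->nth;  // rows per thread
        const int64_t ir0 = dr * params->ith;                      // first row for this thread
        const int64_t ir1 = ir0 + dr > nr ? nr : ir0 + dr;         // one past the last row
        for (int64_t ir = ir0; ir < ir1; ++ir) {
            // ... process row ir ...
        }
    }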
@@ -417,17 +500,23 @@ extern "C" {
  GGML_API int64_t ggml_cycles(void);
  GGML_API int64_t ggml_cycles_per_ms(void);

+ GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
+ GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
+
  GGML_API void ggml_print_object (const struct ggml_object * obj);
  GGML_API void ggml_print_objects(const struct ggml_context * ctx);

- GGML_API int64_t ggml_nelements(const struct ggml_tensor * tensor);
- GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
+ GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
+ GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
+ GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
+ GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);

  GGML_API int ggml_blck_size (enum ggml_type type);
  GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
  GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float

  GGML_API const char * ggml_type_name(enum ggml_type type);
+ GGML_API const char * ggml_op_name (enum ggml_op op);

  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);

@@ -436,14 +525,26 @@ extern "C" {
  // TODO: temporary until model loading of ggml examples is refactored
  GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);

+ GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
+ GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
+ GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
+
+ // use this to compute the memory overhead of a tensor
+ GGML_API size_t ggml_tensor_overhead(void);
+
  // main

  GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
- GGML_API void ggml_free(struct ggml_context * ctx);
+ GGML_API void ggml_free(struct ggml_context * ctx);

  GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);

- GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
+ GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
+ GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
+
+ GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
+ GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);
+ GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);

  GGML_API struct ggml_tensor * ggml_new_tensor(
  struct ggml_context * ctx,
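Editor's note: a hypothetical sizing helper showing the kind of budgeting that ggml_tensor_overhead() and ggml_set_no_alloc() enable (create metadata-only tensors first, then allocate a context large enough for metadata plus data). This is an assumed usage pattern, not code from this package.

    // Illustrative only: rough context budget for n_tensors tensors whose payloads
    // total total_data_bytes, using the per-tensor overhead reported by ggml.
    static size_t estimate_ctx_size(int n_tensors, size_t total_data_bytes) {
        return (size_t) n_tensors * ggml_tensor_overhead() + total_data_bytes;
    }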
@@ -483,6 +584,8 @@ extern "C" {
  GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
  GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);

+ GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
+
  GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
  GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
  GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
@@ -496,8 +599,9 @@ extern "C" {
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

- GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
- GGML_API void ggml_set_name(struct ggml_tensor * tensor, const char * name);
+ GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
+ GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
+ GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);

  //
  // operations on tensors with backpropagation
@@ -522,6 +626,11 @@ extern "C" {
  struct ggml_tensor * a,
  struct ggml_tensor * b);

+ GGML_API struct ggml_tensor * ggml_add1_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  GGML_API struct ggml_tensor * ggml_acc(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
@@ -545,24 +654,47 @@ extern "C" {
  struct ggml_tensor * a,
  struct ggml_tensor * b);

+ GGML_API struct ggml_tensor * ggml_sub_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  GGML_API struct ggml_tensor * ggml_mul(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b);

+ GGML_API struct ggml_tensor * ggml_mul_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  GGML_API struct ggml_tensor * ggml_div(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b);

+ GGML_API struct ggml_tensor * ggml_div_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  GGML_API struct ggml_tensor * ggml_sqr(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_sqr_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_sqrt(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_sqrt_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_log(
  struct ggml_context * ctx,
  struct ggml_tensor * a);
@@ -586,6 +718,11 @@ extern "C" {
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ // argmax along rows
+ GGML_API struct ggml_tensor * ggml_argmax(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  // if a is the same shape as b, and a is not parameter, return a
  // otherwise, return a new tensor: repeat(a) to fit in b
  GGML_API struct ggml_tensor * ggml_repeat(
@@ -593,35 +730,92 @@ extern "C" {
  struct ggml_tensor * a,
  struct ggml_tensor * b);

+ GGML_API struct ggml_tensor * ggml_repeat_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  GGML_API struct ggml_tensor * ggml_abs(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_abs_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_sgn(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_sgn_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_neg(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_neg_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_step(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_step_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_tanh(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_tanh_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_elu(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_elu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_relu(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_relu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  // TODO: double-check this computation is correct
  GGML_API struct ggml_tensor * ggml_gelu(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_gelu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_gelu_quick(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_silu(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_silu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  // a - x
  // b - dy
  GGML_API struct ggml_tensor * ggml_silu_back(
@@ -635,10 +829,18 @@ extern "C" {
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_norm_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_rms_norm(
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  // a - x
  // b - dy
  GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -646,14 +848,22 @@ extern "C" {
  struct ggml_tensor * a,
  struct ggml_tensor * b);

- // A: m rows, n columns
- // B: p rows, n columns (i.e. we transpose it internally)
+ // A: n columns, m rows
+ // B: n columns, p rows (i.e. we transpose it internally)
  // result is m columns, p rows
  GGML_API struct ggml_tensor * ggml_mul_mat(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b);

+ // A: m columns, n rows,
+ // B: p columns, n rows,
+ // result is m columns, p rows
+ GGML_API struct ggml_tensor * ggml_out_prod(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  //
  // operations on tensors without backpropagation
  //
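Editor's note: a small worked example matching the corrected ggml_mul_mat shape comment; the function name and dimension variables are illustrative, and ggml_new_tensor_2d is assumed to be declared elsewhere in this header.

    // In ggml, ne[0] is the number of columns (fastest-running dimension) and
    // ne[1] the number of rows, so this mirrors the comment above.
    static struct ggml_tensor * mul_mat_example(struct ggml_context * ctx,
                                                int64_t n, int64_t m, int64_t p) {
        struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n, m); // n columns, m rows
        struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n, p); // n columns, p rows
        return ggml_mul_mat(ctx, A, B);                                        // m columns, p rows
    }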
@@ -864,16 +1074,29 @@ extern "C" {
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_soft_max_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
+ // in-place, returns view(a)
+ GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
  // rotary position embedding
  // if mode & 1 == 1, skip n_past elements
  // if mode & 2 == 1, GPT-NeoX style
+ // if mode & 4 == 1, ChatGLM style
  // TODO: avoid creating a new tensor every time
  GGML_API struct ggml_tensor * ggml_rope(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  int n_past,
  int n_dims,
- int mode);
+ int mode,
+ int n_ctx);

  // in-place, returns view(a)
  GGML_API struct ggml_tensor * ggml_rope_inplace(
@@ -881,7 +1104,8 @@ extern "C" {
  struct ggml_tensor * a,
  int n_past,
  int n_dims,
- int mode);
+ int mode,
+ int n_ctx);

  // rotary position embedding backward, i.e compute dx from dy
  // a - dy
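Editor's note: both rope variants now take an extra n_ctx argument. A hedged call sketch under the updated signature follows; the wrapper name is illustrative and the mode/n_past/n_dims/n_ctx values are assumed to come from the caller's model state.

    // mode is a bit mask per the comments above
    // (bit 0: skip n_past elements, bit 1: GPT-NeoX style, bit 2: ChatGLM style).
    static struct ggml_tensor * rope_example(struct ggml_context * ctx, struct ggml_tensor * cur,
                                             int n_past, int n_dims, int n_ctx) {
        return ggml_rope_inplace(ctx, cur, n_past, n_dims, /*mode =*/ 0, n_ctx);
    }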
@@ -909,19 +1133,33 @@ extern "C" {
  float min,
  float max);

- // padding = 1
- // TODO: we don't support extra parameters for now
- // that's why we are hard-coding the stride, padding, and dilation
- // not great ..
- GGML_API struct ggml_tensor * ggml_conv_1d_1s(
+ GGML_API struct ggml_tensor * ggml_conv_1d(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
- struct ggml_tensor * b);
+ struct ggml_tensor * b,
+ int s0, // stride
+ int p0, // padding
+ int d0); // dilation

- GGML_API struct ggml_tensor * ggml_conv_1d_2s(
+ GGML_API struct ggml_tensor * ggml_conv_2d(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
- struct ggml_tensor * b);
+ struct ggml_tensor * b,
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1);
+
+ // conv_1d with padding = half
+ // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+ GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ int s,
+ int d);

  GGML_API struct ggml_tensor * ggml_flash_attn(
  struct ggml_context * ctx,
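Editor's note: the hard-coded 1s/2s convolutions are replaced by general routines with explicit stride, padding and dilation. Per the header's own comment, the "padding = half" helper is just shorthand for the general call; the sketch below spells that out with illustrative names.

    // The two calls describe the same operation, following the alias comment above.
    static void conv_ph_example(struct ggml_context * ctx, struct ggml_tensor * a,
                                struct ggml_tensor * b, int s, int d) {
        struct ggml_tensor * y1 = ggml_conv_1d_ph(ctx, a, b, s, d);
        struct ggml_tensor * y2 = ggml_conv_1d(ctx, a, b, s, /*p0 =*/ a->ne[0]/2, /*d0 =*/ d);
        (void) y1; (void) y2;
    }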
@@ -930,6 +1168,14 @@ extern "C" {
  struct ggml_tensor * v,
  bool masked);

+ GGML_API struct ggml_tensor * ggml_flash_attn_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * q,
+ struct ggml_tensor * k,
+ struct ggml_tensor * v,
+ struct ggml_tensor * d,
+ bool masked);
+
  GGML_API struct ggml_tensor * ggml_flash_ff(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
@@ -938,21 +1184,106 @@ extern "C" {
  struct ggml_tensor * c0,
  struct ggml_tensor * c1);

- // Mapping operations
- typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+ // partition into non-overlapping windows with padding if needed
+ // example:
+ // a: 768 64 64 1
+ // w: 14
+ // res: 768 14 14 25
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_win_part(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int w);
+
+ // reverse of ggml_win_part
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_win_unpart(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int w0,
+ int h0,
+ int w);
+
+ // custom operators
+
+ typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
  typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);

+ typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+ typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+ typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
  GGML_API struct ggml_tensor * ggml_map_unary_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  ggml_unary_op_f32_t fun);

+ GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_unary_op_f32_t fun);
+
  GGML_API struct ggml_tensor * ggml_map_binary_f32(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * b,
  ggml_binary_op_f32_t fun);

+ GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_binary_op_f32_t fun);
+
+ GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_f32_t fun);
+
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ ggml_custom1_op_f32_t fun);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_f32_t fun);
+
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ ggml_custom2_op_f32_t fun);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_f32_t fun);
+
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c,
+ ggml_custom3_op_f32_t fun);
+
+ // loss function
+
+ GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
+ GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ struct ggml_tensor * c);
+
  //
  // automatic differentiation
  //
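Editor's note: a hypothetical callback for the new custom1 hook, matching the ggml_custom1_op_f32_t typedef added above (destination tensor first, source tensor second). The callback name is illustrative and the loop assumes contiguous F32 tensors for simplicity.

    // Clamp every element of src into [0, 1] and write the result into dst.
    static void clamp01_op(struct ggml_tensor * dst, const struct ggml_tensor * src) {
        float       * d = (float       *) dst->data;
        const float * s = (const float *) src->data;
        const int64_t n = ggml_nelements(src);
        for (int64_t i = 0; i < n; ++i) {
            d[i] = s[i] < 0.0f ? 0.0f : (s[i] > 1.0f ? 1.0f : s[i]);
        }
    }
    // usage: struct ggml_tensor * out = ggml_map_custom1_f32(ctx, a, clamp01_op);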
@@ -969,6 +1300,11 @@ extern "C" {
  GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph);
  GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);

+ GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
+
+ GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
+ GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
+
  // print info and performance information for the graph
  GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

@@ -1042,6 +1378,8 @@ extern "C" {
  struct {
  int n_iter;

+ float sched; // schedule multiplier (fixed, decay or warmup)
+ float decay; // weight decay for AdamW, use 0.0f to disable
  float alpha; // learning rate
  float beta1;
  float beta2;
@@ -1066,6 +1404,49 @@ extern "C" {
  } lbfgs;
  };

+ struct ggml_opt_context {
+ struct ggml_context * ctx;
+ struct ggml_opt_params params;
+
+ int iter;
+ int64_t nx; // number of parameter elements
+
+ bool just_initialized;
+
+ struct {
+ struct ggml_tensor * x; // view of the parameters
+ struct ggml_tensor * g1; // gradient
+ struct ggml_tensor * g2; // gradient squared
+ struct ggml_tensor * m; // first moment
+ struct ggml_tensor * v; // second moment
+ struct ggml_tensor * mh; // first moment hat
+ struct ggml_tensor * vh; // second moment hat
+ struct ggml_tensor * pf; // past function values
+ float fx_best;
+ float fx_prev;
+ int n_no_improvement;
+ } adam;
+
+ struct {
+ struct ggml_tensor * x; // current parameters
+ struct ggml_tensor * xp; // previous parameters
+ struct ggml_tensor * g; // current gradient
+ struct ggml_tensor * gp; // previous gradient
+ struct ggml_tensor * d; // search direction
+ struct ggml_tensor * pf; // past function values
+ struct ggml_tensor * lmal; // the L-BFGS memory alpha
+ struct ggml_tensor * lmys; // the L-BFGS memory ys
+ struct ggml_tensor * lms; // the L-BFGS memory s
+ struct ggml_tensor * lmy; // the L-BFGS memory y
+ float fx_best;
+ float step;
+ int j;
+ int k;
+ int end;
+ int n_no_improvement;
+ } lbfgs;
+ };
+
  GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);

  // optimize the function defined by the tensor f
@@ -1074,6 +1455,27 @@ extern "C" {
  struct ggml_opt_params params,
  struct ggml_tensor * f);

+ // initialize optimizer context
+ GGML_API void ggml_opt_init(
+ struct ggml_context * ctx,
+ struct ggml_opt_context * opt,
+ struct ggml_opt_params params,
+ int64_t nx);
+
+ // continue optimizing the function defined by the tensor f
+ GGML_API enum ggml_opt_result ggml_opt_resume(
+ struct ggml_context * ctx,
+ struct ggml_opt_context * opt,
+ struct ggml_tensor * f);
+
+ // continue optimizing the function defined by the tensor f
+ GGML_API enum ggml_opt_result ggml_opt_resume_g(
+ struct ggml_context * ctx,
+ struct ggml_opt_context * opt,
+ struct ggml_tensor * f,
+ struct ggml_cgraph * gf,
+ struct ggml_cgraph * gb);
+
  //
  // quantization
  //
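Editor's note: the one-shot ggml_opt() is complemented by a resumable flow via the new ggml_opt_context. A hedged sketch follows; ctx, params, f and nx are assumed to be prepared by the caller exactly as they would be for ggml_opt(), with nx being the total number of parameter elements.

    struct ggml_opt_context opt;
    ggml_opt_init(ctx, &opt, params, nx);                      // set up persistent optimizer state
    enum ggml_opt_result res = ggml_opt_resume(ctx, &opt, f);  // can be called again to continue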