llama_cpp 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/examples/README.md +92 -0
- data/examples/chat.rb +195 -0
- data/examples/embedding.rb +37 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1218 -411
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +703 -514
- data/ext/llama_cpp/src/ggml-metal.metal +574 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +496 -36
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +2715 -476
- data/ext/llama_cpp/src/ggml.h +266 -11
- data/ext/llama_cpp/src/llama.cpp +266 -135
- data/ext/llama_cpp/src/llama.h +19 -11
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +5 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
```diff
--- a/data/ext/llama_cpp/src/ggml.h
+++ b/data/ext/llama_cpp/src/ggml.h
@@ -296,12 +296,14 @@ extern "C" {
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_REPEAT,
+        GGML_OP_REPEAT_BACK,
         GGML_OP_ABS,
         GGML_OP_SGN,
         GGML_OP_NEG,
         GGML_OP_STEP,
         GGML_OP_RELU,
         GGML_OP_GELU,
+        GGML_OP_GELU_QUICK,
         GGML_OP_SILU,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
@@ -309,6 +311,7 @@ extern "C" {
         GGML_OP_RMS_NORM_BACK,
 
         GGML_OP_MUL_MAT,
+        GGML_OP_OUT_PROD,
 
         GGML_OP_SCALE,
         GGML_OP_SET,
@@ -324,19 +327,27 @@ extern "C" {
         GGML_OP_DIAG_MASK_INF,
         GGML_OP_DIAG_MASK_ZERO,
         GGML_OP_SOFT_MAX,
+        GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
         GGML_OP_CLAMP,
-        GGML_OP_CONV_1D_1S,
-        GGML_OP_CONV_1D_2S,
+        GGML_OP_CONV_1D_S1_PH,
+        GGML_OP_CONV_1D_S2_PH,
+        GGML_OP_CONV_2D_SK_P0,
 
         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
+        GGML_OP_FLASH_ATTN_BACK,
+        GGML_OP_WIN_PART,
+        GGML_OP_WIN_UNPART,
 
         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,
 
+        GGML_OP_CROSS_ENTROPY_LOSS,
+        GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+
         GGML_OP_COUNT,
     };
 
@@ -478,6 +489,7 @@ extern "C" {
 
     GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
 
     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
@@ -492,8 +504,9 @@ extern "C" {
     GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
-    GGML_API void * ggml_get_mem_buffer(struct ggml_context * ctx);
-    GGML_API size_t ggml_get_mem_size  (struct ggml_context * ctx);
+    GGML_API void *  ggml_get_mem_buffer     (const struct ggml_context * ctx);
+    GGML_API size_t  ggml_get_mem_size       (const struct ggml_context * ctx);
+    GGML_API size_t  ggml_get_max_tensor_size(const struct ggml_context * ctx);
 
     GGML_API struct ggml_tensor * ggml_new_tensor(
             struct ggml_context * ctx,
```
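The hunks above add a `ggml_is_permuted` predicate and const-qualified context queries, including the new `ggml_get_max_tensor_size`. Below is a minimal sketch (not part of the gem) of how these might be used to inspect a context; the 16 MiB arena and the tensor shapes are arbitrary example values:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // allocate a small ggml arena (16 MiB is an arbitrary example size)
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 32);
    struct ggml_tensor * p = ggml_permute(ctx, a, 1, 0, 2, 3); // swap the first two dims

    // new in this release: query tensor layout and context memory usage
    printf("a permuted? %d, p permuted? %d\n", ggml_is_permuted(a), ggml_is_permuted(p));
    printf("arena size: %zu bytes, largest tensor: %zu bytes\n",
           ggml_get_mem_size(ctx), ggml_get_max_tensor_size(ctx));

    ggml_free(ctx);
    return 0;
}
```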
```diff
@@ -548,8 +561,8 @@ extern "C" {
     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
 
-    GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
-    GGML_API void         ggml_set_name(struct ggml_tensor * tensor, const char * name);
+    GGML_API const char *          ggml_get_name(const struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor *  ggml_set_name(struct ggml_tensor * tensor, const char * name);
 
     //
     // operations on tensors with backpropagation
```
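`ggml_set_name` now returns the tensor it names instead of `void`, so a tensor can be created and labelled in one expression, with `ggml_get_name` reading the label back later. A small illustrative sketch; the helper name `make_weight` and the label `"ffn_w"` are made up for the example:

```c
#include "ggml.h"

// build a named weight tensor; ggml_set_name returning the tensor lets us
// create and label it in a single expression
static struct ggml_tensor * make_weight(struct ggml_context * ctx, int64_t n_in, int64_t n_out) {
    return ggml_set_name(ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_in, n_out), "ffn_w");
}
```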
```diff
@@ -574,6 +587,11 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    GGML_API struct ggml_tensor * ggml_add1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_acc(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -597,24 +615,47 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    GGML_API struct ggml_tensor * ggml_sub_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_mul(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    GGML_API struct ggml_tensor * ggml_mul_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_div(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    GGML_API struct ggml_tensor * ggml_div_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_sqr(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_sqr_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_sqrt(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_sqrt_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_log(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
```
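The `*_inplace` variants added above compute their result into a view of the first operand rather than a freshly allocated tensor, which trims graph memory when the intermediate value is disposable. A hedged sketch of the difference, assuming `x` and `y` are same-shaped F32 tensors created from the same context:

```c
#include "ggml.h"

// assumes x and y are same-shaped F32 tensors created from ctx
static struct ggml_tensor * scale_example(struct ggml_context * ctx,
                                          struct ggml_tensor  * x,
                                          struct ggml_tensor  * y) {
    // out-of-place: the product gets its own buffer, x is preserved
    struct ggml_tensor * a = ggml_mul(ctx, x, y);

    // in-place: the result is a view of a, overwriting it on compute;
    // handy when a is a throwaway intermediate and memory is tight
    struct ggml_tensor * b = ggml_mul_inplace(ctx, a, y);

    return b;
}
```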
```diff
@@ -645,35 +686,76 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    GGML_API struct ggml_tensor * ggml_repeat_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_abs_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_sgn(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_sgn_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_neg(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_neg_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_step(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_step_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_relu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_relu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // TODO: double-check this computation is correct
     GGML_API struct ggml_tensor * ggml_gelu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_gelu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_gelu_quick(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_silu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_silu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // a - x
     // b - dy
     GGML_API struct ggml_tensor * ggml_silu_back(
```
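`ggml_gelu_quick` (plus an in-place twin) exposes the cheaper sigmoid-based GELU approximation as a regular graph op. A sketch of a tiny feed-forward block using it; the helper `ffn` and the weights `w1`/`w2` are placeholders for the example, not symbols from this package:

```c
#include "ggml.h"

// y = w2 * gelu_quick(w1 * x)  -- a minimal MLP block sketch
static struct ggml_tensor * ffn(struct ggml_context * ctx,
                                struct ggml_tensor  * x,
                                struct ggml_tensor  * w1,
                                struct ggml_tensor  * w2) {
    struct ggml_tensor * cur = ggml_mul_mat(ctx, w1, x);
    cur = ggml_gelu_quick(ctx, cur);   // cheaper GELU approximation
    return ggml_mul_mat(ctx, w2, cur);
}
```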
```diff
@@ -687,10 +769,18 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_rms_norm(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // a - x
     // b - dy
     GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -698,14 +788,22 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
-    // A: m rows, n columns
-    // B: p rows, n columns (i.e. we transpose it internally)
+    // A: n columns, m rows
+    // B: n columns, p rows  (i.e. we transpose it internally)
     // result is m columns, p rows
     GGML_API struct ggml_tensor * ggml_mul_mat(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    // A: m columns, n rows,
+    // B: p columns, n rows,
+    // result is m columns, p rows
+    GGML_API struct ggml_tensor * ggml_out_prod(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     //
     // operations on tensors without backpropagation
     //
```
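The reworded comments above pin down the shape conventions: `ggml_mul_mat` contracts over `ne[0]` of both operands, while the new `ggml_out_prod` contracts over `ne[1]`. A dimension sketch under that reading; the concrete sizes are arbitrary example values:

```c
#include "ggml.h"

// shape sketch (ne[0] is the column count):
//   mul_mat : a = [n, m], b = [n, p]  ->  [m, p]  (shared inner dim is ne[0])
//   out_prod: a = [m, n], b = [p, n]  ->  [m, p]  (shared inner dim is ne[1])
static struct ggml_tensor * outer_product_example(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4); // m=8 cols, n=4 rows
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 6, 4); // p=6 cols, n=4 rows
    return ggml_out_prod(ctx, a, b);                                       // 8 cols, 6 rows
}
```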
```diff
@@ -916,6 +1014,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_soft_max_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // rotary position embedding
     // if mode & 1 == 1, skip n_past elements
     // if mode & 2 == 1, GPT-NeoX style
@@ -961,16 +1070,55 @@ extern "C" {
             float                 min,
             float                 max);
 
-    //
+    // TODO: implement general-purpose convolutions
+    // GGML_API struct ggml_tensor * ggml_conv_1d(
+    //        struct ggml_context * ctx,
+    //        struct ggml_tensor  * a,
+    //        struct ggml_tensor  * b,
+    //        int                   s0
+    //        int                   p0,
+    //        int                   d0);
+    //
+    // GGML_API struct ggml_tensor * ggml_conv_2d(
+    //        struct ggml_context * ctx,
+    //        struct ggml_tensor  * a,
+    //        struct ggml_tensor  * b,
+    //        int                   s0,
+    //        int                   s1,
+    //        int                   p0,
+    //        int                   p1,
+    //        int                   d0,
+    //        int                   d1);
+
+    // padding = half
     // TODO: we don't support extra parameters for now
     //       that's why we are hard-coding the stride, padding, and dilation
     //       not great ..
-    GGML_API struct ggml_tensor * ggml_conv_1d_1s(
+    // example:
+    // a:      3   80  768    1
+    // b:   3000   80    1    1
+    // res: 3000  768    1    1
+    // used in whisper
+    GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // used in whisper
+    GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
-    GGML_API struct ggml_tensor * ggml_conv_1d_2s(
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is equal to kernel size
+    // padding is zero
+    // example:
+    // a:     16   16    3  768
+    // b:   1024 1024    3    1
+    // res:   64   64  768    1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
```
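`ggml_conv_1d_s1_ph` and `ggml_conv_1d_s2_ph` are the renamed stride-1 and stride-2 half-padding convolutions, and `ggml_conv_2d_sk_p0` is new. A sketch that mirrors the whisper-style shape example from the header comment; the F32 kernel type and the helper name are assumptions made for the example, not requirements stated in the header:

```c
#include "ggml.h"

// follows the header's shape example for ggml_conv_1d_s1_ph:
//   kernel a: [3, 80, 768], input b: [3000, 80]  ->  [3000, 768]
static struct ggml_tensor * conv_example(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 3, 80, 768); // 768 filters, width 3, 80 channels
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3000, 80);   // 3000 frames, 80 channels
    return ggml_conv_1d_s1_ph(ctx, a, b); // stride 1, "half" padding keeps the length at 3000
}
```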
```diff
@@ -982,6 +1130,14 @@ extern "C" {
             struct ggml_tensor  * v,
             bool                  masked);
 
+    GGML_API struct ggml_tensor * ggml_flash_attn_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * q,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * d,
+            bool                  masked);
+
     GGML_API struct ggml_tensor * ggml_flash_ff(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -990,6 +1146,26 @@ extern "C" {
             struct ggml_tensor  * c0,
             struct ggml_tensor  * c1);
 
+    // partition into non-overlapping windows with padding if needed
+    // example:
+    // a:   768   64  64    1
+    // w:    14
+    // res: 768   14  14   25
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_win_part(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   w);
+
+    // reverse of ggml_win_part
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_win_unpart(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   w0,
+            int                   h0,
+            int                   w);
+
     // Mapping operations
     typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
     typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
```
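`ggml_win_part` / `ggml_win_unpart` split a tensor into fixed-size spatial windows (padding as needed) and merge them back, per the SAM-style example in the comment. A round-trip sketch under that example's shapes; the helper name is invented for illustration:

```c
#include "ggml.h"

// window partition / un-partition round trip, following the header example:
//   a: [768, 64, 64, 1], w = 14  ->  windows: [768, 14, 14, 25]
static struct ggml_tensor * window_roundtrip(struct ggml_context * ctx) {
    struct ggml_tensor * a    = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 768, 64, 64);
    struct ggml_tensor * wins = ggml_win_part  (ctx, a, 14);            // pads 64x64 to 70x70 -> 5x5 = 25 windows
    struct ggml_tensor * back = ggml_win_unpart(ctx, wins, 64, 64, 14); // restores the original 64x64 layout
    return back;
}
```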
```diff
@@ -1005,6 +1181,19 @@ extern "C" {
             struct ggml_tensor  * b,
             ggml_binary_op_f32_t fun);
 
+    // loss function
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c);
+
     //
     // automatic differentiation
     //
```
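`ggml_cross_entropy_loss` produces a scalar loss tensor from two same-shaped inputs, and `ggml_cross_entropy_loss_back` takes the incoming gradient as a third argument. A minimal sketch; treating `a` as scores and `b` as target probabilities is an assumption drawn from how the op is typically used upstream, not something this header states:

```c
#include "ggml.h"

// scalar cross-entropy loss between same-shaped score and target tensors
static struct ggml_tensor * ce_loss(struct ggml_context * ctx,
                                    struct ggml_tensor  * scores,
                                    struct ggml_tensor  * targets) {
    struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, scores, targets);
    ggml_set_name(loss, "loss"); // label the node for easier graph debugging
    return loss;
}
```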
```diff
@@ -1099,6 +1288,8 @@ extern "C" {
         struct {
             int n_iter;
 
+            float sched; // schedule multiplier (fixed, decay or warmup)
+            float decay; // weight decay for AdamW, use 0.0f to disable
             float alpha; // learning rate
             float beta1;
             float beta2;
@@ -1123,6 +1314,49 @@ extern "C" {
         } lbfgs;
     };
 
+    struct ggml_opt_context {
+        struct ggml_context * ctx;
+        struct ggml_opt_params params;
+
+        int iter;
+        int64_t nx; // number of parameter elements
+
+        bool just_initialized;
+
+        struct {
+            struct ggml_tensor * x;  // view of the parameters
+            struct ggml_tensor * g1; // gradient
+            struct ggml_tensor * g2; // gradient squared
+            struct ggml_tensor * m;  // first moment
+            struct ggml_tensor * v;  // second moment
+            struct ggml_tensor * mh; // first moment hat
+            struct ggml_tensor * vh; // second moment hat
+            struct ggml_tensor * pf; // past function values
+            float fx_best;
+            float fx_prev;
+            int n_no_improvement;
+        } adam;
+
+        struct {
+            struct ggml_tensor * x;    // current parameters
+            struct ggml_tensor * xp;   // previous parameters
+            struct ggml_tensor * g;    // current gradient
+            struct ggml_tensor * gp;   // previous gradient
+            struct ggml_tensor * d;    // search direction
+            struct ggml_tensor * pf;   // past function values
+            struct ggml_tensor * lmal; // the L-BFGS memory alpha
+            struct ggml_tensor * lmys; // the L-BFGS memory ys
+            struct ggml_tensor * lms;  // the L-BFGS memory s
+            struct ggml_tensor * lmy;  // the L-BFGS memory y
+            float fx_best;
+            float step;
+            int j;
+            int k;
+            int end;
+            int n_no_improvement;
+        } lbfgs;
+    };
+
     GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
 
     // optimize the function defined by the tensor f
```
```diff
@@ -1131,6 +1365,27 @@ extern "C" {
             struct ggml_opt_params params,
             struct ggml_tensor * f);
 
+    // initialize optimizer context
+    GGML_API void ggml_opt_init(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_opt_params params,
+            int64_t nx);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume_g(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f,
+            struct ggml_cgraph * gf,
+            struct ggml_cgraph * gb);
+
     //
     // quantization
     //
```
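The new `ggml_opt_context` together with `ggml_opt_init`, `ggml_opt_resume`, and `ggml_opt_resume_g` makes optimization resumable: state such as the Adam moments lives in the context between calls instead of being rebuilt by every `ggml_opt` invocation. A compact sketch, not taken from the gem, assuming `ctx` was created with enough memory for the graphs and work buffers:

```c
#include "ggml.h"

// resumable optimization sketch: minimize f(x) = sum(x^2) with Adam,
// keeping optimizer state in the new ggml_opt_context between calls
static void optimize_example(struct ggml_context * ctx) {
    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    ggml_set_f32(x, 1.0f);
    ggml_set_param(ctx, x);                       // mark x as trainable

    struct ggml_tensor * f = ggml_sum(ctx, ggml_sqr(ctx, x));

    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.adam.n_iter = 16;

    struct ggml_opt_context opt;
    ggml_opt_init(ctx, &opt, params, ggml_nelements(x));

    // each call picks up from the state accumulated in opt
    ggml_opt_resume(ctx, &opt, f);
    ggml_opt_resume(ctx, &opt, f);
}
```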