llama_cpp 0.2.0 → 0.2.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/examples/README.md +92 -0
- data/examples/chat.rb +195 -0
- data/examples/embedding.rb +37 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +1218 -411
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +703 -514
- data/ext/llama_cpp/src/ggml-metal.metal +574 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +496 -36
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +2715 -476
- data/ext/llama_cpp/src/ggml.h +266 -11
- data/ext/llama_cpp/src/llama.cpp +266 -135
- data/ext/llama_cpp/src/llama.h +19 -11
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +5 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -296,12 +296,14 @@ extern "C" {
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_REPEAT,
+        GGML_OP_REPEAT_BACK,
         GGML_OP_ABS,
         GGML_OP_SGN,
         GGML_OP_NEG,
         GGML_OP_STEP,
         GGML_OP_RELU,
         GGML_OP_GELU,
+        GGML_OP_GELU_QUICK,
         GGML_OP_SILU,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
@@ -309,6 +311,7 @@ extern "C" {
         GGML_OP_RMS_NORM_BACK,

         GGML_OP_MUL_MAT,
+        GGML_OP_OUT_PROD,

         GGML_OP_SCALE,
         GGML_OP_SET,
@@ -324,19 +327,27 @@ extern "C" {
         GGML_OP_DIAG_MASK_INF,
         GGML_OP_DIAG_MASK_ZERO,
         GGML_OP_SOFT_MAX,
+        GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
         GGML_OP_CLAMP,
-        GGML_OP_CONV_1D_1S,
-        GGML_OP_CONV_1D_2S,
+        GGML_OP_CONV_1D_S1_PH,
+        GGML_OP_CONV_1D_S2_PH,
+        GGML_OP_CONV_2D_SK_P0,

         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
+        GGML_OP_FLASH_ATTN_BACK,
+        GGML_OP_WIN_PART,
+        GGML_OP_WIN_UNPART,

         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,

+        GGML_OP_CROSS_ENTROPY_LOSS,
+        GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+
         GGML_OP_COUNT,
     };

@@ -478,6 +489,7 @@ extern "C" {

     GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);

     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
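Example (not part of the diff): ggml_is_permuted reports whether a tensor's layout was reordered by ggml_permute. A minimal sketch against this header, with hypothetical shapes:

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        struct ggml_init_params ip = { /*mem_size   =*/ 1024*1024,
                                       /*mem_buffer =*/ NULL,
                                       /*no_alloc   =*/ false };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
        struct ggml_tensor * p = ggml_permute(ctx, a, 1, 0, 2, 3); // swap the first two axes

        printf("a: %d p: %d\n", ggml_is_permuted(a), ggml_is_permuted(p)); // expected: a: 0 p: 1

        ggml_free(ctx);
        return 0;
    }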
@@ -492,8 +504,9 @@ extern "C" {
     GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);

-    GGML_API void * ggml_get_mem_buffer(struct ggml_context * ctx);
-    GGML_API size_t ggml_get_mem_size  (struct ggml_context * ctx);
+    GGML_API void *  ggml_get_mem_buffer     (const struct ggml_context * ctx);
+    GGML_API size_t  ggml_get_mem_size       (const struct ggml_context * ctx);
+    GGML_API size_t  ggml_get_max_tensor_size(const struct ggml_context * ctx);

     GGML_API struct ggml_tensor * ggml_new_tensor(
             struct ggml_context * ctx,
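Example (not part of the diff): the getters now take const contexts, and ggml_get_max_tensor_size is new. A fragment inspecting the memory pool (ctx initialized as in the previous sketch):

    ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);

    printf("pool base:      %p\n",  ggml_get_mem_buffer(ctx));      // start of the context's arena
    printf("pool size:      %zu\n", ggml_get_mem_size(ctx));        // total bytes reserved at ggml_init
    printf("largest tensor: %zu\n", ggml_get_max_tensor_size(ctx)); // biggest single tensor allocation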
@@ -548,8 +561,8 @@ extern "C" {
     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

-    GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
-    GGML_API void         ggml_set_name(struct ggml_tensor * tensor, const char * name);
+    GGML_API const char *         ggml_get_name(const struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);

     //
     // operations on tensors with backpropagation
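Example (not part of the diff): ggml_set_name now returns its tensor instead of void, so naming can be chained inside an expression:

    struct ggml_tensor * cur = ggml_set_name(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8), "tok_embeddings");
    printf("%s\n", ggml_get_name(cur)); // -> tok_embeddings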
@@ -574,6 +587,11 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_add1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_acc(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -597,24 +615,47 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_sub_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_mul(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_mul_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_div(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_div_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_sqr(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_sqr_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_sqrt(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_sqrt_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_log(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
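Example (not part of the diff): the *_inplace variants (including ggml_add1_inplace above) return a view of a and write the result into a's buffer at compute time, saving an allocation per node. A fragment, assuming the graph API of this ggml revision:

    struct ggml_tensor * a = ggml_set_f32(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4), 3.0f);
    struct ggml_tensor * b = ggml_set_f32(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4), 2.0f);

    struct ggml_tensor * c = ggml_mul_inplace(ctx, a, b); // view of a, no fresh buffer

    struct ggml_cgraph gf = ggml_build_forward(c);
    gf.n_threads = 1;
    ggml_graph_compute(ctx, &gf);

    // c->data aliases a->data: every element of both now holds 6.0f
    printf("%f\n", ggml_get_data_f32(c)[0]);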
@@ -645,35 +686,76 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_repeat_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_abs_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_sgn(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_sgn_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_neg(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_neg_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_step(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_step_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_relu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_relu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // TODO: double-check this computation is correct
     GGML_API struct ggml_tensor * ggml_gelu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_gelu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_gelu_quick(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_silu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_silu_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // a - x
     // b - dy
     GGML_API struct ggml_tensor * ggml_silu_back(
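Example (not part of the diff): ggml_gelu_quick is the sigmoid-based GELU approximation x*sigmoid(1.702*x), cheaper than the tanh form used by ggml_gelu; every activation also gains an in-place twin. A fragment comparing the two on the same input:

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3);
    float * xd = ggml_get_data_f32(x);
    xd[0] = -1.0f; xd[1] = 0.5f; xd[2] = 2.0f;

    struct ggml_tensor * g0 = ggml_gelu      (ctx, x); // tanh-based approximation
    struct ggml_tensor * g1 = ggml_gelu_quick(ctx, x); // x * sigmoid(1.702 * x)

    struct ggml_cgraph gf = ggml_build_forward(g0);
    ggml_build_forward_expand(&gf, g1);
    gf.n_threads = 1;
    ggml_graph_compute(ctx, &gf);

    // the two curves track each other closely but are not identical:
    printf("gelu(2) = %f, gelu_quick(2) = %f\n", ggml_get_data_f32(g0)[2], ggml_get_data_f32(g1)[2]);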
@@ -687,10 +769,18 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_rms_norm(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // a - x
     // b - dy
     GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -698,14 +788,22 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

-    // A: m rows, n columns
-    // B: p rows, n columns (i.e. we transpose it internally)
+    // A: n columns, m rows
+    // B: n columns, p rows  (i.e. we transpose it internally)
     // result is m columns, p rows
     GGML_API struct ggml_tensor * ggml_mul_mat(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    // A: m columns, n rows,
+    // B: p columns, n rows,
+    // result is m columns, p rows
+    GGML_API struct ggml_tensor * ggml_out_prod(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     //
     // operations on tensors without backpropagation
     //
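Example (not part of the diff): ggml_out_prod is the shape counterpart of ggml_mul_mat for the backward pass — mul_mat contracts over ne[0] of both operands, out_prod over ne[1]. A shape-only sketch with hypothetical sizes:

    // mul_mat:  A = [n, m], B = [n, p]  ->  result [m, p]
    struct ggml_tensor * A1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4); // n = 8, m = 4
    struct ggml_tensor * B1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 6); // n = 8, p = 6
    struct ggml_tensor * R1 = ggml_mul_mat(ctx, A1, B1);                    // ne = [4, 6]

    // out_prod: A = [m, n], B = [p, n]  ->  result [m, p]
    struct ggml_tensor * A2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8); // m = 4, n = 8
    struct ggml_tensor * B2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 6, 8); // p = 6, n = 8
    struct ggml_tensor * R2 = ggml_out_prod(ctx, A2, B2);                   // ne = [4, 6]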
@@ -916,6 +1014,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_soft_max_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // rotary position embedding
     // if mode & 1 == 1, skip n_past elements
     // if mode & 2 == 1, GPT-NeoX style
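Example (not part of the diff): ggml_soft_max_back supports the new training path. In ggml's own backward construction the first argument is the upstream gradient and the second the forward soft_max output; a hedged fragment mirroring that usage (logits and n are placeholders):

    struct ggml_tensor * y  = ggml_soft_max(ctx, logits);                // forward result
    struct ggml_tensor * dy = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n); // upstream gradient
    struct ggml_tensor * dx = ggml_soft_max_back(ctx, dy, y);            // gradient w.r.t. logits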
@@ -961,16 +1070,55 @@ extern "C" {
             float                 min,
             float                 max);

-    //
+    // TODO: implement general-purpose convolutions
+    // GGML_API struct ggml_tensor * ggml_conv_1d(
+    //        struct ggml_context * ctx,
+    //        struct ggml_tensor  * a,
+    //        struct ggml_tensor  * b,
+    //        int                   s0
+    //        int                   p0,
+    //        int                   d0);
+    //
+    // GGML_API struct ggml_tensor * ggml_conv_2d(
+    //        struct ggml_context * ctx,
+    //        struct ggml_tensor  * a,
+    //        struct ggml_tensor  * b,
+    //        int                   s0,
+    //        int                   s1,
+    //        int                   p0,
+    //        int                   p1,
+    //        int                   d0,
+    //        int                   d1);
+
+    // padding = half
     // TODO: we don't support extra parameters for now
     // that's why we are hard-coding the stride, padding, and dilation
     // not great ..
-    GGML_API struct ggml_tensor * ggml_conv_1d_1s(
+    // example:
+    // a:      3    80  768    1
+    // b:   3000    80    1    1
+    // res: 3000   768    1    1
+    // used in whisper
+    GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // used in whisper
+    GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

-    GGML_API struct ggml_tensor * ggml_conv_1d_2s(
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is equal to kernel size
+    // padding is zero
+    // example:
+    // a:     16    16    3  768
+    // b:   1024  1024    3    1
+    // res:   64    64  768    1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
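Example (not part of the diff): the renamed 1D convolutions encode their hard-coded parameters in the name — s1/s2 is the stride, ph is half padding. A fragment matching the header's whisper-shaped example (F16 filter with F32 input, the combination whisper uses):

    struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 3, 80, 768); // kernel 3, 80 in-ch, 768 out-ch
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3000, 80);   // 3000 frames x 80 mels

    struct ggml_tensor * r1 = ggml_conv_1d_s1_ph(ctx, a, b); // stride 1 -> ne = [3000, 768]
    struct ggml_tensor * r2 = ggml_conv_1d_s2_ph(ctx, a, b); // stride 2 -> ne = [1500, 768]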
@@ -982,6 +1130,14 @@ extern "C" {
             struct ggml_tensor  * v,
             bool                  masked);

+    GGML_API struct ggml_tensor * ggml_flash_attn_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * q,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * d,
+            bool                  masked);
+
     GGML_API struct ggml_tensor * ggml_flash_ff(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -990,6 +1146,26 @@ extern "C" {
             struct ggml_tensor  * c0,
             struct ggml_tensor  * c1);

+    // partition into non-overlapping windows with padding if needed
+    // example:
+    // a:   768   64   64    1
+    // w:    14
+    // res: 768   14   14   25
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_win_part(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   w);
+
+    // reverse of ggml_win_part
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_win_unpart(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   w0,
+            int                   h0,
+            int                   w);
+
     // Mapping operations
     typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
     typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
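Example (not part of the diff): ggml_win_part/ggml_win_unpart implement SAM-style windowed attention — the feature map is cut into w x w windows (padded if needed; here 64x64 becomes 5x5 = 25 windows of 14), processed per window, then stitched back. A round trip matching the header's example:

    struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 768, 64, 64); // C x W x H
    struct ggml_tensor * p = ggml_win_part  (ctx, a, 14);                         // ne = [768, 14, 14, 25]
    struct ggml_tensor * u = ggml_win_unpart(ctx, p, 64, 64, 14);                 // back to [768, 64, 64]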
@@ -1005,6 +1181,19 @@ extern "C" {
             struct ggml_tensor  * b,
             ggml_binary_op_f32_t  fun);

+    // loss function
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c);
+
     //
     // automatic differentiation
     //
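Example (not part of the diff): ggml_cross_entropy_loss reduces a batch of logits against target probabilities to a single scalar; the _back variant threads an extra gradient tensor c through. A fragment with assumed argument roles (a = logits, b = targets, matching the training usage that arrives with this sync):

    const int n_vocab = 32000, n_tokens = 16; // hypothetical sizes
    struct ggml_tensor * logits  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_vocab, n_tokens);
    struct ggml_tensor * targets = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_vocab, n_tokens); // e.g. one-hot rows
    struct ggml_tensor * loss    = ggml_cross_entropy_loss(ctx, logits, targets);             // scalar result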
@@ -1099,6 +1288,8 @@ extern "C" {
         struct {
             int n_iter;

+            float sched; // schedule multiplier (fixed, decay or warmup)
+            float decay; // weight decay for AdamW, use 0.0f to disable
             float alpha; // learning rate
             float beta1;
             float beta2;
@@ -1123,6 +1314,49 @@ extern "C" {
         } lbfgs;
     };

+    struct ggml_opt_context {
+        struct ggml_context * ctx;
+        struct ggml_opt_params params;
+
+        int iter;
+        int64_t nx; // number of parameter elements
+
+        bool just_initialized;
+
+        struct {
+            struct ggml_tensor * x;  // view of the parameters
+            struct ggml_tensor * g1; // gradient
+            struct ggml_tensor * g2; // gradient squared
+            struct ggml_tensor * m;  // first moment
+            struct ggml_tensor * v;  // second moment
+            struct ggml_tensor * mh; // first moment hat
+            struct ggml_tensor * vh; // second moment hat
+            struct ggml_tensor * pf; // past function values
+            float fx_best;
+            float fx_prev;
+            int n_no_improvement;
+        } adam;
+
+        struct {
+            struct ggml_tensor * x;    // current parameters
+            struct ggml_tensor * xp;   // previous parameters
+            struct ggml_tensor * g;    // current gradient
+            struct ggml_tensor * gp;   // previous gradient
+            struct ggml_tensor * d;    // search direction
+            struct ggml_tensor * pf;   // past function values
+            struct ggml_tensor * lmal; // the L-BFGS memory alpha
+            struct ggml_tensor * lmys; // the L-BFGS memory ys
+            struct ggml_tensor * lms;  // the L-BFGS memory s
+            struct ggml_tensor * lmy;  // the L-BFGS memory y
+            float fx_best;
+            float step;
+            int j;
+            int k;
+            int end;
+            int n_no_improvement;
+        } lbfgs;
+    };
+
     GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);

     // optimize the function defined by the tensor f
@@ -1131,6 +1365,27 @@ extern "C" {
             struct ggml_opt_params params,
             struct ggml_tensor * f);

+    // initialize optimizer context
+    GGML_API void ggml_opt_init(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_opt_params params,
+            int64_t nx);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume_g(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f,
+            struct ggml_cgraph * gf,
+            struct ggml_cgraph * gb);
+
     //
     // quantization
     //
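Example (not part of the diff): ggml_opt_context plus ggml_opt_init/ggml_opt_resume let optimizer state (Adam moments, L-BFGS memory) survive across calls, where the one-shot ggml_opt starts fresh each time. A toy sketch minimizing (x - 5)^2, assuming a context ctx with spare memory; ggml_opt_resume builds its own graphs from f:

    struct ggml_tensor * x = ggml_set_f32(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1), 0.0f);
    ggml_set_param(ctx, x); // mark x as trainable

    struct ggml_tensor * t = ggml_new_f32(ctx, 5.0f);
    struct ggml_tensor * f = ggml_sum(ctx, ggml_sqr(ctx, ggml_sub(ctx, x, t))); // scalar loss

    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.adam.sched = 1.0f; // new field: constant schedule multiplier
    params.adam.decay = 0.0f; // new field: AdamW weight decay disabled

    struct ggml_opt_context opt;
    ggml_opt_init(ctx, &opt, params, ggml_nelements(x));

    ggml_opt_resume(ctx, &opt, f); // first training segment
    ggml_opt_resume(ctx, &opt, f); // continues with the Adam moments preserved

    printf("x = %f\n", ggml_get_data_f32(x)[0]); // approaches 5.0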