llama_cpp 0.2.0 → 0.2.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/examples/README.md +60 -0
- data/examples/chat.rb +195 -0
- data/ext/llama_cpp/llama_cpp.cpp +52 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +697 -130
- data/ext/llama_cpp/src/ggml-cuda.h +4 -1
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +548 -497
- data/ext/llama_cpp/src/ggml-metal.metal +425 -122
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -32
- data/ext/llama_cpp/src/ggml-opencl.h +1 -2
- data/ext/llama_cpp/src/ggml.c +1904 -303
- data/ext/llama_cpp/src/ggml.h +126 -2
- data/ext/llama_cpp/src/llama.cpp +212 -108
- data/ext/llama_cpp/src/llama.h +12 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +3 -0
- metadata +4 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -296,6 +296,7 @@ extern "C" {
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_REPEAT,
+        GGML_OP_REPEAT_BACK,
         GGML_OP_ABS,
         GGML_OP_SGN,
         GGML_OP_NEG,
@@ -309,6 +310,7 @@ extern "C" {
         GGML_OP_RMS_NORM_BACK,

         GGML_OP_MUL_MAT,
+        GGML_OP_OUT_PROD,

         GGML_OP_SCALE,
         GGML_OP_SET,
@@ -324,6 +326,7 @@ extern "C" {
         GGML_OP_DIAG_MASK_INF,
         GGML_OP_DIAG_MASK_ZERO,
         GGML_OP_SOFT_MAX,
+        GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
@@ -333,10 +336,14 @@ extern "C" {

         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
+        GGML_OP_FLASH_ATTN_BACK,

         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,

+        GGML_OP_CROSS_ENTROPY_LOSS,
+        GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+
         GGML_OP_COUNT,
     };

@@ -478,6 +485,7 @@ extern "C" {

     GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);

     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
@@ -574,6 +582,11 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_add1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_acc(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -645,6 +658,11 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_repeat_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
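The new ggml_repeat_back pairs with the existing ggml_repeat: where ggml_repeat tiles tensor a up to the shape of b, the backward pass has to sum the gradient tiles back onto a's shape. A minimal 1-D sketch of that reduction, as an illustration of the idea rather than the ggml implementation:

    #include <stddef.h>

    // Sum nr gradient tiles of length n back onto the original n elements --
    // the reduction a "repeat backward" must perform in one dimension.
    static void repeat_back_1d(const float * grad, float * out, size_t n, size_t nr) {
        for (size_t i = 0; i < n; ++i) out[i] = 0.0f;
        for (size_t r = 0; r < nr; ++r) {
            for (size_t i = 0; i < n; ++i) {
                out[i] += grad[r * n + i];
            }
        }
    }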
@@ -698,14 +716,22 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

-    // A:
-    // B:
+    // A: n columns, m rows
+    // B: n columns, p rows (i.e. we transpose it internally)
     // result is m columns, p rows
     GGML_API struct ggml_tensor * ggml_mul_mat(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    // A: m columns, n rows,
+    // B: p columns, n rows,
+    // result is m columns, p rows
+    GGML_API struct ggml_tensor * ggml_out_prod(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     //
     // operations on tensors without backpropagation
     //
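The rewritten comments pin down ggml's shape convention, in which ne[0] counts columns (elements per row) and ne[1] counts rows. A small sketch of what the two declarations then imply for 2-D tensors; the concrete sizes are illustrative:

    #include "ggml.h"

    void shape_demo(struct ggml_context * ctx) {
        // ggml_mul_mat: A is n columns x m rows, B is n columns x p rows
        struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4 /*n*/, 3 /*m*/);
        struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4 /*n*/, 2 /*p*/);
        struct ggml_tensor * C = ggml_mul_mat(ctx, A, B);   // m columns, p rows: ne = {3, 2}

        // ggml_out_prod flips it: A is m columns x n rows, B is p columns x n rows
        struct ggml_tensor * A2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3 /*m*/, 4 /*n*/);
        struct ggml_tensor * B2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2 /*p*/, 4 /*n*/);
        struct ggml_tensor * D = ggml_out_prod(ctx, A2, B2); // also ne = {3, 2}

        (void) C; (void) D;
    }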
@@ -916,6 +942,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_soft_max_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // rotary position embedding
     // if mode & 1 == 1, skip n_past elements
     // if mode & 2 == 1, GPT-NeoX style
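ggml_soft_max_back is the backward counterpart of ggml_soft_max. A hedged scalar reference of the math such an op is expected to compute; which of a and b holds the upstream gradient versus the softmax output is an assumption, since the header does not say:

    #include <stddef.h>

    // With y = softmax(x) and dy the gradient w.r.t. y, the gradient w.r.t. x is
    // dx_i = y_i * (dy_i - sum_j dy_j * y_j).
    static void soft_max_back_ref(const float * dy, const float * y, float * dx, size_t n) {
        float dot = 0.0f;
        for (size_t i = 0; i < n; ++i) dot += dy[i] * y[i];
        for (size_t i = 0; i < n; ++i) dx[i] = y[i] * (dy[i] - dot);
    }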
@@ -982,6 +1019,14 @@ extern "C" {
             struct ggml_tensor  * v,
             bool                  masked);

+    GGML_API struct ggml_tensor * ggml_flash_attn_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * q,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * d,
+            bool                  masked);
+
     GGML_API struct ggml_tensor * ggml_flash_ff(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1005,6 +1050,19 @@ extern "C" {
             struct ggml_tensor  * b,
             ggml_binary_op_f32_t  fun);

+    // loss function
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c);
+
     //
     // automatic differentiation
     //
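For the new loss functions, a hedged scalar reference of the quantity a softmax cross-entropy op computes; treating a as logits and b as target probabilities is an assumption, not something the header states:

    #include <math.h>
    #include <stddef.h>

    // loss = -sum_i p_i * log(softmax(logits)_i), computed with the usual
    // max-subtraction trick for numerical stability.
    static float cross_entropy_ref(const float * logits, const float * p, size_t n) {
        float max = logits[0];
        for (size_t i = 1; i < n; ++i) if (logits[i] > max) max = logits[i];
        float sum = 0.0f;
        for (size_t i = 0; i < n; ++i) sum += expf(logits[i] - max);
        float loss = 0.0f;
        for (size_t i = 0; i < n; ++i) {
            loss -= p[i] * ((logits[i] - max) - logf(sum)); // p_i * log_softmax_i
        }
        return loss;
    }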
@@ -1099,6 +1157,8 @@ extern "C" {
         struct {
             int n_iter;

+            float sched; // schedule multiplier (fixed, decay or warmup)
+            float decay; // weight decay for AdamW, use 0.0f to disable
             float alpha; // learning rate
             float beta1;
             float beta2;
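The two new Adam fields add a step-size schedule and decoupled weight decay (AdamW). A sketch of a single scalar update showing where sched and decay plausibly enter; the exact update rule ggml uses is not part of this diff:

    #include <math.h>

    // One AdamW-style step for a single parameter x with gradient g at iteration t.
    static float adamw_step(float x, float g, float * m, float * v, int t,
                            float alpha, float beta1, float beta2, float eps,
                            float sched, float decay) {
        *m = beta1 * (*m) + (1.0f - beta1) * g;     // first moment
        *v = beta2 * (*v) + (1.0f - beta2) * g * g; // second moment
        const float mh = *m / (1.0f - powf(beta1, (float) t)); // bias correction
        const float vh = *v / (1.0f - powf(beta2, (float) t));
        // sched scales the whole step; decay acts on x itself (decoupled decay)
        return x - sched * alpha * (mh / (sqrtf(vh) + eps) + decay * x);
    }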
@@ -1123,6 +1183,49 @@ extern "C" {
         } lbfgs;
     };

+    struct ggml_opt_context {
+        struct ggml_context * ctx;
+        struct ggml_opt_params params;
+
+        int iter;
+        int64_t nx; // number of parameter elements
+
+        bool just_initialized;
+
+        struct {
+            struct ggml_tensor * x;  // view of the parameters
+            struct ggml_tensor * g1; // gradient
+            struct ggml_tensor * g2; // gradient squared
+            struct ggml_tensor * m;  // first moment
+            struct ggml_tensor * v;  // second moment
+            struct ggml_tensor * mh; // first moment hat
+            struct ggml_tensor * vh; // second moment hat
+            struct ggml_tensor * pf; // past function values
+            float fx_best;
+            float fx_prev;
+            int n_no_improvement;
+        } adam;
+
+        struct {
+            struct ggml_tensor * x;    // current parameters
+            struct ggml_tensor * xp;   // previous parameters
+            struct ggml_tensor * g;    // current gradient
+            struct ggml_tensor * gp;   // previous gradient
+            struct ggml_tensor * d;    // search direction
+            struct ggml_tensor * pf;   // past function values
+            struct ggml_tensor * lmal; // the L-BFGS memory alpha
+            struct ggml_tensor * lmys; // the L-BFGS memory ys
+            struct ggml_tensor * lms;  // the L-BFGS memory s
+            struct ggml_tensor * lmy;  // the L-BFGS memory y
+            float fx_best;
+            float step;
+            int j;
+            int k;
+            int end;
+            int n_no_improvement;
+        } lbfgs;
+    };
+
     GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);

     // optimize the function defined by the tensor f
@@ -1131,6 +1234,27 @@ extern "C" {
             struct ggml_opt_params params,
             struct ggml_tensor * f);

+    // initialize optimizer context
+    GGML_API void ggml_opt_init(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_opt_params params,
+            int64_t nx);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume_g(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f,
+            struct ggml_cgraph * gf,
+            struct ggml_cgraph * gb);
+
     //
     // quantization
     //
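Together with the ggml_opt_context struct above, these entry points make optimization resumable: state that previously lived only inside a single ggml_opt() call (iteration counter, Adam moments, best loss so far) now persists between calls. A hypothetical usage sketch; the loss tensor, parameter count, and chunk count are illustrative:

    #include "ggml.h"

    // Assumes `loss` is the scalar loss tensor of a graph built in `ctx` and
    // `nx` is the number of trainable parameter elements feeding into it.
    void train_in_chunks(struct ggml_context * ctx, struct ggml_tensor * loss, int64_t nx) {
        struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
        params.adam.n_iter = 16; // a few iterations per resume call

        struct ggml_opt_context opt;
        ggml_opt_init(ctx, &opt, params, nx);

        // optimizer state survives across calls, so training can proceed in
        // chunks (e.g. interleaved with checkpointing) rather than one long run
        for (int chunk = 0; chunk < 10; ++chunk) {
            if (ggml_opt_resume(ctx, &opt, loss) == GGML_OPT_OK) {
                break; // converged
            }
        }
    }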