llama_cpp 0.2.0 → 0.2.1

@@ -296,6 +296,7 @@ extern "C" {
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_REPEAT,
+        GGML_OP_REPEAT_BACK,
         GGML_OP_ABS,
         GGML_OP_SGN,
         GGML_OP_NEG,
@@ -309,6 +310,7 @@ extern "C" {
         GGML_OP_RMS_NORM_BACK,

         GGML_OP_MUL_MAT,
+        GGML_OP_OUT_PROD,

         GGML_OP_SCALE,
         GGML_OP_SET,
@@ -324,6 +326,7 @@ extern "C" {
         GGML_OP_DIAG_MASK_INF,
         GGML_OP_DIAG_MASK_ZERO,
         GGML_OP_SOFT_MAX,
+        GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
@@ -333,10 +336,14 @@ extern "C" {

         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
+        GGML_OP_FLASH_ATTN_BACK,

         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,

+        GGML_OP_CROSS_ENTROPY_LOSS,
+        GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+
         GGML_OP_COUNT,
     };

@@ -478,6 +485,7 @@ extern "C" {

     GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);

     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
@@ -574,6 +582,11 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);

+    GGML_API struct ggml_tensor * ggml_add1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     GGML_API struct ggml_tensor * ggml_acc(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
@@ -645,6 +658,11 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);

+    GGML_API struct ggml_tensor * ggml_repeat_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
@@ -698,14 +716,22 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);

-    // A: m rows, n columns
-    // B: p rows, n columns (i.e. we transpose it internally)
+    // A: n columns, m rows
+    // B: n columns, p rows (i.e. we transpose it internally)
     // result is m columns, p rows
     GGML_API struct ggml_tensor * ggml_mul_mat(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b);

+    // A: m columns, n rows,
+    // B: p columns, n rows,
+    // result is m columns, p rows
+    GGML_API struct ggml_tensor * ggml_out_prod(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     //
     // operations on tensors without backpropagation
     //
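
A note on the corrected comments: ggml describes 2-D shapes in ne[] order, with ne[0] the number of columns (fastest-varying dimension) and ne[1] the number of rows. The sketch below is not part of the header; the tensor names and sizes are made up for illustration, and only the graph nodes are created (computing them would additionally require building and running a ggml_cgraph).

#include "ggml.h"

// Shape sketch only: the tensors are never filled or computed.
static void shape_example(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // ggml_mul_mat: A is n columns x m rows, B is n columns x p rows
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4 /*n*/, 3 /*m*/);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4 /*n*/, 2 /*p*/);
    struct ggml_tensor * c = ggml_mul_mat(ctx, a, b);  // c->ne = {3 /*m*/, 2 /*p*/, 1, 1}

    // ggml_out_prod: A is m columns x n rows, B is p columns x n rows
    struct ggml_tensor * u = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3 /*m*/, 4 /*n*/);
    struct ggml_tensor * v = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2 /*p*/, 4 /*n*/);
    struct ggml_tensor * w = ggml_out_prod(ctx, u, v); // w->ne = {3 /*m*/, 2 /*p*/, 1, 1}

    (void) c; (void) w;
    ggml_free(ctx);
}
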
@@ -916,6 +942,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);

+    GGML_API struct ggml_tensor * ggml_soft_max_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     // rotary position embedding
     // if mode & 1 == 1, skip n_past elements
     // if mode & 2 == 1, GPT-NeoX style
@@ -982,6 +1019,14 @@ extern "C" {
             struct ggml_tensor * v,
             bool masked);

+    GGML_API struct ggml_tensor * ggml_flash_attn_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor * q,
+            struct ggml_tensor * k,
+            struct ggml_tensor * v,
+            struct ggml_tensor * d,
+            bool masked);
+
     GGML_API struct ggml_tensor * ggml_flash_ff(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
@@ -1005,6 +1050,19 @@ extern "C" {
             struct ggml_tensor * b,
             ggml_binary_op_f32_t fun);

+    // loss function
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            struct ggml_tensor * c);
+
     //
     // automatic differentiation
     //
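
With GGML_OP_CROSS_ENTROPY_LOSS and the new *_BACK ops above, a loss can be differentiated entirely inside ggml. A rough sketch of the intended flow, assuming f32 logits and a target distribution of the same shape; the tensor names, sizes, and memory budget are illustrative only:

#include "ggml.h"

// Sketch of a differentiable loss; "logits"/"targets" and all sizes are assumptions.
static void loss_example(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 128*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * logits  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 32000, 8);
    struct ggml_tensor * targets = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 32000, 8);
    ggml_set_param(ctx, logits); // request a gradient for the logits

    // scalar loss node
    struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, targets);

    struct ggml_cgraph gf = ggml_build_forward (loss);
    struct ggml_cgraph gb = ggml_build_backward(ctx, &gf, /*keep =*/ true);

    // ... fill logits and targets with data, then:
    ggml_graph_compute(ctx, &gb); // computes loss and logits->grad

    ggml_free(ctx);
}

ggml_cross_entropy_loss_back is primarily consumed by the backward pass itself (its third argument carries the incoming gradient), so user code will typically only build the forward loss as above.
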
@@ -1099,6 +1157,8 @@ extern "C" {
         struct {
             int n_iter;

+            float sched; // schedule multiplier (fixed, decay or warmup)
+            float decay; // weight decay for AdamW, use 0.0f to disable
             float alpha; // learning rate
             float beta1;
             float beta2;
@@ -1123,6 +1183,49 @@ extern "C" {
         } lbfgs;
     };

+    struct ggml_opt_context {
+        struct ggml_context * ctx;
+        struct ggml_opt_params params;
+
+        int iter;
+        int64_t nx; // number of parameter elements
+
+        bool just_initialized;
+
+        struct {
+            struct ggml_tensor * x;  // view of the parameters
+            struct ggml_tensor * g1; // gradient
+            struct ggml_tensor * g2; // gradient squared
+            struct ggml_tensor * m;  // first moment
+            struct ggml_tensor * v;  // second moment
+            struct ggml_tensor * mh; // first moment hat
+            struct ggml_tensor * vh; // second moment hat
+            struct ggml_tensor * pf; // past function values
+            float fx_best;
+            float fx_prev;
+            int n_no_improvement;
+        } adam;
+
+        struct {
+            struct ggml_tensor * x;    // current parameters
+            struct ggml_tensor * xp;   // previous parameters
+            struct ggml_tensor * g;    // current gradient
+            struct ggml_tensor * gp;   // previous gradient
+            struct ggml_tensor * d;    // search direction
+            struct ggml_tensor * pf;   // past function values
+            struct ggml_tensor * lmal; // the L-BFGS memory alpha
+            struct ggml_tensor * lmys; // the L-BFGS memory ys
+            struct ggml_tensor * lms;  // the L-BFGS memory s
+            struct ggml_tensor * lmy;  // the L-BFGS memory y
+            float fx_best;
+            float step;
+            int j;
+            int k;
+            int end;
+            int n_no_improvement;
+        } lbfgs;
+    };
+
     GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);

     // optimize the function defined by the tensor f
@@ -1131,6 +1234,27 @@ extern "C" {
             struct ggml_opt_params params,
             struct ggml_tensor * f);

+    // initialize optimizer context
+    GGML_API void ggml_opt_init(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_opt_params params,
+            int64_t nx);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume_g(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f,
+            struct ggml_cgraph * gf,
+            struct ggml_cgraph * gb);
+
     //
     // quantization
     //
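
The new optimizer entry points keep Adam/L-BFGS state in a ggml_opt_context between calls instead of rebuilding it on every ggml_opt invocation. A rough sketch of how the pieces might fit together, assuming a scalar loss tensor f and its parameter element count nx; the hyperparameter values and the loop structure are illustrative only:

#include "ggml.h"

// Sketch: persistent optimizer state across repeated calls; values are illustrative.
static enum ggml_opt_result train_loop(struct ggml_context * ctx, struct ggml_tensor * f, int64_t nx) {
    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.adam.n_iter = 16;    // Adam steps per ggml_opt_resume call
    params.adam.sched  = 1.0f;  // new field: schedule multiplier
    params.adam.decay  = 0.0f;  // new field: AdamW weight decay, 0.0f disables it

    struct ggml_opt_context opt;
    ggml_opt_init(ctx, &opt, params, nx); // nx = total number of trainable elements

    enum ggml_opt_result res = GGML_OPT_OK;
    for (int batch = 0; batch < 4; ++batch) {
        // ... load the next batch into the inputs that feed f ...
        res = ggml_opt_resume(ctx, &opt, f); // Adam moments etc. carry over between calls
        if (res != GGML_OPT_OK && res != GGML_OPT_DID_NOT_CONVERGE) {
            break;
        }
    }
    return res;
}

ggml_opt_resume_g follows the same pattern but takes prebuilt forward and backward graphs (gf, gb), so callers that already hold the graphs can avoid reconstructing them on every call.
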