llama_cpp 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -296,6 +296,7 @@ extern "C" {
     GGML_OP_SUM_ROWS,
     GGML_OP_MEAN,
     GGML_OP_REPEAT,
+    GGML_OP_REPEAT_BACK,
     GGML_OP_ABS,
     GGML_OP_SGN,
     GGML_OP_NEG,
@@ -309,6 +310,7 @@ extern "C" {
     GGML_OP_RMS_NORM_BACK,

     GGML_OP_MUL_MAT,
+    GGML_OP_OUT_PROD,

     GGML_OP_SCALE,
     GGML_OP_SET,
@@ -324,6 +326,7 @@ extern "C" {
     GGML_OP_DIAG_MASK_INF,
     GGML_OP_DIAG_MASK_ZERO,
     GGML_OP_SOFT_MAX,
+    GGML_OP_SOFT_MAX_BACK,
     GGML_OP_ROPE,
     GGML_OP_ROPE_BACK,
     GGML_OP_ALIBI,
@@ -333,10 +336,14 @@ extern "C" {

     GGML_OP_FLASH_ATTN,
     GGML_OP_FLASH_FF,
+    GGML_OP_FLASH_ATTN_BACK,

     GGML_OP_MAP_UNARY,
     GGML_OP_MAP_BINARY,

+    GGML_OP_CROSS_ENTROPY_LOSS,
+    GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+
     GGML_OP_COUNT,
 };

@@ -478,6 +485,7 @@ extern "C" {

     GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);

     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
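The new ggml_is_permuted predicate presumably reports whether a tensor's strides are out of row-major order. A minimal sketch of the expected behaviour, assuming the ggml_permute view API declared elsewhere in this header and arbitrary example shapes:

    #include "ggml.h"

    static void check_permuted(struct ggml_context * ctx) {
        // contiguous row-major tensor: strides are in increasing order
        struct ggml_tensor * t  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4, 8, 2);
        // view with the first two axes swapped: strides are reordered
        struct ggml_tensor * tp = ggml_permute(ctx, t, 1, 0, 2, 3);

        bool plain   = ggml_is_permuted(t);   // expected: false
        bool swapped = ggml_is_permuted(tp);  // expected: true
        (void) plain; (void) swapped;
    }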
@@ -574,6 +582,11 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_add1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_acc(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -645,6 +658,11 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    GGML_API struct ggml_tensor * ggml_repeat_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     GGML_API struct ggml_tensor * ggml_abs(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
@@ -698,14 +716,22 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

-    // A: m rows, n columns
-    // B: p rows, n columns (i.e. we transpose it internally)
+    // A: n columns, m rows
+    // B: n columns, p rows (i.e. we transpose it internally)
     // result is m columns, p rows
     GGML_API struct ggml_tensor * ggml_mul_mat(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    // A: m columns, n rows,
+    // B: p columns, n rows,
+    // result is m columns, p rows
+    GGML_API struct ggml_tensor * ggml_out_prod(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     //
     // operations on tensors without backpropagation
     //
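The rewritten comments restate ggml's shape convention: ne[0] counts columns (the fastest-varying dimension) and ne[1] counts rows. A short sketch of how the two declarations above combine shapes, using arbitrary sizes n = 8, m = 4, p = 2:

    #include "ggml.h"

    static void shape_convention(struct ggml_context * ctx) {
        // ggml_mul_mat: A is n columns x m rows, B is n columns x p rows
        struct ggml_tensor * a  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
        struct ggml_tensor * b  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 2);
        struct ggml_tensor * mm = ggml_mul_mat(ctx, a, b);    // result: 4 columns x 2 rows

        // ggml_out_prod: A is m columns x n rows, B is p columns x n rows
        struct ggml_tensor * a2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8);
        struct ggml_tensor * b2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 8);
        struct ggml_tensor * op = ggml_out_prod(ctx, a2, b2); // result: 4 columns x 2 rows

        (void) mm; (void) op;
    }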
@@ -916,6 +942,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_soft_max_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // rotary position embedding
     // if mode & 1 == 1, skip n_past elements
     // if mode & 2 == 1, GPT-NeoX style
@@ -982,6 +1019,14 @@ extern "C" {
             struct ggml_tensor  * v,
             bool                  masked);

+    GGML_API struct ggml_tensor * ggml_flash_attn_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * q,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * d,
+            bool                  masked);
+
     GGML_API struct ggml_tensor * ggml_flash_ff(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1005,6 +1050,19 @@ extern "C" {
             struct ggml_tensor  * b,
             ggml_binary_op_f32_t  fun);

+    // loss function
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c);
+
     //
     // automatic differentiation
     //
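The loss declarations plug into the graph builder like any other op. A hedged sketch of evaluating the scalar loss value, assuming logits and labels are f32 tensors of identical shape and that the ggml_build_forward / ggml_graph_compute calls declared elsewhere in this header keep their current signatures:

    #include "ggml.h"

    static float compute_loss(struct ggml_context * ctx,
                              struct ggml_tensor  * logits,
                              struct ggml_tensor  * labels) {
        // cross entropy between the two inputs; the op reduces to a single element
        struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, labels);

        struct ggml_cgraph gf = ggml_build_forward(loss);
        ggml_graph_compute(ctx, &gf);

        return ggml_get_f32_1d(loss, 0);
    }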
@@ -1099,6 +1157,8 @@ extern "C" {
         struct {
             int n_iter;

+            float sched; // schedule multiplier (fixed, decay or warmup)
+            float decay; // weight decay for AdamW, use 0.0f to disable
             float alpha; // learning rate
             float beta1;
             float beta2;
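Both new fields live in the adam sub-struct of ggml_opt_params, so they can be adjusted on the defaults before starting an optimization run; a minimal sketch:

    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.adam.sched = 1.0f; // fixed schedule multiplier (no warmup/decay scaling)
    params.adam.decay = 0.0f; // 0.0f disables AdamW weight decay, per the comment above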
@@ -1123,6 +1183,49 @@ extern "C" {
         } lbfgs;
     };

+    struct ggml_opt_context {
+        struct ggml_context * ctx;
+        struct ggml_opt_params params;
+
+        int iter;
+        int64_t nx; // number of parameter elements
+
+        bool just_initialized;
+
+        struct {
+            struct ggml_tensor * x;  // view of the parameters
+            struct ggml_tensor * g1; // gradient
+            struct ggml_tensor * g2; // gradient squared
+            struct ggml_tensor * m;  // first moment
+            struct ggml_tensor * v;  // second moment
+            struct ggml_tensor * mh; // first moment hat
+            struct ggml_tensor * vh; // second moment hat
+            struct ggml_tensor * pf; // past function values
+            float fx_best;
+            float fx_prev;
+            int n_no_improvement;
+        } adam;
+
+        struct {
+            struct ggml_tensor * x;    // current parameters
+            struct ggml_tensor * xp;   // previous parameters
+            struct ggml_tensor * g;    // current gradient
+            struct ggml_tensor * gp;   // previous gradient
+            struct ggml_tensor * d;    // search direction
+            struct ggml_tensor * pf;   // past function values
+            struct ggml_tensor * lmal; // the L-BFGS memory alpha
+            struct ggml_tensor * lmys; // the L-BFGS memory ys
+            struct ggml_tensor * lms;  // the L-BFGS memory s
+            struct ggml_tensor * lmy;  // the L-BFGS memory y
+            float fx_best;
+            float step;
+            int j;
+            int k;
+            int end;
+            int n_no_improvement;
+        } lbfgs;
+    };
+
     GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);

     // optimize the function defined by the tensor f
@@ -1131,6 +1234,27 @@ extern "C" {
             struct ggml_opt_params params,
             struct ggml_tensor * f);

+    // initialize optimizer context
+    GGML_API void ggml_opt_init(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_opt_params params,
+            int64_t nx);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f);
+
+    // continue optimizing the function defined by the tensor f
+    GGML_API enum ggml_opt_result ggml_opt_resume_g(
+            struct ggml_context * ctx,
+            struct ggml_opt_context * opt,
+            struct ggml_tensor * f,
+            struct ggml_cgraph * gf,
+            struct ggml_cgraph * gb);
+
     //
     // quantization
     //
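Together with the ggml_opt_context struct added above, these entry points make optimization resumable across calls rather than single-shot as with ggml_opt. A hedged usage sketch, assuming a loss tensor f already built from ggml_cross_entropy_loss over parameters marked with ggml_set_param, and assuming the GGML_OPT_ADAM type and GGML_OPT_OK result code defined elsewhere in this header:

    #include "ggml.h"

    static void train(struct ggml_context * ctx, struct ggml_tensor * f, int64_t n_params) {
        struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);

        struct ggml_opt_context opt;
        ggml_opt_init(ctx, &opt, params, n_params); // n_params: total parameter elements

        for (int epoch = 0; epoch < 16; ++epoch) {
            // resumes from the state kept in opt (iteration count, Adam moments, ...)
            enum ggml_opt_result res = ggml_opt_resume(ctx, &opt, f);
            if (res != GGML_OPT_OK) {
                break;
            }
        }
    }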