llama_cpp 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -198,7 +198,7 @@
198
198
  #define GGML_MAX_PARAMS 256
199
199
  #define GGML_MAX_CONTEXTS 64
200
200
  #define GGML_MAX_OPT 4
201
- #define GGML_MAX_NAME 32
201
+ #define GGML_MAX_NAME 48
202
202
  #define GGML_DEFAULT_N_THREADS 4
203
203
 
204
204
  #define GGML_ASSERT(x) \
@@ -303,6 +303,7 @@ extern "C" {
303
303
  GGML_OP_STEP,
304
304
  GGML_OP_RELU,
305
305
  GGML_OP_GELU,
306
+ GGML_OP_GELU_QUICK,
306
307
  GGML_OP_SILU,
307
308
  GGML_OP_SILU_BACK,
308
309
  GGML_OP_NORM, // normalize
@@ -331,16 +332,23 @@ extern "C" {
331
332
  GGML_OP_ROPE_BACK,
332
333
  GGML_OP_ALIBI,
333
334
  GGML_OP_CLAMP,
334
- GGML_OP_CONV_1D_1S,
335
- GGML_OP_CONV_1D_2S,
335
+ GGML_OP_CONV_1D_S1_PH,
336
+ GGML_OP_CONV_1D_S2_PH,
337
+ GGML_OP_CONV_2D_SK_P0,
336
338
 
337
339
  GGML_OP_FLASH_ATTN,
338
340
  GGML_OP_FLASH_FF,
339
341
  GGML_OP_FLASH_ATTN_BACK,
342
+ GGML_OP_WIN_PART,
343
+ GGML_OP_WIN_UNPART,
340
344
 
341
345
  GGML_OP_MAP_UNARY,
342
346
  GGML_OP_MAP_BINARY,
343
347
 
348
+ GGML_OP_MAP_CUSTOM1,
349
+ GGML_OP_MAP_CUSTOM2,
350
+ GGML_OP_MAP_CUSTOM3,
351
+
344
352
  GGML_OP_CROSS_ENTROPY_LOSS,
345
353
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
346
354
 
@@ -461,6 +469,9 @@ extern "C" {
461
469
  GGML_API int64_t ggml_cycles(void);
462
470
  GGML_API int64_t ggml_cycles_per_ms(void);
463
471
 
472
+ GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
473
+ GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
474
+
464
475
  GGML_API void ggml_print_object (const struct ggml_object * obj);
465
476
  GGML_API void ggml_print_objects(const struct ggml_context * ctx);
466
477
 
@@ -500,8 +511,9 @@ extern "C" {
500
511
  GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
501
512
  GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
502
513
 
503
- GGML_API void * ggml_get_mem_buffer(struct ggml_context * ctx);
504
- GGML_API size_t ggml_get_mem_size (struct ggml_context * ctx);
514
+ GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
515
+ GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);
516
+ GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
505
517
 
506
518
  GGML_API struct ggml_tensor * ggml_new_tensor(
507
519
  struct ggml_context * ctx,
@@ -556,8 +568,9 @@ extern "C" {
556
568
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
557
569
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
558
570
 
559
- GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
560
- GGML_API void ggml_set_name(struct ggml_tensor * tensor, const char * name);
571
+ GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
572
+ GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
573
+ GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
561
574
 
562
575
  //
563
576
  // operations on tensors with backpropagation
@@ -610,24 +623,47 @@ extern "C" {
610
623
  struct ggml_tensor * a,
611
624
  struct ggml_tensor * b);
612
625
 
626
+ GGML_API struct ggml_tensor * ggml_sub_inplace(
627
+ struct ggml_context * ctx,
628
+ struct ggml_tensor * a,
629
+ struct ggml_tensor * b);
630
+
613
631
  GGML_API struct ggml_tensor * ggml_mul(
614
632
  struct ggml_context * ctx,
615
633
  struct ggml_tensor * a,
616
634
  struct ggml_tensor * b);
617
635
 
636
+ GGML_API struct ggml_tensor * ggml_mul_inplace(
637
+ struct ggml_context * ctx,
638
+ struct ggml_tensor * a,
639
+ struct ggml_tensor * b);
640
+
618
641
  GGML_API struct ggml_tensor * ggml_div(
619
642
  struct ggml_context * ctx,
620
643
  struct ggml_tensor * a,
621
644
  struct ggml_tensor * b);
622
645
 
646
+ GGML_API struct ggml_tensor * ggml_div_inplace(
647
+ struct ggml_context * ctx,
648
+ struct ggml_tensor * a,
649
+ struct ggml_tensor * b);
650
+
623
651
  GGML_API struct ggml_tensor * ggml_sqr(
624
652
  struct ggml_context * ctx,
625
653
  struct ggml_tensor * a);
626
654
 
655
+ GGML_API struct ggml_tensor * ggml_sqr_inplace(
656
+ struct ggml_context * ctx,
657
+ struct ggml_tensor * a);
658
+
627
659
  GGML_API struct ggml_tensor * ggml_sqrt(
628
660
  struct ggml_context * ctx,
629
661
  struct ggml_tensor * a);
630
662
 
663
+ GGML_API struct ggml_tensor * ggml_sqrt_inplace(
664
+ struct ggml_context * ctx,
665
+ struct ggml_tensor * a);
666
+
631
667
  GGML_API struct ggml_tensor * ggml_log(
632
668
  struct ggml_context * ctx,
633
669
  struct ggml_tensor * a);
@@ -667,31 +703,67 @@ extern "C" {
667
703
  struct ggml_context * ctx,
668
704
  struct ggml_tensor * a);
669
705
 
706
+ GGML_API struct ggml_tensor * ggml_abs_inplace(
707
+ struct ggml_context * ctx,
708
+ struct ggml_tensor * a);
709
+
670
710
  GGML_API struct ggml_tensor * ggml_sgn(
671
711
  struct ggml_context * ctx,
672
712
  struct ggml_tensor * a);
673
713
 
714
+ GGML_API struct ggml_tensor * ggml_sgn_inplace(
715
+ struct ggml_context * ctx,
716
+ struct ggml_tensor * a);
717
+
674
718
  GGML_API struct ggml_tensor * ggml_neg(
675
719
  struct ggml_context * ctx,
676
720
  struct ggml_tensor * a);
677
721
 
722
+ GGML_API struct ggml_tensor * ggml_neg_inplace(
723
+ struct ggml_context * ctx,
724
+ struct ggml_tensor * a);
725
+
678
726
  GGML_API struct ggml_tensor * ggml_step(
679
727
  struct ggml_context * ctx,
680
728
  struct ggml_tensor * a);
681
729
 
730
+ GGML_API struct ggml_tensor * ggml_step_inplace(
731
+ struct ggml_context * ctx,
732
+ struct ggml_tensor * a);
733
+
682
734
  GGML_API struct ggml_tensor * ggml_relu(
683
735
  struct ggml_context * ctx,
684
736
  struct ggml_tensor * a);
685
737
 
738
+ GGML_API struct ggml_tensor * ggml_relu_inplace(
739
+ struct ggml_context * ctx,
740
+ struct ggml_tensor * a);
741
+
686
742
  // TODO: double-check this computation is correct
687
743
  GGML_API struct ggml_tensor * ggml_gelu(
688
744
  struct ggml_context * ctx,
689
745
  struct ggml_tensor * a);
690
746
 
747
+ GGML_API struct ggml_tensor * ggml_gelu_inplace(
748
+ struct ggml_context * ctx,
749
+ struct ggml_tensor * a);
750
+
751
+ GGML_API struct ggml_tensor * ggml_gelu_quick(
752
+ struct ggml_context * ctx,
753
+ struct ggml_tensor * a);
754
+
755
+ GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
756
+ struct ggml_context * ctx,
757
+ struct ggml_tensor * a);
758
+
691
759
  GGML_API struct ggml_tensor * ggml_silu(
692
760
  struct ggml_context * ctx,
693
761
  struct ggml_tensor * a);
694
762
 
763
+ GGML_API struct ggml_tensor * ggml_silu_inplace(
764
+ struct ggml_context * ctx,
765
+ struct ggml_tensor * a);
766
+
695
767
  // a - x
696
768
  // b - dy
697
769
  GGML_API struct ggml_tensor * ggml_silu_back(
@@ -705,10 +777,18 @@ extern "C" {
705
777
  struct ggml_context * ctx,
706
778
  struct ggml_tensor * a);
707
779
 
780
+ GGML_API struct ggml_tensor * ggml_norm_inplace(
781
+ struct ggml_context * ctx,
782
+ struct ggml_tensor * a);
783
+
708
784
  GGML_API struct ggml_tensor * ggml_rms_norm(
709
785
  struct ggml_context * ctx,
710
786
  struct ggml_tensor * a);
711
787
 
788
+ GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
789
+ struct ggml_context * ctx,
790
+ struct ggml_tensor * a);
791
+
712
792
  // a - x
713
793
  // b - dy
714
794
  GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -956,13 +1036,15 @@ extern "C" {
956
1036
  // rotary position embedding
957
1037
  // if mode & 1 == 1, skip n_past elements
958
1038
  // if mode & 2 == 1, GPT-NeoX style
1039
+ // if mode & 4 != 0, ChatGLM style
959
1040
  // TODO: avoid creating a new tensor every time
960
1041
  GGML_API struct ggml_tensor * ggml_rope(
961
1042
  struct ggml_context * ctx,
962
1043
  struct ggml_tensor * a,
963
1044
  int n_past,
964
1045
  int n_dims,
965
- int mode);
1046
+ int mode,
1047
+ int n_ctx);
966
1048
 
967
1049
  // in-place, returns view(a)
968
1050
  GGML_API struct ggml_tensor * ggml_rope_inplace(
@@ -970,7 +1052,8 @@ extern "C" {
970
1052
  struct ggml_tensor * a,
971
1053
  int n_past,
972
1054
  int n_dims,
973
- int mode);
1055
+ int mode,
1056
+ int n_ctx);
974
1057
 
975
1058
  // rotary position embedding backward, i.e compute dx from dy
976
1059
  // a - dy
@@ -998,16 +1081,55 @@ extern "C" {
998
1081
  float min,
999
1082
  float max);
1000
1083
 
1001
- // padding = 1
1084
+ // TODO: implement general-purpose convolutions
1085
+ // GGML_API struct ggml_tensor * ggml_conv_1d(
1086
+ // struct ggml_context * ctx,
1087
+ // struct ggml_tensor * a,
1088
+ // struct ggml_tensor * b,
1089
+ // int s0,
1090
+ // int p0,
1091
+ // int d0);
1092
+ //
1093
+ // GGML_API struct ggml_tensor * ggml_conv_2d(
1094
+ // struct ggml_context * ctx,
1095
+ // struct ggml_tensor * a,
1096
+ // struct ggml_tensor * b,
1097
+ // int s0,
1098
+ // int s1,
1099
+ // int p0,
1100
+ // int p1,
1101
+ // int d0,
1102
+ // int d1);
1103
+
1104
+ // padding = half
1002
1105
  // TODO: we don't support extra parameters for now
1003
1106
  // that's why we are hard-coding the stride, padding, and dilation
1004
1107
  // not great ..
1005
- GGML_API struct ggml_tensor * ggml_conv_1d_1s(
1108
+ // example:
1109
+ // a: 3 80 768 1
1110
+ // b: 3000 80 1 1
1111
+ // res: 3000 768 1 1
1112
+ // used in whisper
1113
+ GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
1006
1114
  struct ggml_context * ctx,
1007
1115
  struct ggml_tensor * a,
1008
1116
  struct ggml_tensor * b);
1009
1117
 
1010
- GGML_API struct ggml_tensor * ggml_conv_1d_2s(
1118
+ // used in whisper
1119
+ GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
1120
+ struct ggml_context * ctx,
1121
+ struct ggml_tensor * a,
1122
+ struct ggml_tensor * b);
1123
+
1124
+ // kernel size is a->ne[0] x a->ne[1]
1125
+ // stride is equal to kernel size
1126
+ // padding is zero
1127
+ // example:
1128
+ // a: 16 16 3 768
1129
+ // b: 1024 1024 3 1
1130
+ // res: 64 64 768 1
1131
+ // used in sam
1132
+ GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
1011
1133
  struct ggml_context * ctx,
1012
1134
  struct ggml_tensor * a,
1013
1135
  struct ggml_tensor * b);
@@ -1035,21 +1157,93 @@ extern "C" {
1035
1157
  struct ggml_tensor * c0,
1036
1158
  struct ggml_tensor * c1);
1037
1159
 
1038
- // Mapping operations
1039
- typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
1160
+ // partition into non-overlapping windows with padding if needed
1161
+ // example:
1162
+ // a: 768 64 64 1
1163
+ // w: 14
1164
+ // res: 768 14 14 25
1165
+ // used in sam
1166
+ GGML_API struct ggml_tensor * ggml_win_part(
1167
+ struct ggml_context * ctx,
1168
+ struct ggml_tensor * a,
1169
+ int w);
1170
+
1171
+ // reverse of ggml_win_part
1172
+ // used in sam
1173
+ GGML_API struct ggml_tensor * ggml_win_unpart(
1174
+ struct ggml_context * ctx,
1175
+ struct ggml_tensor * a,
1176
+ int w0,
1177
+ int h0,
1178
+ int w);
1179
+
1180
+ // custom operators
1181
+
1182
+ typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
1040
1183
  typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1041
1184
 
1185
+ typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
1186
+ typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1187
+ typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1188
+
1042
1189
  GGML_API struct ggml_tensor * ggml_map_unary_f32(
1043
1190
  struct ggml_context * ctx,
1044
1191
  struct ggml_tensor * a,
1045
1192
  ggml_unary_op_f32_t fun);
1046
1193
 
1194
+ GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
1195
+ struct ggml_context * ctx,
1196
+ struct ggml_tensor * a,
1197
+ ggml_unary_op_f32_t fun);
1198
+
1047
1199
  GGML_API struct ggml_tensor * ggml_map_binary_f32(
1048
1200
  struct ggml_context * ctx,
1049
1201
  struct ggml_tensor * a,
1050
1202
  struct ggml_tensor * b,
1051
1203
  ggml_binary_op_f32_t fun);
1052
1204
 
1205
+ GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
1206
+ struct ggml_context * ctx,
1207
+ struct ggml_tensor * a,
1208
+ struct ggml_tensor * b,
1209
+ ggml_binary_op_f32_t fun);
1210
+
1211
+ GGML_API struct ggml_tensor * ggml_map_custom1_f32(
1212
+ struct ggml_context * ctx,
1213
+ struct ggml_tensor * a,
1214
+ ggml_custom1_op_f32_t fun);
1215
+
1216
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
1217
+ struct ggml_context * ctx,
1218
+ struct ggml_tensor * a,
1219
+ ggml_custom1_op_f32_t fun);
1220
+
1221
+ GGML_API struct ggml_tensor * ggml_map_custom2_f32(
1222
+ struct ggml_context * ctx,
1223
+ struct ggml_tensor * a,
1224
+ struct ggml_tensor * b,
1225
+ ggml_custom2_op_f32_t fun);
1226
+
1227
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
1228
+ struct ggml_context * ctx,
1229
+ struct ggml_tensor * a,
1230
+ struct ggml_tensor * b,
1231
+ ggml_custom2_op_f32_t fun);
1232
+
1233
+ GGML_API struct ggml_tensor * ggml_map_custom3_f32(
1234
+ struct ggml_context * ctx,
1235
+ struct ggml_tensor * a,
1236
+ struct ggml_tensor * b,
1237
+ struct ggml_tensor * c,
1238
+ ggml_custom3_op_f32_t fun);
1239
+
1240
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
1241
+ struct ggml_context * ctx,
1242
+ struct ggml_tensor * a,
1243
+ struct ggml_tensor * b,
1244
+ struct ggml_tensor * c,
1245
+ ggml_custom3_op_f32_t fun);
1246
+
1053
1247
  // loss function
1054
1248
 
1055
1249
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(