llama_cpp 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -198,7 +198,7 @@
198
198
  #define GGML_MAX_PARAMS 256
199
199
  #define GGML_MAX_CONTEXTS 64
200
200
  #define GGML_MAX_OPT 4
201
- #define GGML_MAX_NAME 32
201
+ #define GGML_MAX_NAME 48
202
202
  #define GGML_DEFAULT_N_THREADS 4
203
203
 
204
204
  #define GGML_ASSERT(x) \
@@ -345,6 +345,10 @@ extern "C" {
345
345
  GGML_OP_MAP_UNARY,
346
346
  GGML_OP_MAP_BINARY,
347
347
 
348
+ GGML_OP_MAP_CUSTOM1,
349
+ GGML_OP_MAP_CUSTOM2,
350
+ GGML_OP_MAP_CUSTOM3,
351
+
348
352
  GGML_OP_CROSS_ENTROPY_LOSS,
349
353
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
350
354
 
@@ -465,6 +469,9 @@ extern "C" {
465
469
  GGML_API int64_t ggml_cycles(void);
466
470
  GGML_API int64_t ggml_cycles_per_ms(void);
467
471
 
472
+ GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
473
+ GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
474
+
468
475
  GGML_API void ggml_print_object (const struct ggml_object * obj);
469
476
  GGML_API void ggml_print_objects(const struct ggml_context * ctx);
470
477
 
@@ -563,6 +570,7 @@ extern "C" {
563
570
 
564
571
  GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
565
572
  GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
573
+ GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
566
574
 
567
575
  //
568
576
  // operations on tensors with backpropagation
@@ -1028,13 +1036,15 @@ extern "C" {
1028
1036
  // rotary position embedding
1029
1037
  // if (mode & 1) != 0, skip n_past elements
1030
1038
  // if (mode & 2) != 0, GPT-NeoX style
1039
+ // if (mode & 4) != 0, ChatGLM style
1031
1040
  // TODO: avoid creating a new tensor every time
1032
1041
  GGML_API struct ggml_tensor * ggml_rope(
1033
1042
  struct ggml_context * ctx,
1034
1043
  struct ggml_tensor * a,
1035
1044
  int n_past,
1036
1045
  int n_dims,
1037
- int mode);
1046
+ int mode,
1047
+ int n_ctx);
1038
1048
 
1039
1049
  // in-place, returns view(a)
1040
1050
  GGML_API struct ggml_tensor * ggml_rope_inplace(
@@ -1042,7 +1052,8 @@ extern "C" {
1042
1052
  struct ggml_tensor * a,
1043
1053
  int n_past,
1044
1054
  int n_dims,
1045
- int mode);
1055
+ int mode,
1056
+ int n_ctx);
1046
1057
 
1047
1058
  // rotary position embedding backward, i.e., compute dx from dy
1048
1059
  // a - dy
@@ -1166,21 +1177,73 @@ extern "C" {
1166
1177
  int h0,
1167
1178
  int w);
1168
1179
 
1169
- // Mapping operations
1170
- typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
1180
+ // custom operators
1181
+
1182
+ typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
1171
1183
  typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1172
1184
 
1185
+ typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
1186
+ typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1187
+ typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1188
+
1173
1189
  GGML_API struct ggml_tensor * ggml_map_unary_f32(
1174
1190
  struct ggml_context * ctx,
1175
1191
  struct ggml_tensor * a,
1176
1192
  ggml_unary_op_f32_t fun);
1177
1193
 
1194
+ GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
1195
+ struct ggml_context * ctx,
1196
+ struct ggml_tensor * a,
1197
+ ggml_unary_op_f32_t fun);
1198
+
1178
1199
  GGML_API struct ggml_tensor * ggml_map_binary_f32(
1179
1200
  struct ggml_context * ctx,
1180
1201
  struct ggml_tensor * a,
1181
1202
  struct ggml_tensor * b,
1182
1203
  ggml_binary_op_f32_t fun);
1183
1204
 
1205
+ GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
1206
+ struct ggml_context * ctx,
1207
+ struct ggml_tensor * a,
1208
+ struct ggml_tensor * b,
1209
+ ggml_binary_op_f32_t fun);
1210
+
1211
+ GGML_API struct ggml_tensor * ggml_map_custom1_f32(
1212
+ struct ggml_context * ctx,
1213
+ struct ggml_tensor * a,
1214
+ ggml_custom1_op_f32_t fun);
1215
+
1216
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
1217
+ struct ggml_context * ctx,
1218
+ struct ggml_tensor * a,
1219
+ ggml_custom1_op_f32_t fun);
1220
+
1221
+ GGML_API struct ggml_tensor * ggml_map_custom2_f32(
1222
+ struct ggml_context * ctx,
1223
+ struct ggml_tensor * a,
1224
+ struct ggml_tensor * b,
1225
+ ggml_custom2_op_f32_t fun);
1226
+
1227
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
1228
+ struct ggml_context * ctx,
1229
+ struct ggml_tensor * a,
1230
+ struct ggml_tensor * b,
1231
+ ggml_custom2_op_f32_t fun);
1232
+
1233
+ GGML_API struct ggml_tensor * ggml_map_custom3_f32(
1234
+ struct ggml_context * ctx,
1235
+ struct ggml_tensor * a,
1236
+ struct ggml_tensor * b,
1237
+ struct ggml_tensor * c,
1238
+ ggml_custom3_op_f32_t fun);
1239
+
1240
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
1241
+ struct ggml_context * ctx,
1242
+ struct ggml_tensor * a,
1243
+ struct ggml_tensor * b,
1244
+ struct ggml_tensor * c,
1245
+ ggml_custom3_op_f32_t fun);
1246
+
1184
1247
  // loss function
1185
1248
 
1186
1249
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(