llama_cpp 0.2.2 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -198,7 +198,7 @@
198
198
  #define GGML_MAX_PARAMS 256
199
199
  #define GGML_MAX_CONTEXTS 64
200
200
  #define GGML_MAX_OPT 4
201
- #define GGML_MAX_NAME 32
201
+ #define GGML_MAX_NAME 48
202
202
  #define GGML_DEFAULT_N_THREADS 4
203
203
 
204
204
  #define GGML_ASSERT(x) \
@@ -345,6 +345,10 @@ extern "C" {
345
345
  GGML_OP_MAP_UNARY,
346
346
  GGML_OP_MAP_BINARY,
347
347
 
348
+ GGML_OP_MAP_CUSTOM1,
349
+ GGML_OP_MAP_CUSTOM2,
350
+ GGML_OP_MAP_CUSTOM3,
351
+
348
352
  GGML_OP_CROSS_ENTROPY_LOSS,
349
353
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
350
354
 
@@ -465,6 +469,9 @@ extern "C" {
465
469
  GGML_API int64_t ggml_cycles(void);
466
470
  GGML_API int64_t ggml_cycles_per_ms(void);
467
471
 
472
+ GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
473
+ GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
474
+
468
475
  GGML_API void ggml_print_object (const struct ggml_object * obj);
469
476
  GGML_API void ggml_print_objects(const struct ggml_context * ctx);
470
477
 
@@ -563,6 +570,7 @@ extern "C" {
563
570
 
564
571
  GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
565
572
  GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
573
+ GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
566
574
 
567
575
  //
568
576
  // operations on tensors with backpropagation
@@ -1028,13 +1036,15 @@ extern "C" {
1028
1036
  // rotary position embedding
1029
1037
  // if (mode & 1) != 0, skip n_past elements
1030
1038
  // if (mode & 2) != 0, GPT-NeoX style
1039
+ // if (mode & 4) != 0, ChatGLM style
1031
1040
  // TODO: avoid creating a new tensor every time
1032
1041
  GGML_API struct ggml_tensor * ggml_rope(
1033
1042
  struct ggml_context * ctx,
1034
1043
  struct ggml_tensor * a,
1035
1044
  int n_past,
1036
1045
  int n_dims,
1037
- int mode);
1046
+ int mode,
1047
+ int n_ctx);
1038
1048
 
1039
1049
  // in-place, returns view(a)
1040
1050
  GGML_API struct ggml_tensor * ggml_rope_inplace(
@@ -1042,7 +1052,8 @@ extern "C" {
1042
1052
  struct ggml_tensor * a,
1043
1053
  int n_past,
1044
1054
  int n_dims,
1045
- int mode);
1055
+ int mode,
1056
+ int n_ctx);
1046
1057
 
1047
1058
  // rotary position embedding backward, i.e. compute dx from dy
1048
1059
  // a - dy
@@ -1166,21 +1177,73 @@ extern "C" {
1166
1177
  int h0,
1167
1178
  int w);
1168
1179
 
1169
- // Mapping operations
1170
- typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
1180
+ // custom operators
1181
+
1182
+ typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
1171
1183
  typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1172
1184
 
1185
+ typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
1186
+ typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1187
+ typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
1188
+
1173
1189
  GGML_API struct ggml_tensor * ggml_map_unary_f32(
1174
1190
  struct ggml_context * ctx,
1175
1191
  struct ggml_tensor * a,
1176
1192
  ggml_unary_op_f32_t fun);
1177
1193
 
1194
+ GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
1195
+ struct ggml_context * ctx,
1196
+ struct ggml_tensor * a,
1197
+ ggml_unary_op_f32_t fun);
1198
+
1178
1199
  GGML_API struct ggml_tensor * ggml_map_binary_f32(
1179
1200
  struct ggml_context * ctx,
1180
1201
  struct ggml_tensor * a,
1181
1202
  struct ggml_tensor * b,
1182
1203
  ggml_binary_op_f32_t fun);
1183
1204
 
1205
+ GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
1206
+ struct ggml_context * ctx,
1207
+ struct ggml_tensor * a,
1208
+ struct ggml_tensor * b,
1209
+ ggml_binary_op_f32_t fun);
1210
+
1211
+ GGML_API struct ggml_tensor * ggml_map_custom1_f32(
1212
+ struct ggml_context * ctx,
1213
+ struct ggml_tensor * a,
1214
+ ggml_custom1_op_f32_t fun);
1215
+
1216
+ GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
1217
+ struct ggml_context * ctx,
1218
+ struct ggml_tensor * a,
1219
+ ggml_custom1_op_f32_t fun);
1220
+
1221
+ GGML_API struct ggml_tensor * ggml_map_custom2_f32(
1222
+ struct ggml_context * ctx,
1223
+ struct ggml_tensor * a,
1224
+ struct ggml_tensor * b,
1225
+ ggml_custom2_op_f32_t fun);
1226
+
1227
+ GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
1228
+ struct ggml_context * ctx,
1229
+ struct ggml_tensor * a,
1230
+ struct ggml_tensor * b,
1231
+ ggml_custom2_op_f32_t fun);
1232
+
1233
+ GGML_API struct ggml_tensor * ggml_map_custom3_f32(
1234
+ struct ggml_context * ctx,
1235
+ struct ggml_tensor * a,
1236
+ struct ggml_tensor * b,
1237
+ struct ggml_tensor * c,
1238
+ ggml_custom3_op_f32_t fun);
1239
+
1240
+ GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
1241
+ struct ggml_context * ctx,
1242
+ struct ggml_tensor * a,
1243
+ struct ggml_tensor * b,
1244
+ struct ggml_tensor * c,
1245
+ ggml_custom3_op_f32_t fun);
1246
+
1184
1247
  // loss function
1185
1248
 
1186
1249
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(