llama_cpp 0.3.4 → 0.3.6

This diff reflects changes between publicly released package versions as they appear in their public registry and is provided for informational purposes only. The hunks below are from the bundled ggml header (ggml.h).
@@ -199,6 +199,7 @@
 #define GGML_MAX_CONTEXTS      64
 #define GGML_MAX_SRC           6
 #define GGML_MAX_NAME          48
+#define GGML_MAX_OP_PARAMS     32
 #define GGML_DEFAULT_N_THREADS 4
 
 
@@ -207,6 +208,7 @@
 
 #define GGML_UNUSED(x) (void)(x)
 
+#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
 
 #define GGML_ASSERT(x) \
     do { \
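
The new GGML_PAD(x, n) macro rounds x up to the nearest multiple of n; the bit trick assumes n is a power of two. A minimal standalone sketch (not part of the package):

    #include <stdio.h>

    #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

    int main(void) {
        printf("%d\n", GGML_PAD(13, 8)); // 16: 13 rounded up to a multiple of 8
        printf("%d\n", GGML_PAD(16, 8)); // 16: already aligned, unchanged
        return 0;
    }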
@@ -329,16 +331,6 @@ extern "C" {
         GGML_OP_ARGMAX,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
-        GGML_OP_ABS,
-        GGML_OP_SGN,
-        GGML_OP_NEG,
-        GGML_OP_STEP,
-        GGML_OP_TANH,
-        GGML_OP_ELU,
-        GGML_OP_RELU,
-        GGML_OP_GELU,
-        GGML_OP_GELU_QUICK,
-        GGML_OP_SILU,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
         GGML_OP_RMS_NORM,
@@ -377,6 +369,8 @@ extern "C" {
         GGML_OP_WIN_PART,
         GGML_OP_WIN_UNPART,
 
+        GGML_OP_UNARY,
+
         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,
 
@@ -390,6 +384,24 @@ extern "C" {
         GGML_OP_COUNT,
     };
 
+    enum ggml_unary_op {
+        GGML_UNARY_OP_ABS,
+        GGML_UNARY_OP_SGN,
+        GGML_UNARY_OP_NEG,
+        GGML_UNARY_OP_STEP,
+        GGML_UNARY_OP_TANH,
+        GGML_UNARY_OP_ELU,
+        GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_GELU,
+        GGML_UNARY_OP_GELU_QUICK,
+        GGML_UNARY_OP_SILU,
+    };
+
+    enum ggml_object_type {
+        GGML_OBJECT_TENSOR,
+        GGML_OBJECT_GRAPH,
+        GGML_OBJECT_WORK_BUFFER
+    };
 
     // ggml object
     struct ggml_object {
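
The activation entries removed from enum ggml_op above reappear here as enum ggml_unary_op: activations are now routed through a single GGML_OP_UNARY op, with the specific function selected by this enum (see the ggml_unary declarations later in this diff). A hedged fragment, assuming an already-initialized ggml_context ctx and an f32 tensor x:

    // build a ReLU node through the new generic entry point
    struct ggml_tensor * y = ggml_unary(ctx, x, GGML_UNARY_OP_RELU);
    // y->op is GGML_OP_UNARY; the activation kind can be recovered
    // afterwards with ggml_get_unary_op(y), also added in this release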
@@ -398,7 +410,9 @@ extern "C" {
 
         struct ggml_object * next;
 
-        char padding[8];
+        enum ggml_object_type type;
+
+        char padding[4];
     };
 
     static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
@@ -418,6 +432,9 @@ extern "C" {
         // compute data
         enum ggml_op op;
 
+        // op params - allocated as int32_t for alignment
+        int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
+
         bool is_param;
 
         struct ggml_tensor * grad;
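
The new op_params array gives each tensor node a small inline buffer (GGML_MAX_OP_PARAMS bytes, stored as int32_t for alignment) for per-operator constants; the eps argument added to ggml_rms_norm below is presumably carried this way rather than in a separate parameter tensor. A hedged fragment reading it back, assuming eps is stored as the first op param (an internal detail of ggml.c, shown for illustration only):

    float eps;
    memcpy(&eps, normed->op_params, sizeof(eps)); // requires <string.h>; 'normed' is a tensor returned by ggml_rms_norm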
@@ -434,7 +451,7 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        char padding[8];
+        char padding[4];
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -455,6 +472,11 @@ extern "C" {
         void * abort_callback_data;
     };
 
+    // next prime after GGML_MAX_NODES
+    // #define GGML_GRAPH_HASHTABLE_SIZE 4099
+    // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
+    #define GGML_GRAPH_HASHTABLE_SIZE 8273
+
     // computation graph
     struct ggml_cgraph {
         int n_nodes;
@@ -464,12 +486,16 @@ extern "C" {
         struct ggml_tensor * grads[GGML_MAX_NODES];
         struct ggml_tensor * leafs[GGML_MAX_NODES];
 
+        void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
+
         // performance
         int     perf_runs;
         int64_t perf_cycles;
        int64_t perf_time_us;
     };
 
+    static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
+
     // scratch buffer
     struct ggml_scratch {
         size_t offs;
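
Between the three GGML_MAX_NODES-sized pointer arrays and the 8273-entry visited_hash_table, struct ggml_cgraph is now a large object, which is presumably why GGML_GRAPH_SIZE is exposed here and why the context-allocation helpers (ggml_new_graph, ggml_graph_overhead) appear later in this diff. A trivial check (the exact value depends on GGML_MAX_NODES and the target ABI):

    printf("sizeof(struct ggml_cgraph) = %zu bytes\n", GGML_GRAPH_SIZE);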
@@ -531,6 +557,7 @@ extern "C" {
 
     GGML_API const char * ggml_type_name(enum ggml_type type);
     GGML_API const char * ggml_op_name  (enum ggml_op   op);
+    GGML_API const char * ggml_op_symbol(enum ggml_op   op);
 
     GGML_API size_t  ggml_element_size(const struct ggml_tensor * tensor);
 
@@ -554,6 +581,7 @@ extern "C" {
     GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);
 
     GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
+    GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
     GGML_API void *  ggml_get_mem_buffer     (const struct ggml_context * ctx);
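
ggml_get_no_alloc makes the context's no_alloc flag readable, so callers can save and restore it rather than guessing its previous state. A fragment, assuming an initialized ggml_context ctx:

    const bool no_alloc_prev = ggml_get_no_alloc(ctx);
    ggml_set_no_alloc(ctx, true);          // create tensor metadata only, without data buffers
    // ... build measurement-only tensors or graphs here ...
    ggml_set_no_alloc(ctx, no_alloc_prev); // restore whatever the caller had set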
@@ -613,9 +641,11 @@ extern "C" {
     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
 
-    GGML_API const char *         ggml_get_name(const struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
-    GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
+    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
+
+    GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name);
+    GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...);
 
     //
     // operations on tensors with backpropagation
@@ -625,6 +655,11 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_dup_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_add(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -849,14 +884,17 @@ extern "C" {
 
     GGML_API struct ggml_tensor * ggml_rms_norm(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+            struct ggml_tensor  * a,
+            float                 eps);
 
     GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a);
+            struct ggml_tensor  * a,
+            float                 eps);
 
     // a - x
     // b - dy
+    // TODO: update with configurable eps
     GGML_API struct ggml_tensor * ggml_rms_norm_back(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
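
ggml_rms_norm and ggml_rms_norm_inplace now take the epsilon explicitly instead of relying on a value fixed inside the library, so existing callers need one extra argument. A fragment, assuming an initialized context and an f32 tensor cur (1e-6f is only an illustrative value; use whatever the model expects):

    struct ggml_tensor * normed = ggml_rms_norm(ctx, cur, 1e-6f);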
@@ -948,11 +986,22 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    // a -> b, in-place, return view(b)
+    GGML_API struct ggml_tensor * ggml_cpy_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // make contiguous
     GGML_API struct ggml_tensor * ggml_cont(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // make contiguous, in-place
+    GGML_API struct ggml_tensor * ggml_cont_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // return view(a), b specifies the new shape
     // TODO: when we start computing gradient, make a copy instead of view
     GGML_API struct ggml_tensor * ggml_reshape(
@@ -1121,7 +1170,18 @@ extern "C" {
             int                   mode,
             int                   n_ctx);
 
-    // custom RoPE, in-place, returns view(a)
+    // custom RoPE
+    GGML_API struct ggml_tensor * ggml_rope_custom(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past,
+            int                   n_dims,
+            int                   mode,
+            int                   n_ctx,
+            float                 freq_base,
+            float                 freq_scale);
+
+    // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
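
A non-in-place ggml_rope_custom is added alongside the existing in-place variant, exposing freq_base and freq_scale (the RoPE frequency parameters used for scaled/extended context). A fragment, assuming an initialized context and a query tensor q; the values shown are common RoPE defaults, not anything mandated by this header:

    struct ggml_tensor * q_rot = ggml_rope_custom(
            ctx, q,
            /*n_past     =*/ 0,
            /*n_dims     =*/ 128,
            /*mode       =*/ 0,
            /*n_ctx      =*/ 0,
            /*freq_base  =*/ 10000.0f,
            /*freq_scale =*/ 1.0f);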
@@ -1264,6 +1324,16 @@ extern "C" {
     typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
     typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
 
+    GGML_API struct ggml_tensor * ggml_unary(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            enum ggml_unary_op    op);
+
+    GGML_API struct ggml_tensor * ggml_unary_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            enum ggml_unary_op    op);
+
     GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
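
ggml_unary and ggml_unary_inplace are the public entry points for the GGML_OP_UNARY consolidation described earlier; as with the other *_inplace functions in this header, the in-place form writes into a's buffer and returns a view of it. A fragment, assuming ctx and an f32 tensor x:

    struct ggml_tensor * x_gelu = ggml_unary_inplace(ctx, x, GGML_UNARY_OP_GELU); // view of x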
@@ -1343,11 +1413,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * tensor);
 
+
     GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
 
     GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
 
+    // graph allocation in a context
+    GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx);
+    GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API size_t ggml_graph_overhead(void);
+
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
     GGML_API struct ggml_cplan ggml_graph_plan   (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
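
ggml_new_graph and ggml_build_forward_ctx allocate the (now much larger) ggml_cgraph inside a context instead of on the caller's stack, with ggml_graph_overhead() reporting how much context memory to budget for it. A self-contained sketch of how this might be used, assuming the conventional ggml setup; sizes and ops are illustrative only:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        ggml_set_f32(a, 2.0f);
        struct ggml_tensor * b = ggml_sqr(ctx, a);

        // the graph object lives inside ctx (cf. the new GGML_OBJECT_GRAPH type)
        struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, b);

        struct ggml_cplan plan = ggml_graph_plan(gf, /*n_threads =*/ 1);
        // when plan.work_size > 0 the caller must allocate plan.work_data (omitted here)
        ggml_graph_compute(gf, &plan);

        printf("b[0] = %f\n", ggml_get_f32_1d(b, 0)); // expected: 4.0
        ggml_free(ctx);
        return 0;
    }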