llama_cpp 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,7 +58,8 @@
58
58
  // {
59
59
  // ...
60
60
  //
61
- // struct ggml_cgraph gf = ggml_build_forward(f);
61
+ // struct ggml_cgraph * gf = ggml_new_graph(ctx);
62
+ // ggml_build_forward_expand(gf, f);
62
63
  //
63
64
  // // set the input variable and parameter values
64
65
  // ggml_set_f32(x, 2.0f);
@@ -213,15 +214,14 @@
213
214
  #define GGML_QNT_VERSION 2 // bump this on quantization format changes
214
215
  #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
215
216
 
216
- #define GGML_MAX_DIMS 4
217
- #define GGML_MAX_NODES 16384
218
- #define GGML_MAX_PARAMS 1024
219
- #define GGML_MAX_CONTEXTS 64
220
- #define GGML_MAX_SRC 6
221
- #define GGML_MAX_NAME 64
222
- #define GGML_MAX_OP_PARAMS 64
223
- #define GGML_DEFAULT_N_THREADS 4
224
-
217
+ #define GGML_MAX_DIMS 4
218
+ #define GGML_MAX_PARAMS 1024
219
+ #define GGML_MAX_CONTEXTS 64
220
+ #define GGML_MAX_SRC 6
221
+ #define GGML_MAX_NAME 64
222
+ #define GGML_MAX_OP_PARAMS 64
223
+ #define GGML_DEFAULT_N_THREADS 4
224
+ #define GGML_DEFAULT_GRAPH_SIZE 2048
225
225
  #if UINTPTR_MAX == 0xFFFFFFFF
226
226
  #define GGML_MEM_ALIGN 4
227
227
  #else
@@ -245,7 +245,10 @@
245
245
  do { \
246
246
  if (!(x)) { \
247
247
  fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
248
- abort(); \
248
+ fflush(stderr); \
249
+ fflush(stdout); \
250
+ ggml_print_backtrace(); \
251
+ exit(1); \
249
252
  } \
250
253
  } while (0)
251
254
 
@@ -400,13 +403,8 @@ extern "C" {
400
403
  GGML_OP_ROPE_BACK,
401
404
  GGML_OP_ALIBI,
402
405
  GGML_OP_CLAMP,
403
- GGML_OP_CONV_1D,
404
- GGML_OP_CONV_1D_STAGE_0, // internal
405
- GGML_OP_CONV_1D_STAGE_1, // internal
406
406
  GGML_OP_CONV_TRANSPOSE_1D,
407
- GGML_OP_CONV_2D,
408
- GGML_OP_CONV_2D_STAGE_0, // internal
409
- GGML_OP_CONV_2D_STAGE_1, // internal
407
+ GGML_OP_IM2COL,
410
408
  GGML_OP_CONV_TRANSPOSE_2D,
411
409
  GGML_OP_POOL_1D,
412
410
  GGML_OP_POOL_2D,
@@ -451,6 +449,7 @@ extern "C" {
451
449
  GGML_UNARY_OP_GELU,
452
450
  GGML_UNARY_OP_GELU_QUICK,
453
451
  GGML_UNARY_OP_SILU,
452
+ GGML_UNARY_OP_LEAKY
454
453
  };
455
454
 
456
455
  enum ggml_object_type {
@@ -531,37 +530,33 @@ extern "C" {
531
530
 
532
531
  int n_threads;
533
532
 
534
- // the `n_tasks` of nodes, 1:1 mapping to cgraph nodes
535
- int n_tasks[GGML_MAX_NODES];
536
-
537
533
  // abort ggml_graph_compute when true
538
534
  bool (*abort_callback)(void * data);
539
535
  void * abort_callback_data;
540
536
  };
541
537
 
542
- // next prime after GGML_MAX_NODES
543
- // #define GGML_GRAPH_HASHTABLE_SIZE 4099
544
- // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
545
- // #define GGML_GRAPH_HASHTABLE_SIZE 8273
546
- // #define GGML_GRAPH_HASHTABLE_SIZE 16411
547
- #define GGML_GRAPH_HASHTABLE_SIZE 32771
548
-
549
538
  enum ggml_cgraph_eval_order {
550
539
  GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
551
540
  GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
552
541
  GGML_CGRAPH_EVAL_ORDER_COUNT
553
542
  };
554
543
 
544
+ struct ggml_hash_set {
545
+ size_t size;
546
+ struct ggml_tensor ** keys;
547
+ };
548
+
555
549
  // computation graph
556
550
  struct ggml_cgraph {
551
+ int size;
557
552
  int n_nodes;
558
553
  int n_leafs;
559
554
 
560
- struct ggml_tensor * nodes[GGML_MAX_NODES];
561
- struct ggml_tensor * grads[GGML_MAX_NODES];
562
- struct ggml_tensor * leafs[GGML_MAX_NODES];
555
+ struct ggml_tensor ** nodes;
556
+ struct ggml_tensor ** grads;
557
+ struct ggml_tensor ** leafs;
563
558
 
564
- void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
559
+ struct ggml_hash_set visited_hash_table;
565
560
 
566
561
  enum ggml_cgraph_eval_order order;
567
562
 
@@ -571,8 +566,6 @@ extern "C" {
571
566
  int64_t perf_time_us;
572
567
  };
573
568
 
574
- static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
575
-
576
569
  // scratch buffer
577
570
  struct ggml_scratch {
578
571
  size_t offs;
@@ -617,6 +610,8 @@ extern "C" {
617
610
  GGML_API int64_t ggml_cycles(void);
618
611
  GGML_API int64_t ggml_cycles_per_ms(void);
619
612
 
613
+ GGML_API void ggml_print_backtrace(void);
614
+
620
615
  GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
621
616
  GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
622
617
 
@@ -709,7 +704,7 @@ extern "C" {
709
704
  // Context tensor enumeration and lookup
710
705
  GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
711
706
  GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
712
- GGML_API struct ggml_tensor * ggml_get_tensor (struct ggml_context * ctx, const char * name);
707
+ GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
713
708
 
714
709
  GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
715
710
  GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
@@ -943,6 +938,10 @@ extern "C" {
943
938
  struct ggml_context * ctx,
944
939
  struct ggml_tensor * a);
945
940
 
941
+ GGML_API struct ggml_tensor * ggml_leaky(
942
+ struct ggml_context * ctx,
943
+ struct ggml_tensor * a);
944
+
946
945
  GGML_API struct ggml_tensor * ggml_relu_inplace(
947
946
  struct ggml_context * ctx,
948
947
  struct ggml_tensor * a);
@@ -1399,6 +1398,18 @@ extern "C" {
1399
1398
  float min,
1400
1399
  float max);
1401
1400
 
1401
+ GGML_API struct ggml_tensor * ggml_im2col(
1402
+ struct ggml_context * ctx,
1403
+ struct ggml_tensor * a,
1404
+ struct ggml_tensor * b,
1405
+ int s0,
1406
+ int s1,
1407
+ int p0,
1408
+ int p1,
1409
+ int d0,
1410
+ int d1,
1411
+ bool is_2D);
1412
+
1402
1413
  GGML_API struct ggml_tensor * ggml_conv_1d(
1403
1414
  struct ggml_context * ctx,
1404
1415
  struct ggml_tensor * a,
@@ -1482,6 +1493,8 @@ extern "C" {
1482
1493
  int s0, // stride
1483
1494
  int p0); // padding
1484
1495
 
1496
+ // the result will have 2*p0 padding for the first dimension
1497
+ // and 2*p1 padding for the second dimension
1485
1498
  GGML_API struct ggml_tensor * ggml_pool_2d(
1486
1499
  struct ggml_context * ctx,
1487
1500
  struct ggml_tensor * a,
@@ -1490,8 +1503,8 @@ extern "C" {
1490
1503
  int k1,
1491
1504
  int s0,
1492
1505
  int s1,
1493
- int p0,
1494
- int p1);
1506
+ float p0,
1507
+ float p1);
1495
1508
 
1496
1509
  // nearest interpolate
1497
1510
  // used in stable-diffusion
@@ -1732,19 +1745,22 @@ extern "C" {
1732
1745
  GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1733
1746
  GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
1734
1747
 
1735
- GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
1736
- GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
1737
-
1738
1748
  // graph allocation in a context
1739
- GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx);
1740
- GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
1749
+ GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
1750
+ GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
1751
+ GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1752
+ GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
1753
+ GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
1754
+ GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
1755
+ GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
1756
+
1741
1757
  GGML_API size_t ggml_graph_overhead(void);
1758
+ GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
1742
1759
 
1743
1760
  // ggml_graph_plan() has to be called before ggml_graph_compute()
1744
1761
  // when plan.work_size > 0, caller must allocate memory for plan.work_data
1745
1762
  GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
1746
- GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
1747
- GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
1763
+ GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
1748
1764
 
1749
1765
  // same as ggml_graph_compute() but the work data is allocated as a part of the context
1750
1766
  // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
@@ -1752,8 +1768,8 @@ extern "C" {
1752
1768
 
1753
1769
  GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
1754
1770
 
1755
- GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
1756
- GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
1771
+ GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
1772
+ GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
1757
1773
 
1758
1774
  // print info and performance information for the graph
1759
1775
  GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
@@ -1816,6 +1832,8 @@ extern "C" {
1816
1832
  struct ggml_opt_params {
1817
1833
  enum ggml_opt_type type;
1818
1834
 
1835
+ size_t graph_size;
1836
+
1819
1837
  int n_threads;
1820
1838
 
1821
1839
  // delta-based convergence test