llama_cpp 0.9.5 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -283,6 +283,20 @@
283
283
  const type prefix##3 = (pointer)->array[3]; \
284
284
  GGML_UNUSED(prefix##3);
285
285
 
286
+ #define GGML_TENSOR_UNARY_OP_LOCALS \
287
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
288
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
289
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
290
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
291
+
292
+ #define GGML_TENSOR_BINARY_OP_LOCALS \
293
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
294
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
295
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
296
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
297
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
298
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
299
+
286
300
  #ifdef __cplusplus
287
301
  extern "C" {
288
302
  #endif
@@ -381,6 +395,7 @@ extern "C" {
381
395
  GGML_OP_GROUP_NORM,
382
396
 
383
397
  GGML_OP_MUL_MAT,
398
+ GGML_OP_MUL_MAT_ID,
384
399
  GGML_OP_OUT_PROD,
385
400
 
386
401
  GGML_OP_SCALE,
@@ -407,8 +422,8 @@ extern "C" {
407
422
  GGML_OP_CONV_TRANSPOSE_2D,
408
423
  GGML_OP_POOL_1D,
409
424
  GGML_OP_POOL_2D,
410
-
411
425
  GGML_OP_UPSCALE, // nearest interpolate
426
+ GGML_OP_ARGSORT,
412
427
 
413
428
  GGML_OP_FLASH_ATTN,
414
429
  GGML_OP_FLASH_FF,
@@ -448,7 +463,9 @@ extern "C" {
448
463
  GGML_UNARY_OP_GELU,
449
464
  GGML_UNARY_OP_GELU_QUICK,
450
465
  GGML_UNARY_OP_SILU,
451
- GGML_UNARY_OP_LEAKY
466
+ GGML_UNARY_OP_LEAKY,
467
+
468
+ GGML_UNARY_OP_COUNT,
452
469
  };
453
470
 
454
471
  enum ggml_object_type {
@@ -631,6 +648,9 @@ extern "C" {
631
648
  GGML_API const char * ggml_op_name (enum ggml_op op);
632
649
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
633
650
 
651
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
652
+ GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
653
+
634
654
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
635
655
 
636
656
  GGML_API bool ggml_is_quantized(enum ggml_type type);
@@ -1027,6 +1047,15 @@ extern "C" {
1027
1047
  struct ggml_tensor * a,
1028
1048
  struct ggml_tensor * b);
1029
1049
 
1050
+ // indirect matrix multiplication
1051
+ // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1052
+ GGML_API struct ggml_tensor * ggml_mul_mat_id(
1053
+ struct ggml_context * ctx,
1054
+ struct ggml_tensor * as[],
1055
+ struct ggml_tensor * ids,
1056
+ int id,
1057
+ struct ggml_tensor * b);
1058
+
1030
1059
  // A: m columns, n rows,
1031
1060
  // B: p columns, n rows,
1032
1061
  // result is m columns, p rows
@@ -1520,6 +1549,23 @@ extern "C" {
1520
1549
  struct ggml_tensor * a,
1521
1550
  int scale_factor);
1522
1551
 
1552
+ // sort rows
1553
+ enum ggml_sort_order {
1554
+ GGML_SORT_ASC,
1555
+ GGML_SORT_DESC,
1556
+ };
1557
+
1558
+ GGML_API struct ggml_tensor * ggml_argsort(
1559
+ struct ggml_context * ctx,
1560
+ struct ggml_tensor * a,
1561
+ enum ggml_sort_order order);
1562
+
1563
+ // top k elements per row
1564
+ GGML_API struct ggml_tensor * ggml_top_k(
1565
+ struct ggml_context * ctx,
1566
+ struct ggml_tensor * a,
1567
+ int k);
1568
+
1523
1569
  GGML_API struct ggml_tensor * ggml_flash_attn(
1524
1570
  struct ggml_context * ctx,
1525
1571
  struct ggml_tensor * q,
@@ -1581,7 +1627,6 @@ extern "C" {
1581
1627
  int kh);
1582
1628
 
1583
1629
  // used in sam
1584
-
1585
1630
  GGML_API struct ggml_tensor * ggml_add_rel_pos(
1586
1631
  struct ggml_context * ctx,
1587
1632
  struct ggml_tensor * a,
@@ -1756,7 +1801,7 @@ extern "C" {
1756
1801
  GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
1757
1802
  GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
1758
1803
  GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1759
- GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
1804
+ GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
1760
1805
  GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
1761
1806
  GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
1762
1807
  GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);