llama_cpp 0.9.5 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -283,6 +283,20 @@
283
283
  const type prefix##3 = (pointer)->array[3]; \
284
284
  GGML_UNUSED(prefix##3);
285
285
 
286
+ #define GGML_TENSOR_UNARY_OP_LOCALS \
287
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
288
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
289
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
290
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
291
+
292
+ #define GGML_TENSOR_BINARY_OP_LOCALS \
293
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
294
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
295
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
296
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
297
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
298
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
299
+
286
300
  #ifdef __cplusplus
287
301
  extern "C" {
288
302
  #endif
@@ -381,6 +395,7 @@ extern "C" {
381
395
  GGML_OP_GROUP_NORM,
382
396
 
383
397
  GGML_OP_MUL_MAT,
398
+ GGML_OP_MUL_MAT_ID,
384
399
  GGML_OP_OUT_PROD,
385
400
 
386
401
  GGML_OP_SCALE,
@@ -407,8 +422,8 @@ extern "C" {
407
422
  GGML_OP_CONV_TRANSPOSE_2D,
408
423
  GGML_OP_POOL_1D,
409
424
  GGML_OP_POOL_2D,
410
-
411
425
  GGML_OP_UPSCALE, // nearest interpolate
426
+ GGML_OP_ARGSORT,
412
427
 
413
428
  GGML_OP_FLASH_ATTN,
414
429
  GGML_OP_FLASH_FF,
@@ -448,7 +463,9 @@ extern "C" {
448
463
  GGML_UNARY_OP_GELU,
449
464
  GGML_UNARY_OP_GELU_QUICK,
450
465
  GGML_UNARY_OP_SILU,
451
- GGML_UNARY_OP_LEAKY
466
+ GGML_UNARY_OP_LEAKY,
467
+
468
+ GGML_UNARY_OP_COUNT,
452
469
  };
453
470
 
454
471
  enum ggml_object_type {
@@ -631,6 +648,9 @@ extern "C" {
631
648
  GGML_API const char * ggml_op_name (enum ggml_op op);
632
649
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
633
650
 
651
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
652
+ GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
653
+
634
654
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
635
655
 
636
656
  GGML_API bool ggml_is_quantized(enum ggml_type type);
@@ -1027,6 +1047,15 @@ extern "C" {
1027
1047
  struct ggml_tensor * a,
1028
1048
  struct ggml_tensor * b);
1029
1049
 
1050
+ // indirect matrix multiplication
1051
+ // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1052
+ GGML_API struct ggml_tensor * ggml_mul_mat_id(
1053
+ struct ggml_context * ctx,
1054
+ struct ggml_tensor * as[],
1055
+ struct ggml_tensor * ids,
1056
+ int id,
1057
+ struct ggml_tensor * b);
1058
+
1030
1059
  // A: m columns, n rows,
1031
1060
  // B: p columns, n rows,
1032
1061
  // result is m columns, p rows
@@ -1520,6 +1549,23 @@ extern "C" {
1520
1549
  struct ggml_tensor * a,
1521
1550
  int scale_factor);
1522
1551
 
1552
+ // sort rows
1553
+ enum ggml_sort_order {
1554
+ GGML_SORT_ASC,
1555
+ GGML_SORT_DESC,
1556
+ };
1557
+
1558
+ GGML_API struct ggml_tensor * ggml_argsort(
1559
+ struct ggml_context * ctx,
1560
+ struct ggml_tensor * a,
1561
+ enum ggml_sort_order order);
1562
+
1563
+ // top k elements per row
1564
+ GGML_API struct ggml_tensor * ggml_top_k(
1565
+ struct ggml_context * ctx,
1566
+ struct ggml_tensor * a,
1567
+ int k);
1568
+
1523
1569
  GGML_API struct ggml_tensor * ggml_flash_attn(
1524
1570
  struct ggml_context * ctx,
1525
1571
  struct ggml_tensor * q,
@@ -1581,7 +1627,6 @@ extern "C" {
1581
1627
  int kh);
1582
1628
 
1583
1629
  // used in sam
1584
-
1585
1630
  GGML_API struct ggml_tensor * ggml_add_rel_pos(
1586
1631
  struct ggml_context * ctx,
1587
1632
  struct ggml_tensor * a,
@@ -1756,7 +1801,7 @@ extern "C" {
1756
1801
  GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
1757
1802
  GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
1758
1803
  GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1759
- GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
1804
+ GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
1760
1805
  GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
1761
1806
  GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
1762
1807
  GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);