llama_cpp 0.9.4 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -244,11 +244,10 @@
 #define GGML_ASSERT(x) \
     do { \
         if (!(x)) { \
-            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
-            fflush(stderr); \
             fflush(stdout); \
+            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
             ggml_print_backtrace(); \
-            exit(1); \
+            abort(); \
         } \
     } while (0)
 
@@ -284,6 +283,20 @@
     const type prefix##3 = (pointer)->array[3]; \
     GGML_UNUSED(prefix##3);
 
+#define GGML_TENSOR_UNARY_OP_LOCALS \
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
+#define GGML_TENSOR_BINARY_OP_LOCALS \
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb0, src0, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb1, src1, nb) \
+    GGML_TENSOR_LOCALS(int64_t, ne,  dst,  ne) \
+    GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -382,6 +395,7 @@ extern "C" {
         GGML_OP_GROUP_NORM,
 
         GGML_OP_MUL_MAT,
+        GGML_OP_MUL_MAT_ID,
         GGML_OP_OUT_PROD,
 
         GGML_OP_SCALE,
@@ -408,8 +422,8 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
-
         GGML_OP_UPSCALE, // nearest interpolate
+        GGML_OP_ARGSORT,
 
         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
@@ -449,7 +463,9 @@ extern "C" {
         GGML_UNARY_OP_GELU,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
-        GGML_UNARY_OP_LEAKY
+        GGML_UNARY_OP_LEAKY,
+
+        GGML_UNARY_OP_COUNT,
     };
 
     enum ggml_object_type {
@@ -632,6 +648,9 @@ extern "C" {
     GGML_API const char * ggml_op_name  (enum ggml_op   op);
     GGML_API const char * ggml_op_symbol(enum ggml_op   op);
 
+    GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
+    GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
+
     GGML_API size_t  ggml_element_size(const struct ggml_tensor * tensor);
 
     GGML_API bool    ggml_is_quantized(enum ggml_type type);
@@ -1028,6 +1047,15 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    // indirect matrix multiplication
+    //  ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
+    GGML_API struct ggml_tensor * ggml_mul_mat_id(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * as[],
+            struct ggml_tensor  * ids,
+            int                   id,
+            struct ggml_tensor  * b);
+
     // A: m columns, n rows,
     // B: p columns, n rows,
     // result is m columns, p rows
@@ -1283,6 +1311,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // fused soft_max(a*scale + mask)
+    // mask is optional
+    GGML_API struct ggml_tensor * ggml_soft_max_ext(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * mask,
+            float                 scale);
+
     GGML_API struct ggml_tensor * ggml_soft_max_back(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1513,6 +1549,23 @@ extern "C" {
             struct ggml_tensor  * a,
             int                   scale_factor);
 
+    // sort rows
+    enum ggml_sort_order {
+        GGML_SORT_ASC,
+        GGML_SORT_DESC,
+    };
+
+    GGML_API struct ggml_tensor * ggml_argsort(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            enum ggml_sort_order  order);
+
+    // top k elements per row
+    GGML_API struct ggml_tensor * ggml_top_k(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   k);
+
     GGML_API struct ggml_tensor * ggml_flash_attn(
             struct ggml_context * ctx,
             struct ggml_tensor  * q,
@@ -1574,7 +1627,6 @@ extern "C" {
             int                   kh);
 
     // used in sam
-
     GGML_API struct ggml_tensor * ggml_add_rel_pos(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1749,7 +1801,7 @@ extern "C" {
     GGML_API struct ggml_cgraph * ggml_new_graph         (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
     GGML_API struct ggml_cgraph * ggml_new_graph_custom  (struct ggml_context * ctx, size_t size, bool grads);
     GGML_API struct ggml_cgraph * ggml_graph_dup         (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API struct ggml_cgraph * ggml_graph_view        (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
+    GGML_API struct ggml_cgraph   ggml_graph_view        (struct ggml_cgraph * cgraph, int i0, int i1);
     GGML_API void                 ggml_graph_cpy         (struct ggml_cgraph * src, struct ggml_cgraph * dst);
     GGML_API void                 ggml_graph_reset       (struct ggml_cgraph * cgraph); // zero grads
     GGML_API void                 ggml_graph_clear       (struct ggml_cgraph * cgraph);