llama_cpp 0.9.4 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -244,11 +244,10 @@
244
244
  #define GGML_ASSERT(x) \
245
245
  do { \
246
246
  if (!(x)) { \
247
- fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
248
- fflush(stderr); \
249
247
  fflush(stdout); \
248
+ fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
250
249
  ggml_print_backtrace(); \
251
- exit(1); \
250
+ abort(); \
252
251
  } \
253
252
  } while (0)
254
253
 
@@ -284,6 +283,20 @@
284
283
  const type prefix##3 = (pointer)->array[3]; \
285
284
  GGML_UNUSED(prefix##3);
286
285
 
286
+ #define GGML_TENSOR_UNARY_OP_LOCALS \
287
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
288
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
289
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
290
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
291
+
292
+ #define GGML_TENSOR_BINARY_OP_LOCALS \
293
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
294
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
295
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
296
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
297
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
298
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
299
+
287
300
  #ifdef __cplusplus
288
301
  extern "C" {
289
302
  #endif
@@ -382,6 +395,7 @@ extern "C" {
382
395
  GGML_OP_GROUP_NORM,
383
396
 
384
397
  GGML_OP_MUL_MAT,
398
+ GGML_OP_MUL_MAT_ID,
385
399
  GGML_OP_OUT_PROD,
386
400
 
387
401
  GGML_OP_SCALE,
@@ -408,8 +422,8 @@ extern "C" {
408
422
  GGML_OP_CONV_TRANSPOSE_2D,
409
423
  GGML_OP_POOL_1D,
410
424
  GGML_OP_POOL_2D,
411
-
412
425
  GGML_OP_UPSCALE, // nearest interpolate
426
+ GGML_OP_ARGSORT,
413
427
 
414
428
  GGML_OP_FLASH_ATTN,
415
429
  GGML_OP_FLASH_FF,
@@ -449,7 +463,9 @@ extern "C" {
449
463
  GGML_UNARY_OP_GELU,
450
464
  GGML_UNARY_OP_GELU_QUICK,
451
465
  GGML_UNARY_OP_SILU,
452
- GGML_UNARY_OP_LEAKY
466
+ GGML_UNARY_OP_LEAKY,
467
+
468
+ GGML_UNARY_OP_COUNT,
453
469
  };
454
470
 
455
471
  enum ggml_object_type {
@@ -632,6 +648,9 @@ extern "C" {
632
648
  GGML_API const char * ggml_op_name (enum ggml_op op);
633
649
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
634
650
 
651
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
652
+ GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
653
+
635
654
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
636
655
 
637
656
  GGML_API bool ggml_is_quantized(enum ggml_type type);
@@ -1028,6 +1047,15 @@ extern "C" {
1028
1047
  struct ggml_tensor * a,
1029
1048
  struct ggml_tensor * b);
1030
1049
 
1050
+ // indirect matrix multiplication
1051
+ // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1052
+ GGML_API struct ggml_tensor * ggml_mul_mat_id(
1053
+ struct ggml_context * ctx,
1054
+ struct ggml_tensor * as[],
1055
+ struct ggml_tensor * ids,
1056
+ int id,
1057
+ struct ggml_tensor * b);
1058
+
1031
1059
  // A: m columns, n rows,
1032
1060
  // B: p columns, n rows,
1033
1061
  // result is m columns, p rows
@@ -1283,6 +1311,14 @@ extern "C" {
1283
1311
  struct ggml_context * ctx,
1284
1312
  struct ggml_tensor * a);
1285
1313
 
1314
+ // fused soft_max(a*scale + mask)
1315
+ // mask is optional
1316
+ GGML_API struct ggml_tensor * ggml_soft_max_ext(
1317
+ struct ggml_context * ctx,
1318
+ struct ggml_tensor * a,
1319
+ struct ggml_tensor * mask,
1320
+ float scale);
1321
+
1286
1322
  GGML_API struct ggml_tensor * ggml_soft_max_back(
1287
1323
  struct ggml_context * ctx,
1288
1324
  struct ggml_tensor * a,
@@ -1513,6 +1549,23 @@ extern "C" {
1513
1549
  struct ggml_tensor * a,
1514
1550
  int scale_factor);
1515
1551
 
1552
+ // sort rows
1553
+ enum ggml_sort_order {
1554
+ GGML_SORT_ASC,
1555
+ GGML_SORT_DESC,
1556
+ };
1557
+
1558
+ GGML_API struct ggml_tensor * ggml_argsort(
1559
+ struct ggml_context * ctx,
1560
+ struct ggml_tensor * a,
1561
+ enum ggml_sort_order order);
1562
+
1563
+ // top k elements per row
1564
+ GGML_API struct ggml_tensor * ggml_top_k(
1565
+ struct ggml_context * ctx,
1566
+ struct ggml_tensor * a,
1567
+ int k);
1568
+
1516
1569
  GGML_API struct ggml_tensor * ggml_flash_attn(
1517
1570
  struct ggml_context * ctx,
1518
1571
  struct ggml_tensor * q,
@@ -1574,7 +1627,6 @@ extern "C" {
1574
1627
  int kh);
1575
1628
 
1576
1629
  // used in sam
1577
-
1578
1630
  GGML_API struct ggml_tensor * ggml_add_rel_pos(
1579
1631
  struct ggml_context * ctx,
1580
1632
  struct ggml_tensor * a,
@@ -1749,7 +1801,7 @@ extern "C" {
1749
1801
  GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
1750
1802
  GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
1751
1803
  GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1752
- GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
1804
+ GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
1753
1805
  GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
1754
1806
  GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
1755
1807
  GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);