llama_cpp 0.9.5 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/llama_cpp/llama_cpp.cpp +121 -15
- data/ext/llama_cpp/src/ggml-alloc.c +42 -7
- data/ext/llama_cpp/src/ggml-alloc.h +7 -0
- data/ext/llama_cpp/src/ggml-backend-impl.h +46 -21
- data/ext/llama_cpp/src/ggml-backend.c +563 -156
- data/ext/llama_cpp/src/ggml-backend.h +62 -17
- data/ext/llama_cpp/src/ggml-cuda.cu +1140 -355
- data/ext/llama_cpp/src/ggml-cuda.h +9 -1
- data/ext/llama_cpp/src/ggml-impl.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.h +6 -0
- data/ext/llama_cpp/src/ggml-metal.m +506 -158
- data/ext/llama_cpp/src/ggml-metal.metal +795 -144
- data/ext/llama_cpp/src/ggml.c +331 -111
- data/ext/llama_cpp/src/ggml.h +49 -4
- data/ext/llama_cpp/src/llama.cpp +749 -329
- data/ext/llama_cpp/src/llama.h +28 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +20 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -283,6 +283,20 @@
|
|
283
283
|
const type prefix##3 = (pointer)->array[3]; \
|
284
284
|
GGML_UNUSED(prefix##3);
|
285
285
|
|
286
|
+
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
287
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
288
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
289
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
290
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
291
|
+
|
292
|
+
#define GGML_TENSOR_BINARY_OP_LOCALS \
|
293
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
294
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
295
|
+
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
296
|
+
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
|
297
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
298
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
299
|
+
|
286
300
|
#ifdef __cplusplus
|
287
301
|
extern "C" {
|
288
302
|
#endif
|
@@ -381,6 +395,7 @@ extern "C" {
|
|
381
395
|
GGML_OP_GROUP_NORM,
|
382
396
|
|
383
397
|
GGML_OP_MUL_MAT,
|
398
|
+
GGML_OP_MUL_MAT_ID,
|
384
399
|
GGML_OP_OUT_PROD,
|
385
400
|
|
386
401
|
GGML_OP_SCALE,
|
@@ -407,8 +422,8 @@ extern "C" {
|
|
407
422
|
GGML_OP_CONV_TRANSPOSE_2D,
|
408
423
|
GGML_OP_POOL_1D,
|
409
424
|
GGML_OP_POOL_2D,
|
410
|
-
|
411
425
|
GGML_OP_UPSCALE, // nearest interpolate
|
426
|
+
GGML_OP_ARGSORT,
|
412
427
|
|
413
428
|
GGML_OP_FLASH_ATTN,
|
414
429
|
GGML_OP_FLASH_FF,
|
@@ -448,7 +463,9 @@ extern "C" {
|
|
448
463
|
GGML_UNARY_OP_GELU,
|
449
464
|
GGML_UNARY_OP_GELU_QUICK,
|
450
465
|
GGML_UNARY_OP_SILU,
|
451
|
-
GGML_UNARY_OP_LEAKY
|
466
|
+
GGML_UNARY_OP_LEAKY,
|
467
|
+
|
468
|
+
GGML_UNARY_OP_COUNT,
|
452
469
|
};
|
453
470
|
|
454
471
|
enum ggml_object_type {
|
@@ -631,6 +648,9 @@ extern "C" {
|
|
631
648
|
GGML_API const char * ggml_op_name (enum ggml_op op);
|
632
649
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
633
650
|
|
651
|
+
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
652
|
+
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
653
|
+
|
634
654
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
635
655
|
|
636
656
|
GGML_API bool ggml_is_quantized(enum ggml_type type);
|
@@ -1027,6 +1047,15 @@ extern "C" {
|
|
1027
1047
|
struct ggml_tensor * a,
|
1028
1048
|
struct ggml_tensor * b);
|
1029
1049
|
|
1050
|
+
// indirect matrix multiplication
|
1051
|
+
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1052
|
+
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
1053
|
+
struct ggml_context * ctx,
|
1054
|
+
struct ggml_tensor * as[],
|
1055
|
+
struct ggml_tensor * ids,
|
1056
|
+
int id,
|
1057
|
+
struct ggml_tensor * b);
|
1058
|
+
|
1030
1059
|
// A: m columns, n rows,
|
1031
1060
|
// B: p columns, n rows,
|
1032
1061
|
// result is m columns, p rows
|
@@ -1520,6 +1549,23 @@ extern "C" {
|
|
1520
1549
|
struct ggml_tensor * a,
|
1521
1550
|
int scale_factor);
|
1522
1551
|
|
1552
|
+
// sort rows
|
1553
|
+
enum ggml_sort_order {
|
1554
|
+
GGML_SORT_ASC,
|
1555
|
+
GGML_SORT_DESC,
|
1556
|
+
};
|
1557
|
+
|
1558
|
+
GGML_API struct ggml_tensor * ggml_argsort(
|
1559
|
+
struct ggml_context * ctx,
|
1560
|
+
struct ggml_tensor * a,
|
1561
|
+
enum ggml_sort_order order);
|
1562
|
+
|
1563
|
+
// top k elements per row
|
1564
|
+
GGML_API struct ggml_tensor * ggml_top_k(
|
1565
|
+
struct ggml_context * ctx,
|
1566
|
+
struct ggml_tensor * a,
|
1567
|
+
int k);
|
1568
|
+
|
1523
1569
|
GGML_API struct ggml_tensor * ggml_flash_attn(
|
1524
1570
|
struct ggml_context * ctx,
|
1525
1571
|
struct ggml_tensor * q,
|
@@ -1581,7 +1627,6 @@ extern "C" {
|
|
1581
1627
|
int kh);
|
1582
1628
|
|
1583
1629
|
// used in sam
|
1584
|
-
|
1585
1630
|
GGML_API struct ggml_tensor * ggml_add_rel_pos(
|
1586
1631
|
struct ggml_context * ctx,
|
1587
1632
|
struct ggml_tensor * a,
|
@@ -1756,7 +1801,7 @@ extern "C" {
|
|
1756
1801
|
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
1757
1802
|
GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
|
1758
1803
|
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
1759
|
-
GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
|
1804
|
+
GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
|
1760
1805
|
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
1761
1806
|
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
|
1762
1807
|
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|