llama_cpp 0.9.5 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/llama_cpp/llama_cpp.cpp +121 -15
- data/ext/llama_cpp/src/ggml-alloc.c +42 -7
- data/ext/llama_cpp/src/ggml-alloc.h +7 -0
- data/ext/llama_cpp/src/ggml-backend-impl.h +46 -21
- data/ext/llama_cpp/src/ggml-backend.c +563 -156
- data/ext/llama_cpp/src/ggml-backend.h +62 -17
- data/ext/llama_cpp/src/ggml-cuda.cu +1140 -355
- data/ext/llama_cpp/src/ggml-cuda.h +9 -1
- data/ext/llama_cpp/src/ggml-impl.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.h +6 -0
- data/ext/llama_cpp/src/ggml-metal.m +506 -158
- data/ext/llama_cpp/src/ggml-metal.metal +795 -144
- data/ext/llama_cpp/src/ggml.c +331 -111
- data/ext/llama_cpp/src/ggml.h +49 -4
- data/ext/llama_cpp/src/llama.cpp +749 -329
- data/ext/llama_cpp/src/llama.h +28 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +20 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -283,6 +283,20 @@
|
|
283
283
|
const type prefix##3 = (pointer)->array[3]; \
|
284
284
|
GGML_UNUSED(prefix##3);
|
285
285
|
|
286
|
+
#define GGML_TENSOR_UNARY_OP_LOCALS \
|
287
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
288
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
289
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
290
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
291
|
+
|
292
|
+
#define GGML_TENSOR_BINARY_OP_LOCALS \
|
293
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
294
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
295
|
+
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
296
|
+
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
|
297
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
298
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
299
|
+
|
286
300
|
#ifdef __cplusplus
|
287
301
|
extern "C" {
|
288
302
|
#endif
|
@@ -381,6 +395,7 @@ extern "C" {
|
|
381
395
|
GGML_OP_GROUP_NORM,
|
382
396
|
|
383
397
|
GGML_OP_MUL_MAT,
|
398
|
+
GGML_OP_MUL_MAT_ID,
|
384
399
|
GGML_OP_OUT_PROD,
|
385
400
|
|
386
401
|
GGML_OP_SCALE,
|
@@ -407,8 +422,8 @@ extern "C" {
|
|
407
422
|
GGML_OP_CONV_TRANSPOSE_2D,
|
408
423
|
GGML_OP_POOL_1D,
|
409
424
|
GGML_OP_POOL_2D,
|
410
|
-
|
411
425
|
GGML_OP_UPSCALE, // nearest interpolate
|
426
|
+
GGML_OP_ARGSORT,
|
412
427
|
|
413
428
|
GGML_OP_FLASH_ATTN,
|
414
429
|
GGML_OP_FLASH_FF,
|
@@ -448,7 +463,9 @@ extern "C" {
|
|
448
463
|
GGML_UNARY_OP_GELU,
|
449
464
|
GGML_UNARY_OP_GELU_QUICK,
|
450
465
|
GGML_UNARY_OP_SILU,
|
451
|
-
GGML_UNARY_OP_LEAKY
|
466
|
+
GGML_UNARY_OP_LEAKY,
|
467
|
+
|
468
|
+
GGML_UNARY_OP_COUNT,
|
452
469
|
};
|
453
470
|
|
454
471
|
enum ggml_object_type {
|
@@ -631,6 +648,9 @@ extern "C" {
|
|
631
648
|
GGML_API const char * ggml_op_name (enum ggml_op op);
|
632
649
|
GGML_API const char * ggml_op_symbol(enum ggml_op op);
|
633
650
|
|
651
|
+
GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
|
652
|
+
GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
|
653
|
+
|
634
654
|
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
|
635
655
|
|
636
656
|
GGML_API bool ggml_is_quantized(enum ggml_type type);
|
@@ -1027,6 +1047,15 @@ extern "C" {
|
|
1027
1047
|
struct ggml_tensor * a,
|
1028
1048
|
struct ggml_tensor * b);
|
1029
1049
|
|
1050
|
+
// indirect matrix multiplication
|
1051
|
+
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1052
|
+
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
1053
|
+
struct ggml_context * ctx,
|
1054
|
+
struct ggml_tensor * as[],
|
1055
|
+
struct ggml_tensor * ids,
|
1056
|
+
int id,
|
1057
|
+
struct ggml_tensor * b);
|
1058
|
+
|
1030
1059
|
// A: m columns, n rows,
|
1031
1060
|
// B: p columns, n rows,
|
1032
1061
|
// result is m columns, p rows
|
@@ -1520,6 +1549,23 @@ extern "C" {
|
|
1520
1549
|
struct ggml_tensor * a,
|
1521
1550
|
int scale_factor);
|
1522
1551
|
|
1552
|
+
// sort rows
|
1553
|
+
enum ggml_sort_order {
|
1554
|
+
GGML_SORT_ASC,
|
1555
|
+
GGML_SORT_DESC,
|
1556
|
+
};
|
1557
|
+
|
1558
|
+
GGML_API struct ggml_tensor * ggml_argsort(
|
1559
|
+
struct ggml_context * ctx,
|
1560
|
+
struct ggml_tensor * a,
|
1561
|
+
enum ggml_sort_order order);
|
1562
|
+
|
1563
|
+
// top k elements per row
|
1564
|
+
GGML_API struct ggml_tensor * ggml_top_k(
|
1565
|
+
struct ggml_context * ctx,
|
1566
|
+
struct ggml_tensor * a,
|
1567
|
+
int k);
|
1568
|
+
|
1523
1569
|
GGML_API struct ggml_tensor * ggml_flash_attn(
|
1524
1570
|
struct ggml_context * ctx,
|
1525
1571
|
struct ggml_tensor * q,
|
@@ -1581,7 +1627,6 @@ extern "C" {
|
|
1581
1627
|
int kh);
|
1582
1628
|
|
1583
1629
|
// used in sam
|
1584
|
-
|
1585
1630
|
GGML_API struct ggml_tensor * ggml_add_rel_pos(
|
1586
1631
|
struct ggml_context * ctx,
|
1587
1632
|
struct ggml_tensor * a,
|
@@ -1756,7 +1801,7 @@ extern "C" {
|
|
1756
1801
|
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
1757
1802
|
GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
|
1758
1803
|
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
1759
|
-
GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
|
1804
|
+
GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
|
1760
1805
|
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
1761
1806
|
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
|
1762
1807
|
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|