@fugood/llama.node 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/LlamaContext.cpp +2 -2
- package/src/LoadSessionWorker.cpp +1 -0
- package/src/llama.cpp/CMakeLists.txt +72 -46
- package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +6 -0
- package/src/llama.cpp/common/common.cpp +732 -752
- package/src/llama.cpp/common/common.h +47 -41
- package/src/llama.cpp/common/grammar-parser.cpp +1 -1
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +6 -6
- package/src/llama.cpp/common/log.h +5 -5
- package/src/llama.cpp/common/sampling.cpp +89 -7
- package/src/llama.cpp/common/sampling.h +5 -0
- package/src/llama.cpp/common/train.cpp +2 -2
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +3 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +2 -2
- package/src/llama.cpp/examples/finetune/finetune.cpp +4 -3
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -2
- package/src/llama.cpp/examples/infill/infill.cpp +8 -8
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +13 -8
- package/src/llama.cpp/examples/llava/clip.h +1 -1
- package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
- package/src/llama.cpp/examples/llava/llava.cpp +0 -15
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +24 -16
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +9 -9
- package/src/llama.cpp/examples/quantize/quantize.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +2 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +78 -14
- package/src/llama.cpp/examples/server/server.cpp +21 -9
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +359 -9
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +4 -3
- package/src/llama.cpp/ggml-backend.c +0 -1
- package/src/llama.cpp/ggml-common.h +0 -54
- package/src/llama.cpp/ggml-cuda.h +1 -0
- package/src/llama.cpp/ggml-impl.h +51 -0
- package/src/llama.cpp/ggml-kompute.cpp +4 -0
- package/src/llama.cpp/ggml-opencl.cpp +4 -1
- package/src/llama.cpp/ggml-quants.c +3700 -2041
- package/src/llama.cpp/ggml-rpc.cpp +188 -56
- package/src/llama.cpp/ggml-sycl.cpp +99 -530
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +9351 -5627
- package/src/llama.cpp/ggml-vulkan.cpp +202 -225
- package/src/llama.cpp/ggml.c +1034 -1154
- package/src/llama.cpp/ggml.h +59 -31
- package/src/llama.cpp/llama.cpp +859 -609
- package/src/llama.cpp/llama.h +19 -6
- package/src/llama.cpp/requirements.txt +0 -1
- package/src/llama.cpp/tests/test-backend-ops.cpp +113 -47
- package/src/llama.cpp/tests/test-chat-template.cpp +16 -4
- package/src/llama.cpp/tests/test-grad0.cpp +43 -83
- package/src/llama.cpp/unicode-data.cpp +6969 -2169
- package/src/llama.cpp/unicode-data.h +15 -12
- package/src/llama.cpp/unicode.cpp +89 -111
- package/src/llama.cpp/unicode.h +44 -12
- package/src/llama.cpp/build.zig +0 -172
- package/src/llama.cpp/ggml-mpi.c +0 -216
- package/src/llama.cpp/ggml-mpi.h +0 -39
- package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +0 -2
package/src/llama.cpp/ggml.h
CHANGED
|
@@ -481,9 +481,7 @@ extern "C" {
|
|
|
481
481
|
GGML_OP_ARGSORT,
|
|
482
482
|
GGML_OP_LEAKY_RELU,
|
|
483
483
|
|
|
484
|
-
GGML_OP_FLASH_ATTN,
|
|
485
484
|
GGML_OP_FLASH_ATTN_EXT,
|
|
486
|
-
GGML_OP_FLASH_FF,
|
|
487
485
|
GGML_OP_FLASH_ATTN_BACK,
|
|
488
486
|
GGML_OP_SSM_CONV,
|
|
489
487
|
GGML_OP_SSM_SCAN,
|
|
@@ -565,7 +563,8 @@ extern "C" {
|
|
|
565
563
|
// n-dimensional tensor
|
|
566
564
|
struct ggml_tensor {
|
|
567
565
|
enum ggml_type type;
|
|
568
|
-
|
|
566
|
+
|
|
567
|
+
GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
|
|
569
568
|
|
|
570
569
|
struct ggml_backend_buffer * buffer;
|
|
571
570
|
|
|
@@ -766,7 +765,8 @@ extern "C" {
|
|
|
766
765
|
GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
|
|
767
766
|
GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
|
|
768
767
|
|
|
769
|
-
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
768
|
+
GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
769
|
+
GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
|
770
770
|
|
|
771
771
|
// use this to compute the memory overhead of a tensor
|
|
772
772
|
GGML_API size_t ggml_tensor_overhead(void);
|
|
@@ -1007,12 +1007,13 @@ extern "C" {
|
|
|
1007
1007
|
struct ggml_tensor * a,
|
|
1008
1008
|
struct ggml_tensor * b);
|
|
1009
1009
|
|
|
1010
|
-
// concat a and b
|
|
1010
|
+
// concat a and b along dim
|
|
1011
1011
|
// used in stable-diffusion
|
|
1012
1012
|
GGML_API struct ggml_tensor * ggml_concat(
|
|
1013
1013
|
struct ggml_context * ctx,
|
|
1014
1014
|
struct ggml_tensor * a,
|
|
1015
|
-
struct ggml_tensor * b);
|
|
1015
|
+
struct ggml_tensor * b,
|
|
1016
|
+
int dim);
|
|
1016
1017
|
|
|
1017
1018
|
GGML_API struct ggml_tensor * ggml_abs(
|
|
1018
1019
|
struct ggml_context * ctx,
|
|
@@ -1458,11 +1459,12 @@ extern "C" {
|
|
|
1458
1459
|
struct ggml_tensor * b);
|
|
1459
1460
|
|
|
1460
1461
|
// rotary position embedding
|
|
1461
|
-
// if mode & 1 == 1, skip n_past elements (DEPRECATED)
|
|
1462
|
+
// if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
|
|
1462
1463
|
// if mode & 2 == 1, GPT-NeoX style
|
|
1463
1464
|
// if mode & 4 == 1, ChatGLM style
|
|
1464
1465
|
//
|
|
1465
1466
|
// b is an int32 vector with size a->ne[2], it contains the positions
|
|
1467
|
+
// c is freq factors (e.g. phi3-128k), (optional)
|
|
1466
1468
|
GGML_API struct ggml_tensor * ggml_rope(
|
|
1467
1469
|
struct ggml_context * ctx,
|
|
1468
1470
|
struct ggml_tensor * a,
|
|
@@ -1481,10 +1483,11 @@ extern "C" {
|
|
|
1481
1483
|
int n_ctx);
|
|
1482
1484
|
|
|
1483
1485
|
// custom RoPE
|
|
1484
|
-
GGML_API struct ggml_tensor * ggml_rope_custom(
|
|
1486
|
+
GGML_API struct ggml_tensor * ggml_rope_ext(
|
|
1485
1487
|
struct ggml_context * ctx,
|
|
1486
1488
|
struct ggml_tensor * a,
|
|
1487
1489
|
struct ggml_tensor * b,
|
|
1490
|
+
struct ggml_tensor * c,
|
|
1488
1491
|
int n_dims,
|
|
1489
1492
|
int mode,
|
|
1490
1493
|
int n_ctx,
|
|
@@ -1497,10 +1500,11 @@ extern "C" {
|
|
|
1497
1500
|
float beta_slow);
|
|
1498
1501
|
|
|
1499
1502
|
// in-place, returns view(a)
|
|
1500
|
-
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
|
1503
|
+
GGML_API struct ggml_tensor * ggml_rope_ext_inplace(
|
|
1501
1504
|
struct ggml_context * ctx,
|
|
1502
1505
|
struct ggml_tensor * a,
|
|
1503
1506
|
struct ggml_tensor * b,
|
|
1507
|
+
struct ggml_tensor * c,
|
|
1504
1508
|
int n_dims,
|
|
1505
1509
|
int mode,
|
|
1506
1510
|
int n_ctx,
|
|
@@ -1512,18 +1516,41 @@ extern "C" {
|
|
|
1512
1516
|
float beta_fast,
|
|
1513
1517
|
float beta_slow);
|
|
1514
1518
|
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1519
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
|
|
1520
|
+
struct ggml_context * ctx,
|
|
1521
|
+
struct ggml_tensor * a,
|
|
1522
|
+
struct ggml_tensor * b,
|
|
1523
|
+
int n_dims,
|
|
1524
|
+
int mode,
|
|
1525
|
+
int n_ctx,
|
|
1526
|
+
int n_orig_ctx,
|
|
1527
|
+
float freq_base,
|
|
1528
|
+
float freq_scale,
|
|
1529
|
+
float ext_factor,
|
|
1530
|
+
float attn_factor,
|
|
1531
|
+
float beta_fast,
|
|
1532
|
+
float beta_slow),
|
|
1533
|
+
"use ggml_rope_ext instead");
|
|
1518
1534
|
|
|
1519
|
-
|
|
1520
|
-
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
|
|
1535
|
+
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
|
1521
1536
|
struct ggml_context * ctx,
|
|
1522
1537
|
struct ggml_tensor * a,
|
|
1523
1538
|
struct ggml_tensor * b,
|
|
1524
1539
|
int n_dims,
|
|
1525
|
-
|
|
1526
|
-
|
|
1540
|
+
int mode,
|
|
1541
|
+
int n_ctx,
|
|
1542
|
+
int n_orig_ctx,
|
|
1543
|
+
float freq_base,
|
|
1544
|
+
float freq_scale,
|
|
1545
|
+
float ext_factor,
|
|
1546
|
+
float attn_factor,
|
|
1547
|
+
float beta_fast,
|
|
1548
|
+
float beta_slow),
|
|
1549
|
+
"use ggml_rope_ext_inplace instead");
|
|
1550
|
+
|
|
1551
|
+
// compute correction dims for YaRN RoPE scaling
|
|
1552
|
+
GGML_CALL void ggml_rope_yarn_corr_dims(
|
|
1553
|
+
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
|
|
1527
1554
|
|
|
1528
1555
|
// rotary position embedding backward, i.e compute dx from dy
|
|
1529
1556
|
// a - dy
|
|
@@ -1531,6 +1558,7 @@ extern "C" {
|
|
|
1531
1558
|
struct ggml_context * ctx,
|
|
1532
1559
|
struct ggml_tensor * a,
|
|
1533
1560
|
struct ggml_tensor * b,
|
|
1561
|
+
struct ggml_tensor * c,
|
|
1534
1562
|
int n_dims,
|
|
1535
1563
|
int mode,
|
|
1536
1564
|
int n_ctx,
|
|
@@ -1673,12 +1701,24 @@ extern "C" {
|
|
|
1673
1701
|
float p1);
|
|
1674
1702
|
|
|
1675
1703
|
// nearest interpolate
|
|
1704
|
+
// multiplies ne0 and ne1 by scale factor
|
|
1676
1705
|
// used in stable-diffusion
|
|
1677
1706
|
GGML_API struct ggml_tensor * ggml_upscale(
|
|
1678
1707
|
struct ggml_context * ctx,
|
|
1679
1708
|
struct ggml_tensor * a,
|
|
1680
1709
|
int scale_factor);
|
|
1681
1710
|
|
|
1711
|
+
// nearest interpolate
|
|
1712
|
+
// nearest interpolate to specified dimensions
|
|
1713
|
+
// used in tortoise.cpp
|
|
1714
|
+
GGML_API struct ggml_tensor * ggml_upscale_ext(
|
|
1715
|
+
struct ggml_context * ctx,
|
|
1716
|
+
struct ggml_tensor * a,
|
|
1717
|
+
int ne0,
|
|
1718
|
+
int ne1,
|
|
1719
|
+
int ne2,
|
|
1720
|
+
int ne3);
|
|
1721
|
+
|
|
1682
1722
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
|
1683
1723
|
GGML_API struct ggml_tensor * ggml_pad(
|
|
1684
1724
|
struct ggml_context * ctx,
|
|
@@ -1720,13 +1760,6 @@ extern "C" {
|
|
|
1720
1760
|
struct ggml_tensor * a,
|
|
1721
1761
|
int k);
|
|
1722
1762
|
|
|
1723
|
-
GGML_API struct ggml_tensor * ggml_flash_attn(
|
|
1724
|
-
struct ggml_context * ctx,
|
|
1725
|
-
struct ggml_tensor * q,
|
|
1726
|
-
struct ggml_tensor * k,
|
|
1727
|
-
struct ggml_tensor * v,
|
|
1728
|
-
bool masked);
|
|
1729
|
-
|
|
1730
1763
|
#define GGML_KQ_MASK_PAD 32
|
|
1731
1764
|
|
|
1732
1765
|
// q: [n_embd, n_batch, n_head, 1]
|
|
@@ -1747,6 +1780,7 @@ extern "C" {
|
|
|
1747
1780
|
struct ggml_tensor * a,
|
|
1748
1781
|
enum ggml_prec prec);
|
|
1749
1782
|
|
|
1783
|
+
// TODO: needs to be adapted to ggml_flash_attn_ext
|
|
1750
1784
|
GGML_API struct ggml_tensor * ggml_flash_attn_back(
|
|
1751
1785
|
struct ggml_context * ctx,
|
|
1752
1786
|
struct ggml_tensor * q,
|
|
@@ -1755,14 +1789,6 @@ extern "C" {
|
|
|
1755
1789
|
struct ggml_tensor * d,
|
|
1756
1790
|
bool masked);
|
|
1757
1791
|
|
|
1758
|
-
GGML_API struct ggml_tensor * ggml_flash_ff(
|
|
1759
|
-
struct ggml_context * ctx,
|
|
1760
|
-
struct ggml_tensor * a,
|
|
1761
|
-
struct ggml_tensor * b0,
|
|
1762
|
-
struct ggml_tensor * b1,
|
|
1763
|
-
struct ggml_tensor * c0,
|
|
1764
|
-
struct ggml_tensor * c1);
|
|
1765
|
-
|
|
1766
1792
|
GGML_API struct ggml_tensor * ggml_ssm_conv(
|
|
1767
1793
|
struct ggml_context * ctx,
|
|
1768
1794
|
struct ggml_tensor * s,
|
|
@@ -2376,8 +2402,10 @@ extern "C" {
|
|
|
2376
2402
|
GGML_API int ggml_cpu_has_avx512 (void);
|
|
2377
2403
|
GGML_API int ggml_cpu_has_avx512_vbmi(void);
|
|
2378
2404
|
GGML_API int ggml_cpu_has_avx512_vnni(void);
|
|
2405
|
+
GGML_API int ggml_cpu_has_avx512_bf16(void);
|
|
2379
2406
|
GGML_API int ggml_cpu_has_fma (void);
|
|
2380
2407
|
GGML_API int ggml_cpu_has_neon (void);
|
|
2408
|
+
GGML_API int ggml_cpu_has_sve (void);
|
|
2381
2409
|
GGML_API int ggml_cpu_has_arm_fma (void);
|
|
2382
2410
|
GGML_API int ggml_cpu_has_metal (void);
|
|
2383
2411
|
GGML_API int ggml_cpu_has_f16c (void);
|