@fugood/llama.node 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +9 -0
- package/README.md +1 -1
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +2 -1
- package/patches/llama.patch +22 -0
- package/src/TokenizeWorker.cpp +1 -1
- package/src/llama.cpp/CMakeLists.txt +14 -12
- package/src/llama.cpp/common/common.cpp +19 -5
- package/src/llama.cpp/common/common.h +2 -0
- package/src/llama.cpp/common/grammar-parser.cpp +9 -0
- package/src/llama.cpp/common/sampling.cpp +3 -3
- package/src/llama.cpp/common/sampling.h +1 -1
- package/src/llama.cpp/examples/CMakeLists.txt +3 -0
- package/src/llama.cpp/examples/embedding/embedding.cpp +10 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +56 -7
- package/src/llama.cpp/examples/llama.android/{app/src/main/cpp → llama}/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +49 -0
- package/src/llama.cpp/examples/llama.android/{app → llama}/src/main/cpp/llama-android.cpp +14 -14
- package/src/llama.cpp/examples/llava/llava-cli.cpp +26 -6
- package/src/llama.cpp/examples/main/main.cpp +5 -1
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +2 -0
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +70 -0
- package/src/llama.cpp/examples/server/server.cpp +12 -16
- package/src/llama.cpp/examples/server/utils.hpp +1 -1
- package/src/llama.cpp/ggml-backend.c +2 -2
- package/src/llama.cpp/ggml-kompute.cpp +9 -3
- package/src/llama.cpp/ggml-quants.c +6 -0
- package/src/llama.cpp/ggml-rpc.cpp +1023 -0
- package/src/llama.cpp/ggml-rpc.h +24 -0
- package/src/llama.cpp/ggml-sycl.cpp +20 -143
- package/src/llama.cpp/ggml-vulkan.cpp +4 -2
- package/src/llama.cpp/ggml.c +116 -271
- package/src/llama.cpp/ggml.h +12 -15
- package/src/llama.cpp/llama.cpp +451 -265
- package/src/llama.cpp/llama.h +3 -0
- package/src/llama.cpp/requirements.txt +0 -1
- package/src/llama.cpp/tests/CMakeLists.txt +1 -1
- package/src/llama.cpp/tests/test-backend-ops.cpp +16 -19
- package/src/llama.cpp/tests/test-grammar-integration.cpp +46 -0
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +27 -3
- package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +0 -2
package/src/llama.cpp/ggml.h
CHANGED
|
@@ -468,7 +468,6 @@ extern "C" {
|
|
|
468
468
|
GGML_OP_SOFT_MAX_BACK,
|
|
469
469
|
GGML_OP_ROPE,
|
|
470
470
|
GGML_OP_ROPE_BACK,
|
|
471
|
-
GGML_OP_ALIBI,
|
|
472
471
|
GGML_OP_CLAMP,
|
|
473
472
|
GGML_OP_CONV_TRANSPOSE_1D,
|
|
474
473
|
GGML_OP_IM2COL,
|
|
@@ -520,6 +519,7 @@ extern "C" {
|
|
|
520
519
|
GGML_UNARY_OP_TANH,
|
|
521
520
|
GGML_UNARY_OP_ELU,
|
|
522
521
|
GGML_UNARY_OP_RELU,
|
|
522
|
+
GGML_UNARY_OP_SIGMOID,
|
|
523
523
|
GGML_UNARY_OP_GELU,
|
|
524
524
|
GGML_UNARY_OP_GELU_QUICK,
|
|
525
525
|
GGML_UNARY_OP_SILU,
|
|
@@ -1074,6 +1074,14 @@ extern "C" {
|
|
|
1074
1074
|
struct ggml_context * ctx,
|
|
1075
1075
|
struct ggml_tensor * a);
|
|
1076
1076
|
|
|
1077
|
+
GGML_API struct ggml_tensor * ggml_sigmoid(
|
|
1078
|
+
struct ggml_context * ctx,
|
|
1079
|
+
struct ggml_tensor * a);
|
|
1080
|
+
|
|
1081
|
+
GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
|
|
1082
|
+
struct ggml_context * ctx,
|
|
1083
|
+
struct ggml_tensor * a);
|
|
1084
|
+
|
|
1077
1085
|
GGML_API struct ggml_tensor * ggml_gelu(
|
|
1078
1086
|
struct ggml_context * ctx,
|
|
1079
1087
|
struct ggml_tensor * a);
|
|
@@ -1428,15 +1436,13 @@ extern "C" {
|
|
|
1428
1436
|
struct ggml_context * ctx,
|
|
1429
1437
|
struct ggml_tensor * a);
|
|
1430
1438
|
|
|
1431
|
-
// fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
|
|
1439
|
+
// fused soft_max(a*scale + mask*(ALiBi slope))
|
|
1432
1440
|
// mask is optional
|
|
1433
|
-
// pos is required when max_bias > 0.0f
|
|
1434
1441
|
// max_bias = 0.0f for no ALiBi
|
|
1435
1442
|
GGML_API struct ggml_tensor * ggml_soft_max_ext(
|
|
1436
1443
|
struct ggml_context * ctx,
|
|
1437
1444
|
struct ggml_tensor * a,
|
|
1438
1445
|
struct ggml_tensor * mask,
|
|
1439
|
-
struct ggml_tensor * pos,
|
|
1440
1446
|
float scale,
|
|
1441
1447
|
float max_bias);
|
|
1442
1448
|
|
|
@@ -1538,16 +1544,6 @@ extern "C" {
|
|
|
1538
1544
|
float xpos_base,
|
|
1539
1545
|
bool xpos_down);
|
|
1540
1546
|
|
|
1541
|
-
// alibi position embedding
|
|
1542
|
-
// in-place, returns view(a)
|
|
1543
|
-
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
|
|
1544
|
-
struct ggml_context * ctx,
|
|
1545
|
-
struct ggml_tensor * a,
|
|
1546
|
-
int n_past,
|
|
1547
|
-
int n_head,
|
|
1548
|
-
float bias_max),
|
|
1549
|
-
"use ggml_soft_max_ext instead (will be removed in Mar 2024)");
|
|
1550
|
-
|
|
1551
1547
|
// clamp
|
|
1552
1548
|
// in-place, returns view(a)
|
|
1553
1549
|
GGML_API struct ggml_tensor * ggml_clamp(
|
|
@@ -1744,7 +1740,8 @@ extern "C" {
|
|
|
1744
1740
|
struct ggml_tensor * k,
|
|
1745
1741
|
struct ggml_tensor * v,
|
|
1746
1742
|
struct ggml_tensor * mask,
|
|
1747
|
-
float scale);
|
|
1743
|
+
float scale,
|
|
1744
|
+
float max_bias);
|
|
1748
1745
|
|
|
1749
1746
|
GGML_API void ggml_flash_attn_ext_set_prec(
|
|
1750
1747
|
struct ggml_tensor * a,
|