@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/CMakeLists.txt
CHANGED
@@ -62,14 +62,7 @@ if (VULKAN_SDK)
   find_package(Vulkan REQUIRED)
 endif()
 
-
-
-add_custom_target(
-  patch ALL
-  COMMAND ${PATCH} -p1 -N < ${CMAKE_SOURCE_DIR}/patches/llama.patch || true
-  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
-  COMMENT "Applying patches"
-)
+set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
 
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
package/bin/** (llama-node.node for darwin, linux, linux-vulkan, win32 and win32-vulkan; node.lib for the win32 targets)
CHANGED
Binary files (prebuilt binaries rebuilt; contents not shown)
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.1",
+  "version": "0.3.3",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
@@ -11,7 +11,8 @@
     "build-native": "cmake-js compile",
     "clean": "rimraf build",
     "prepare": "husky",
-    "commitlint": "commitlint --edit"
+    "commitlint": "commitlint --edit",
+    "release": "release-it"
   },
   "repository": {
     "type": "git",
@@ -55,6 +56,7 @@
     "cmake-js": "^7.3.0",
     "husky": "^9.0.11",
     "jest": "^29.7.0",
+    "release-it": "^17.7.0",
     "rimraf": "^6.0.1",
     "typescript": "^5.4.5",
     "wait-for-expect": "^3.0.2"
package/src/DetokenizeWorker.cpp
CHANGED
@@ -8,7 +8,7 @@ DetokenizeWorker::DetokenizeWorker(const Napi::CallbackInfo &info,
       _tokens(std::move(tokens)) {}
 
 void DetokenizeWorker::Execute() {
-  const auto text = ::
+  const auto text = ::common_detokenize(_sess->context(), _tokens);
  _text = std::move(text);
 }
 
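Note: this rename follows llama.cpp's switch of its common helpers from the llama_*/gpt_* prefix to common_*. Below is a minimal sketch of the renamed tokenize/detokenize pair, not code from this package; it assumes a llama_context created elsewhere, and the function name round_trip is made up for illustration.

#include "common/common.h"
#include <string>
#include <vector>

// Tokenize text and map the tokens straight back with the renamed helpers.
std::string round_trip(llama_context *ctx, const std::string &text) {
  // the third argument controls whether special/BOS tokens are added
  std::vector<llama_token> tokens = common_tokenize(ctx, text, false);
  return common_detokenize(ctx, tokens);
}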
package/src/EmbeddingWorker.cpp
CHANGED
@@ -7,7 +7,7 @@ EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
 
 void EmbeddingWorker::Execute() {
   llama_kv_cache_clear(_sess->context());
-  auto tokens = ::
+  auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   if (tokens.empty() || tokens.back() != llama_token_sep(_sess->model())) {
     tokens.push_back(llama_token_sep(_sess->model()));
@@ -16,7 +16,7 @@ void EmbeddingWorker::Execute() {
   do {
     int ret =
         llama_decode(_sess->context(),
-                     llama_batch_get_one(tokens.data(), tokens.size()
+                     llama_batch_get_one(tokens.data(), tokens.size()));
     if (ret < 0) {
       SetError("Failed to inference, code: " + std::to_string(ret));
       break;
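For context, here is a compact sketch of the embedding path this worker implements, not code from the package: it assumes a context created with params.embedding = true, llama_get_embeddings / llama_n_embd are the standard llama.h accessors, and the name embed is illustrative.

#include "common/common.h"
#include <string>
#include <vector>

// Tokenize, append SEP if missing, decode once, then copy out the embedding.
std::vector<float> embed(llama_model *model, llama_context *ctx,
                         const std::string &text) {
  llama_kv_cache_clear(ctx);
  auto tokens = common_tokenize(ctx, text, true);
  if (tokens.empty() || tokens.back() != llama_token_sep(model)) {
    tokens.push_back(llama_token_sep(model));
  }
  if (llama_decode(ctx, llama_batch_get_one(tokens.data(), (int32_t) tokens.size())) < 0) {
    return {};  // decode failed
  }
  const float *emb = llama_get_embeddings(ctx);
  return std::vector<float>(emb, emb + llama_n_embd(model));
}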
package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -34,7 +34,7 @@ size_t findStoppingStrings(const std::string &text,
 
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-    Napi::Function callback,
+    Napi::Function callback, common_params params,
     std::vector<std::string> stop_words)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words) {
@@ -59,16 +59,16 @@ void LlamaCompletionWorker::Execute() {
   size_t n_cur = 0;
   size_t n_input = 0;
   const auto model = _sess->model();
-  const bool add_bos =
+  const bool add_bos = llama_add_bos_token(model);
   auto ctx = _sess->context();
 
-
+  auto sparams = llama_sampler_chain_default_params();
 
-  LlamaCppSampling sampling{
-
+  LlamaCppSampling sampling{common_sampler_init(model, _params.sparams),
+                            common_sampler_free};
 
   std::vector<llama_token> prompt_tokens =
-      ::
+      ::common_tokenize(ctx, _params.prompt, add_bos);
   n_input = prompt_tokens.size();
   if (_sess->tokens_ptr()->size() > 0) {
     n_cur = common_part(*(_sess->tokens_ptr()), prompt_tokens);
@@ -102,18 +102,18 @@ void LlamaCompletionWorker::Execute() {
       _result.truncated = true;
     }
     int ret = llama_decode(
-        ctx, llama_batch_get_one(embd->data() + n_cur, n_input
+        ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
     if (ret < 0) {
       SetError("Failed to decode token, code: " + std::to_string(ret));
       break;
     }
     // sample the next token
     const llama_token new_token_id =
-
-
+        common_sampler_sample(sampling.get(), ctx, -1);
+    common_sampler_accept(sampling.get(), new_token_id, true);
     // prepare the next batch
     embd->emplace_back(new_token_id);
-    auto token =
+    auto token = common_token_to_piece(ctx, new_token_id);
     _result.text += token;
     n_cur += n_input;
     _result.tokens_evaluated += n_input;
package/src/LlamaCompletionWorker.h
CHANGED
@@ -12,7 +12,7 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
                               public Napi::Promise::Deferred {
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                        Napi::Function callback,
+                        Napi::Function callback, common_params params,
                         std::vector<std::string> stop_words = {});
 
   ~LlamaCompletionWorker();
@@ -28,7 +28,7 @@ protected:
 
 private:
   LlamaSessionPtr _sess;
-
+  common_params _params;
   std::vector<std::string> _stop_words;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
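The two files above move the completion worker from the old llama_sampling_* API to the new common_sampler_* chain. Below is a condensed sketch of that flow under the new API, not code from the package: generate and n_predict are illustrative names, and stop-word/EOG handling is omitted.

#include "common/common.h"
#include "common/sampling.h"
#include <string>
#include <vector>

// Decode the prompt, then repeatedly sample, accept and decode one token.
std::string generate(llama_model *model, llama_context *ctx,
                     common_params &params, int n_predict) {
  std::vector<llama_token> tokens =
      common_tokenize(ctx, params.prompt, llama_add_bos_token(model));
  common_sampler *smpl = common_sampler_init(model, params.sparams);

  std::string out;
  int n_cur = 0;
  int n_input = (int) tokens.size();
  for (int i = 0; i < n_predict; i++) {
    if (llama_decode(ctx, llama_batch_get_one(tokens.data() + n_cur, n_input)) < 0) {
      break;  // decode failed
    }
    const llama_token id = common_sampler_sample(smpl, ctx, -1);
    common_sampler_accept(smpl, id, /*accept_grammar=*/true);
    out += common_token_to_piece(ctx, id);
    n_cur += n_input;  // everything decoded so far
    n_input = 1;       // next batch is just the newly sampled token
    tokens.push_back(id);
  }
  common_sampler_free(smpl);
  return out;
}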
package/src/LlamaContext.cpp
CHANGED
@@ -7,8 +7,8 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
-std::vector<
-  std::vector<
+std::vector<common_chat_msg> get_messages(Napi::Array messages) {
+  std::vector<common_chat_msg> chat;
   for (size_t i = 0; i < messages.Length(); i++) {
     auto message = messages.Get(i).As<Napi::Object>();
     chat.push_back({
@@ -67,7 +67,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
   auto options = info[0].As<Napi::Object>();
 
-
+  common_params params;
   params.model = get_option<std::string>(options, "model", "");
   if (params.model.empty()) {
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
@@ -75,7 +75,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.embedding = get_option<bool>(options, "embedding", false);
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
-  params.n_threads =
+  params.cpuparams.n_threads =
       get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
@@ -86,17 +86,15 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-
-  llama_context *ctx;
-  std::tie(model, ctx) = llama_init_from_gpt_params(params);
+  auto result = common_init_from_params(params);
 
-  if (model == nullptr ||
+  if (result.model == nullptr || result.context == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
         .ThrowAsJavaScriptException();
   }
 
-  _sess = std::make_shared<LlamaSession>(model,
-  _info =
+  _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
+  _info = common_params_get_system_info(params);
 }
 
 // getSystemInfo(): string
@@ -111,7 +109,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
   auto messages = info[0].As<Napi::Array>();
-  auto formatted =
+  auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
   return Napi::String::New(env, formatted);
 }
 
@@ -135,10 +133,10 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
   auto options = info[0].As<Napi::Object>();
 
-
+  common_params params = _sess->params();
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
-    auto formatted =
+    auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
     params.prompt = formatted;
   } else {
     params.prompt = get_option<std::string>(options, "prompt", "");
@@ -152,7 +150,6 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sparams.top_k = get_option<int32_t>(options, "top_k", 40);
   params.sparams.top_p = get_option<float>(options, "top_p", 0.95f);
   params.sparams.min_p = get_option<float>(options, "min_p", 0.05f);
-  params.sparams.tfs_z = get_option<float>(options, "tfs_z", 1.00f);
   params.sparams.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
   params.sparams.mirostat_tau =
       get_option<float>(options, "mirostat_tau", 5.00f);
@@ -167,11 +164,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   params.sparams.penalty_present =
       get_option<float>(options, "penalty_present", 0.00f);
   params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
-  params.sparams.
-  params.ignore_eos = get_option<float>(options, "ignore_eos", false);
+  params.sparams.typ_p = get_option<float>(options, "typical_p", 1.00f);
+  params.sparams.ignore_eos = get_option<float>(options, "ignore_eos", false);
   params.sparams.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
-  params.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+  params.sparams.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
   std::vector<std::string> stop_words;
   if (options.Has("stop") && options.Get("stop").IsArray()) {
     auto stop_words_array = options.Get("stop").As<Napi::Array>();
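The constructor above replaces llama_init_from_gpt_params (which returned the model and context as a tuple) with common_init_from_params, which returns both in a single result struct. A minimal sketch of that initialization flow, not code from the package: try_load is an illustrative name, and the llama_free/llama_free_model/llama_backend_free teardown calls come from llama.h.

#include "common/common.h"
#include <string>

// Load a model and context with the combined initializer, then tear down.
bool try_load(const std::string &model_path) {
  common_params params;
  params.model = model_path;
  params.cpuparams.n_threads = cpu_get_num_math() / 2;

  llama_backend_init();
  llama_numa_init(params.numa);

  auto result = common_init_from_params(params);
  if (result.model == nullptr || result.context == nullptr) {
    return false;  // failed to load the model or create the context
  }

  // ... hand result.model / result.context to a LlamaSession and run work ...

  llama_free(result.context);
  llama_free_model(result.model);
  llama_backend_free();
  return true;
}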
package/src/TokenizeWorker.cpp
CHANGED
@@ -6,7 +6,7 @@ TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
 
 void TokenizeWorker::Execute() {
-  const auto tokens = ::
+  const auto tokens = ::common_tokenize(_sess->context(), _text, false);
   _result.tokens = std::move(tokens);
 }
 
package/src/common.hpp
CHANGED
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common/common.h"
+#include "common/sampling.h"
 #include "llama.h"
 #include <memory>
 #include <mutex>
@@ -12,7 +13,7 @@
 
 typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
 typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
-typedef std::unique_ptr<
+typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
@@ -46,7 +47,7 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 
 class LlamaSession {
 public:
-  LlamaSession(llama_model *model, llama_context *ctx,
+  LlamaSession(llama_model *model, llama_context *ctx, common_params params)
       : model_(LlamaCppModel(model, llama_free_model)),
         ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
     tokens_.reserve(params.n_ctx);
@@ -64,7 +65,7 @@ public:
     tokens_ = std::move(tokens);
   }
 
-  inline const
+  inline const common_params &params() const { return params_; }
 
   inline std::mutex &get_mutex() { return mutex; }
 
@@ -78,7 +79,7 @@ public:
 private:
   LlamaCppModel model_;
  LlamaCppContext ctx_;
-  const
+  const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;
 };
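common.hpp keeps each llama.cpp object behind a std::unique_ptr whose deleter is the matching C free function, so the sampler (now common_sampler) is released even on early returns. A small sketch of that RAII pattern, not code from the package, assuming a context that has already decoded a prompt; sample_one is a made-up name.

#include "common/common.h"
#include "common/sampling.h"
#include <memory>

typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
    LlamaCppSampling;

// The unique_ptr owns the sampler; common_sampler_free runs automatically
// when `sampling` goes out of scope, including on early returns.
void sample_one(llama_model *model, llama_context *ctx, const common_params &params) {
  LlamaCppSampling sampling{common_sampler_init(model, params.sparams),
                            common_sampler_free};
  const llama_token id = common_sampler_sample(sampling.get(), ctx, -1);
  common_sampler_accept(sampling.get(), id, /*accept_grammar=*/true);
  // `id` can be turned back into text with common_token_to_piece(ctx, id)
}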