@fugood/llama.node 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +7 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +18 -1
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +17 -7
- package/src/EmbeddingWorker.h +2 -1
- package/src/LlamaCompletionWorker.cpp +8 -8
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +89 -27
- package/src/LlamaContext.h +2 -0
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +4 -4
- package/src/llama.cpp/.github/workflows/build.yml +240 -168
- package/src/llama.cpp/.github/workflows/docker.yml +8 -8
- package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
- package/src/llama.cpp/.github/workflows/server.yml +21 -14
- package/src/llama.cpp/CMakeLists.txt +14 -6
- package/src/llama.cpp/Sources/llama/llama.h +4 -0
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/cmake/common.cmake +33 -0
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -4
- package/src/llama.cpp/common/arg.cpp +986 -770
- package/src/llama.cpp/common/arg.h +22 -22
- package/src/llama.cpp/common/common.cpp +212 -351
- package/src/llama.cpp/common/common.h +204 -117
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +50 -50
- package/src/llama.cpp/common/log.h +18 -18
- package/src/llama.cpp/common/ngram-cache.cpp +36 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +163 -121
- package/src/llama.cpp/common/sampling.h +41 -20
- package/src/llama.cpp/common/speculative.cpp +274 -0
- package/src/llama.cpp/common/speculative.h +28 -0
- package/src/llama.cpp/docs/build.md +134 -161
- package/src/llama.cpp/examples/CMakeLists.txt +33 -14
- package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched/batched.cpp +19 -18
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
- package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
- package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
- package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +41 -87
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
- package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
- package/src/llama.cpp/examples/llava/clip.cpp +263 -66
- package/src/llama.cpp/examples/llava/clip.h +8 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
- package/src/llama.cpp/examples/llava/llava.cpp +83 -22
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
- package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
- package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/main/main.cpp +73 -114
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
- package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/run/run.cpp +911 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
- package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
- package/src/llama.cpp/examples/server/server.cpp +2073 -1339
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
- package/src/llama.cpp/examples/server/utils.hpp +354 -277
- package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/simple/simple.cpp +130 -94
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
- package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
- package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tts/tts.cpp +932 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
- package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
- package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +159 -417
- package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
- package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
- package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
- package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
- package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
- package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
- package/src/llama.cpp/include/llama-cpp.h +25 -0
- package/src/llama.cpp/include/llama.h +93 -52
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/src/CMakeLists.txt +4 -8
- package/src/llama.cpp/src/llama-grammar.cpp +15 -15
- package/src/llama.cpp/src/llama-grammar.h +2 -5
- package/src/llama.cpp/src/llama-sampling.cpp +779 -194
- package/src/llama.cpp/src/llama-sampling.h +21 -2
- package/src/llama.cpp/src/llama-vocab.cpp +55 -10
- package/src/llama.cpp/src/llama-vocab.h +35 -11
- package/src/llama.cpp/src/llama.cpp +4317 -2979
- package/src/llama.cpp/src/unicode-data.cpp +2 -2
- package/src/llama.cpp/src/unicode.cpp +62 -51
- package/src/llama.cpp/src/unicode.h +9 -10
- package/src/llama.cpp/tests/CMakeLists.txt +48 -38
- package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
- package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
- package/src/llama.cpp/tests/test-barrier.cpp +1 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
- package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
- package/src/llama.cpp/tests/test-log.cpp +2 -2
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +62 -20
- package/src/llama.cpp/tests/test-sampling.cpp +163 -138
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
- package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
- package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
- package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
- package/src/llama.cpp/common/train.cpp +0 -1515
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
- package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/CMakeLists.txt
CHANGED
@@ -6,6 +6,11 @@ project (llama-node)
 
 set(CMAKE_CXX_STANDARD 17)
 
+execute_process(COMMAND
+  git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ggml-cpu-CMakeLists.txt.patch
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
 if(NOT DEFINED napi_build_version)
   set(napi_build_version 6)
 endif()
@@ -62,6 +67,8 @@ if (VULKAN_SDK)
   find_package(Vulkan REQUIRED)
 endif()
 
+set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
+
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
 
package/bin/** (prebuilt binaries for darwin, linux, linux-vulkan, win32 and win32-vulkan)
CHANGED
Binary files (llama-node.node and node.lib) rebuilt for every platform listed above; no textual diff is available.
package/lib/binding.ts
CHANGED
@@ -8,6 +8,8 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   embedding?: boolean
+  embd_normalize?: number
+  pooling_type?: number
   n_ctx?: number
   n_batch?: number
   n_threads?: number
@@ -23,7 +25,21 @@ export type LlamaCompletionOptions = {
   temperature?: number
   top_k?: number
   top_p?: number
-
+  min_p?: number
+  mirostat?: number
+  mirostat_tau?: number
+  mirostat_eta?: number
+  penalty_last_n?: number
+  penalty_repeat?: number
+  penalty_freq?: number
+  penalty_present?: number
+  typ_p?: number
+  xtc_threshold?: number
+  xtc_probability?: number
+  dry_multiplier?: number
+  dry_base?: number
+  dry_allowed_length?: number
+  dry_penalty_last_n?: number
   n_predict?: number
   max_length?: number
   max_tokens?: number
@@ -54,6 +70,7 @@ export type EmbeddingResult = {
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
+  getModelInfo(): object
   getFormattedChat(messages: ChatMessage[]): string
   completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
   stopCompletion(): void
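The binding.ts hunks above only declare the new option fields and the getModelInfo() method. The sketch below shows how they line up; it is illustrative only. The model paths and the loadNativeBinding() helper are assumptions (the real package loads the prebuilt .node addon from package/bin), and the prompt field is assumed unchanged from 0.3.2; the option and method names come directly from the type declarations in this diff.

    import type {
      LlamaContext,
      LlamaModelOptions,
      LlamaCompletionOptions,
    } from '@fugood/llama.node/lib/binding'

    // Hypothetical stand-in for however the package's entry point requires the native addon.
    declare function loadNativeBinding(): {
      LlamaContext: new (options: LlamaModelOptions) => LlamaContext
    }

    const { LlamaContext } = loadNativeBinding()

    // Completion-oriented context (options unchanged from 0.3.2).
    const ctx = new LlamaContext({ model: './model.gguf', n_ctx: 2048 }) // path is an assumption

    // getModelInfo() is new in this release (implemented in the LlamaContext.cpp hunk below).
    console.log(ctx.getModelInfo())

    // LlamaCompletionOptions now exposes min_p, Mirostat, repetition-penalty, typical-p,
    // XTC and DRY sampler settings.
    const opts = {
      prompt: 'Hello', // prompt/messages fields are assumed carried over from earlier versions
      n_predict: 64,
      min_p: 0.05,
      penalty_repeat: 1.1,
      xtc_threshold: 0.1,
      xtc_probability: 0.5,
      dry_multiplier: 0.8,
    }
    ctx.completion(opts).then((result) => console.log(result))

    // Embedding-oriented context: embd_normalize and pooling_type are the new LlamaModelOptions fields.
    const embedCtx = new LlamaContext({
      model: './embedding-model.gguf', // assumption
      embedding: true,
      embd_normalize: 2, // 2 selects Euclidean (L2) normalization in llama.cpp's common helpers
      pooling_type: 1,   // 1 maps to LLAMA_POOLING_TYPE_MEAN in llama.h
    })
    // embedCtx is used in the embedding sketches after the EmbeddingWorker.cpp and LlamaContext.cpp hunks below.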
package/package.json
CHANGED
package/src/DetokenizeWorker.cpp
CHANGED
@@ -8,7 +8,7 @@ DetokenizeWorker::DetokenizeWorker(const Napi::CallbackInfo &info,
       _tokens(std::move(tokens)) {}
 
 void DetokenizeWorker::Execute() {
-  const auto text = ::
+  const auto text = ::common_detokenize(_sess->context(), _tokens);
   _text = std::move(text);
 }
 
package/src/EmbeddingWorker.cpp
CHANGED
@@ -2,32 +2,42 @@
 #include "LlamaContext.h"
 
 EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
-                                 LlamaSessionPtr &sess, std::string text)
-    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+                                 LlamaSessionPtr &sess, std::string text, common_params &params)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}
 
 void EmbeddingWorker::Execute() {
   llama_kv_cache_clear(_sess->context());
-  auto tokens = ::
+  auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   if (tokens.empty() || tokens.back() != llama_token_sep(_sess->model())) {
     tokens.push_back(llama_token_sep(_sess->model()));
   }
   const int n_embd = llama_n_embd(_sess->model());
   do {
+    auto ctx = _sess->context();
     int ret =
-        llama_decode(
-                     llama_batch_get_one(tokens.data(), tokens.size()
+        llama_decode(ctx,
+                     llama_batch_get_one(tokens.data(), tokens.size()));
     if (ret < 0) {
       SetError("Failed to inference, code: " + std::to_string(ret));
       break;
     }
-
+
+    float *embd;
+    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
+      embd = llama_get_embeddings(ctx);
+    } else {
+      embd = llama_get_embeddings_seq(ctx, 0);
+    }
     if (embd == nullptr) {
       SetError("Failed to get embeddings");
       break;
     }
     _result.embedding.resize(n_embd);
-
+    std::vector<float> embedding(embd, embd + n_embd), out(embd, embd + n_embd);
+    common_embd_normalize(embedding.data(), out.data(), n_embd, _params.embd_normalize);
+    memcpy(_result.embedding.data(), out.data(), n_embd * sizeof(float));
   } while (false);
 }
 
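EmbeddingWorker now takes sequence-level embeddings when a pooling type is configured and normalizes the result with common_embd_normalize using the embd_normalize mode. For reference, a minimal TypeScript sketch of the default mode (2, Euclidean/L2) follows; the meaning of the other mode values is taken from llama.cpp's common helpers and is stated here as an assumption, not as part of this package's API.

    // Minimal sketch of what embd_normalize = 2 (Euclidean/L2) does to a returned vector.
    function l2Normalize(embedding: number[]): number[] {
      const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0))
      return norm === 0 ? embedding.slice() : embedding.map((v) => v / norm)
    }

    // With embd_normalize = 2 the values in result.embedding should already be unit-length,
    // so re-applying l2Normalize() is a no-op; with -1 (assumed to mean "no normalization")
    // this function reproduces the library behaviour on the JS side.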
package/src/EmbeddingWorker.h
CHANGED
@@ -9,7 +9,7 @@ class EmbeddingWorker : public Napi::AsyncWorker,
                         public Napi::Promise::Deferred {
 public:
   EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                  std::string text);
+                  std::string text, common_params &params);
 
 protected:
   void Execute();
@@ -19,5 +19,6 @@ protected:
 private:
   LlamaSessionPtr _sess;
   std::string _text;
+  common_params _params;
   EmbeddingResult _result;
 };
package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -34,7 +34,7 @@ size_t findStoppingStrings(const std::string &text,
 
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-    Napi::Function callback,
+    Napi::Function callback, common_params params,
     std::vector<std::string> stop_words)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words) {
@@ -64,11 +64,11 @@ void LlamaCompletionWorker::Execute() {
 
   auto sparams = llama_sampler_chain_default_params();
 
-  LlamaCppSampling sampling{
-
+  LlamaCppSampling sampling{common_sampler_init(model, _params.sampling),
+                            common_sampler_free};
 
   std::vector<llama_token> prompt_tokens =
-      ::
+      ::common_tokenize(ctx, _params.prompt, add_bos);
   n_input = prompt_tokens.size();
   if (_sess->tokens_ptr()->size() > 0) {
     n_cur = common_part(*(_sess->tokens_ptr()), prompt_tokens);
@@ -102,18 +102,18 @@ void LlamaCompletionWorker::Execute() {
       _result.truncated = true;
     }
     int ret = llama_decode(
-        ctx, llama_batch_get_one(embd->data() + n_cur, n_input
+        ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
     if (ret < 0) {
       SetError("Failed to decode token, code: " + std::to_string(ret));
       break;
     }
     // sample the next token
     const llama_token new_token_id =
-
-
+        common_sampler_sample(sampling.get(), ctx, -1);
+    common_sampler_accept(sampling.get(), new_token_id, true);
     // prepare the next batch
     embd->emplace_back(new_token_id);
-    auto token =
+    auto token = common_token_to_piece(ctx, new_token_id);
     _result.text += token;
     n_cur += n_input;
     _result.tokens_evaluated += n_input;
package/src/LlamaCompletionWorker.h
CHANGED
@@ -12,7 +12,7 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
                               public Napi::Promise::Deferred {
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                        Napi::Function callback,
+                        Napi::Function callback, common_params params,
                         std::vector<std::string> stop_words = {});
 
   ~LlamaCompletionWorker();
@@ -28,7 +28,7 @@ protected:
 
 private:
   LlamaSessionPtr _sess;
-
+  common_params _params;
   std::vector<std::string> _stop_words;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
package/src/LlamaContext.cpp
CHANGED
@@ -7,8 +7,8 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
-std::vector<
-std::vector<
+std::vector<common_chat_msg> get_messages(Napi::Array messages) {
+  std::vector<common_chat_msg> chat;
   for (size_t i = 0; i < messages.Length(); i++) {
     auto message = messages.Get(i).As<Napi::Object>();
     chat.push_back({
@@ -25,6 +25,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
       {InstanceMethod<&LlamaContext::GetSystemInfo>(
            "getSystemInfo",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetModelInfo>(
+           "getModelInfo",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::GetFormattedChat>(
            "getFormattedChat",
            static_cast<napi_property_attributes>(napi_enumerable)),
@@ -67,14 +70,23 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
   auto options = info[0].As<Napi::Object>();
 
-
+  common_params params;
   params.model = get_option<std::string>(options, "model", "");
   if (params.model.empty()) {
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
   }
-
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
+  params.embedding = get_option<bool>(options, "embedding", false);
+  if (params.embedding) {
+    // For non-causal models, batch size must be equal to ubatch size
+    params.n_ubatch = params.n_batch;
+  }
+  params.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
+  int32_t pooling_type = get_option<int32_t>(options, "pooling_type", -1);
+  params.pooling_type = (enum llama_pooling_type) pooling_type;
+
   params.cpuparams.n_threads =
       get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
@@ -86,7 +98,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-  auto result =
+  auto result = common_init_from_params(params);
 
   if (result.model == nullptr || result.context == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
@@ -94,7 +106,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
 
   _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
-  _info =
+  _info = common_params_get_system_info(params);
 }
 
 // getSystemInfo(): string
@@ -102,6 +114,44 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), _info);
 }
 
+bool validateModelChatTemplate(const struct llama_model * model) {
+  std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
+  std::string template_key = "tokenizer.chat_template";
+  int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
+  if (res >= 0) {
+    llama_chat_message chat[] = {{"user", "test"}};
+    std::string tmpl = std::string(model_template.data(), model_template.size());
+    int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    return chat_res > 0;
+  }
+  return res > 0;
+}
+
+// getModelInfo(): object
+Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
+  char desc[1024];
+  auto model = _sess->model();
+  llama_model_desc(model, desc, sizeof(desc));
+
+  int count = llama_model_meta_count(model);
+  Napi::Object metadata = Napi::Object::New(info.Env());
+  for (int i = 0; i < count; i++) {
+    char key[256];
+    llama_model_meta_key_by_index(model, i, key, sizeof(key));
+    char val[2048];
+    llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
+
+    metadata.Set(key, val);
+  }
+  Napi::Object details = Napi::Object::New(info.Env());
+  details.Set("desc", desc);
+  details.Set("nParams", llama_model_n_params(model));
+  details.Set("size", llama_model_size(model));
+  details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
+  details.Set("metadata", metadata);
+  return details;
+}
+
 // getFormattedChat(messages: [{ role: string, content: string }]): string
 Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
@@ -109,7 +159,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
   auto messages = info[0].As<Napi::Array>();
-  auto formatted =
+  auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
   return Napi::String::New(env, formatted);
 }
 
@@ -133,10 +183,10 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
   auto options = info[0].As<Napi::Object>();
 
-
+  common_params params = _sess->params();
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
-    auto formatted =
+    auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
     params.prompt = formatted;
   } else {
     params.prompt = get_option<std::string>(options, "prompt", "");
@@ -146,30 +196,34 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
   }
   params.n_predict = get_option<int32_t>(options, "n_predict", -1);
-  params.
-  params.
-  params.
-  params.
-  params.
-  params.
-  params.sparams.mirostat_tau =
+  params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
+  params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
+  params.sampling.top_p = get_option<float>(options, "top_p", 0.95f);
+  params.sampling.min_p = get_option<float>(options, "min_p", 0.05f);
+  params.sampling.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
+  params.sampling.mirostat_tau =
       get_option<float>(options, "mirostat_tau", 5.00f);
-  params.
+  params.sampling.mirostat_eta =
      get_option<float>(options, "mirostat_eta", 0.10f);
-  params.
+  params.sampling.penalty_last_n =
      get_option<int32_t>(options, "penalty_last_n", 64);
-  params.
+  params.sampling.penalty_repeat =
      get_option<float>(options, "penalty_repeat", 1.00f);
-  params.
+  params.sampling.penalty_freq =
      get_option<float>(options, "penalty_freq", 0.00f);
-  params.
+  params.sampling.penalty_present =
      get_option<float>(options, "penalty_present", 0.00f);
-  params.
-  params.
-  params.
-  params.
+  params.sampling.typ_p = get_option<float>(options, "typical_p", 1.00f);
+  params.sampling.xtc_threshold = get_option<float>(options, "xtc_threshold", 0.00f);
+  params.sampling.xtc_probability = get_option<float>(options, "xtc_probability", 0.10f);
+  params.sampling.dry_multiplier = get_option<float>(options, "dry_multiplier", 1.75f);
+  params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
+  params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
+  params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
+  params.sampling.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
-  params.
+  params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
   std::vector<std::string> stop_words;
   if (options.Has("stop") && options.Get("stop").IsArray()) {
     auto stop_words_array = options.Get("stop").As<Napi::Array>();
@@ -244,8 +298,16 @@ Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Context is disposed")
         .ThrowAsJavaScriptException();
   }
+  auto options = Napi::Object::New(env);
+  if (info.Length() >= 2 && info[1].IsObject()) {
+    options = info[1].As<Napi::Object>();
+  }
+
+  common_params embdParams;
+  embdParams.embedding = true;
+  embdParams.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
   auto text = info[0].ToString().Utf8Value();
-  auto *worker = new EmbeddingWorker(info, _sess, text);
+  auto *worker = new EmbeddingWorker(info, _sess, text, embdParams);
   worker->Queue();
   return worker->Promise();
 }
package/src/LlamaContext.h
CHANGED
@@ -9,6 +9,7 @@ public:
 
 private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
+  Napi::Value GetModelInfo(const Napi::CallbackInfo &info);
   Napi::Value GetFormattedChat(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
@@ -20,6 +21,7 @@ private:
   Napi::Value Release(const Napi::CallbackInfo &info);
 
   std::string _info;
+  Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/TokenizeWorker.cpp
CHANGED
@@ -6,7 +6,7 @@ TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
 
 void TokenizeWorker::Execute() {
-  const auto tokens = ::
+  const auto tokens = ::common_tokenize(_sess->context(), _text, false);
   _result.tokens = std::move(tokens);
 }
 
package/src/common.hpp
CHANGED
@@ -13,7 +13,7 @@
 
 typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
 typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
-typedef std::unique_ptr<
+typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
@@ -47,7 +47,7 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 
 class LlamaSession {
 public:
-  LlamaSession(llama_model *model, llama_context *ctx,
+  LlamaSession(llama_model *model, llama_context *ctx, common_params params)
       : model_(LlamaCppModel(model, llama_free_model)),
         ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
     tokens_.reserve(params.n_ctx);
@@ -65,7 +65,7 @@ public:
     tokens_ = std::move(tokens);
   }
 
-  inline const
+  inline const common_params &params() const { return params_; }
 
   inline std::mutex &get_mutex() { return mutex; }
 
@@ -79,7 +79,7 @@ public:
 private:
   LlamaCppModel model_;
   LlamaCppContext ctx_;
-  const
+  const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;
 };