@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/CMakeLists.txt
CHANGED
@@ -69,12 +69,17 @@ endif()
 
 set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
 
+set(LLAMA_CURL OFF CACHE BOOL "Build curl")
+
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
+add_subdirectory("src/llama.cpp/tools/mtmd")
 
 include_directories(
   ${CMAKE_JS_INC}
   "src/llama.cpp"
+  "src/llama.cpp/src"
+  "src/tools/mtmd"
 )
 
 file(
@@ -101,7 +106,7 @@ file(
 
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
-target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common)
+target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common mtmd ${CMAKE_THREAD_LIBS_INIT})
 
 add_custom_target(copy_assets ALL DEPENDS ${PROJECT_NAME})
 
package/bin/darwin/arm64/llama-node.node: CHANGED (Binary file)
package/bin/darwin/x64/llama-node.node: CHANGED (Binary file)
package/bin/linux/arm64/llama-node.node: CHANGED (Binary file)
package/bin/linux/x64/llama-node.node: CHANGED (Binary file)
package/bin/linux-cuda/arm64/llama-node.node: CHANGED (Binary file)
package/bin/linux-cuda/x64/llama-node.node: CHANGED (Binary file)
package/bin/linux-vulkan/arm64/llama-node.node: CHANGED (Binary file)
package/bin/linux-vulkan/x64/llama-node.node: CHANGED (Binary file)
package/bin/win32/arm64/llama-node.node: CHANGED (Binary file)
package/bin/win32/arm64/node.lib: CHANGED (Binary file)
package/bin/win32/x64/llama-node.node: CHANGED (Binary file)
package/bin/win32/x64/node.lib: CHANGED (Binary file)
package/bin/win32-vulkan/arm64/llama-node.node: CHANGED (Binary file)
package/bin/win32-vulkan/arm64/node.lib: CHANGED (Binary file)
package/bin/win32-vulkan/x64/llama-node.node: CHANGED (Binary file)
package/bin/win32-vulkan/x64/node.lib: CHANGED (Binary file)

package/lib/binding.ts
CHANGED
@@ -1,8 +1,17 @@
 import * as path from 'path'
 
+
+export type MessagePart = {
+  type: string,
+  text?: string,
+  image_url?: {
+    url?: string
+  }
+}
+
 export type ChatMessage = {
   role: string
-  content
+  content?: string | MessagePart[]
 }
 
 export type LlamaModelOptions = {
@@ -36,6 +45,10 @@ export type LlamaModelOptions = {
     | 'iq4_nl'
     | 'q5_0'
     | 'q5_1'
+  /**
+   * Enable context shifting to handle prompts larger than context size
+   */
+  ctx_shift?: boolean
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
@@ -89,6 +102,13 @@ export type LlamaCompletionOptions = {
   grammar_lazy?: boolean
   grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
+  /**
+   * Path(s) to image file(s) to process before generating text.
+   * When provided, the image(s) will be processed and added to the context.
+   * Requires multimodal support to be enabled via initMultimodal.
+   * Supports both file paths and base64 data URLs.
+   */
+  image_paths?: string | string[]
 }
 
 export type LlamaCompletionResult = {
@@ -96,6 +116,7 @@ export type LlamaCompletionResult = {
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean
+  context_full: boolean
   timings: {
     prompt_n: number
     prompt_ms: number
@@ -149,9 +170,30 @@ export interface LlamaContext {
   applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
   removeLoraAdapters(adapters: { path: string }[]): void
   getLoadedLoraAdapters(): { path: string; scaled: number }[]
+  /**
+   * Initialize multimodal support with a mmproj file
+   * @param mmproj_path Path to the multimodal projector file
+   * @returns Promise resolving to true if initialization was successful
+   */
+  initMultimodal(options: { path: string; use_gpu?: boolean }): Promise<boolean>
+
+  /**
+   * Check if multimodal support is enabled
+   * @returns Promise resolving to true if multimodal is enabled
+   */
+  isMultimodalEnabled(): Promise<boolean>
+
+  /**
+   * Release multimodal support
+   */
+  releaseMultimodal(): Promise<void>
+
   // static
   loadModelInfo(path: string, skip: string[]): Promise<Object>
-  toggleNativeLog(
+  toggleNativeLog(
+    enable: boolean,
+    callback: (level: string, text: string) => void,
+  ): void
 }
 
 export interface Module {
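Taken together, the new typings describe a multimodal lifecycle on the context: load a projector with `initMultimodal`, pass images via `image_paths` (or `image_url` message parts), and inspect the new `context_full` flag on results. A minimal usage sketch, assuming hypothetical model/mmproj file paths and a `model` option name that is not shown in this excerpt:

```ts
import { loadModel } from '@fugood/llama.node'

async function describeImage() {
  // NOTE: the `model` field name and all file paths below are assumptions for illustration.
  const ctx = await loadModel({ model: './model-q4_0.gguf', ctx_shift: true })

  // New in 0.4.0: load a multimodal projector (mmproj) before sending images.
  const ok = await ctx.initMultimodal({ path: './mmproj-f16.gguf', use_gpu: true })
  if (!ok) throw new Error('failed to initialize multimodal support')

  const result = await ctx.completion({
    messages: [{ role: 'user', content: 'Describe the attached photo.' }],
    // image_paths accepts file paths or base64 data URLs per the new option docs.
    image_paths: ['./photo.jpg'],
  })
  console.log(result.tokens_predicted, result.context_full)

  await ctx.releaseMultimodal()
  await ctx.release()
}
```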
package/lib/index.js
CHANGED
@@ -51,12 +51,143 @@ function addNativeLogListener(listener) {
         },
     };
 }
+const getJsonSchema = (responseFormat) => {
+    var _a;
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_schema') {
+        return (_a = responseFormat.json_schema) === null || _a === void 0 ? void 0 : _a.schema;
+    }
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_object') {
+        return responseFormat.schema || {};
+    }
+    return null;
+};
+class LlamaContextWrapper {
+    constructor(nativeCtx) {
+        this.ctx = nativeCtx;
+    }
+    getSystemInfo() {
+        return this.ctx.getSystemInfo();
+    }
+    getModelInfo() {
+        return this.ctx.getModelInfo();
+    }
+    isJinjaSupported() {
+        const { minja } = this.ctx.getModelInfo().chatTemplates;
+        return !!(minja === null || minja === void 0 ? void 0 : minja.toolUse) || !!(minja === null || minja === void 0 ? void 0 : minja.default);
+    }
+    isLlamaChatSupported() {
+        return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
+    }
+    _formatImageChat(messages) {
+        if (!messages)
+            return {
+                messages,
+                has_image: false,
+            };
+        const imagePaths = [];
+        return {
+            messages: messages.map((msg) => {
+                if (Array.isArray(msg.content)) {
+                    const content = msg.content.map((part) => {
+                        var _a;
+                        // Handle multimodal content
+                        if (part.type === 'image_url') {
+                            let path = ((_a = part.image_url) === null || _a === void 0 ? void 0 : _a.url) || '';
+                            imagePaths.push(path);
+                            return {
+                                type: 'text',
+                                text: '<__image__>',
+                            };
+                        }
+                        return part;
+                    });
+                    return Object.assign(Object.assign({}, msg), { content });
+                }
+                return msg;
+            }),
+            has_image: imagePaths.length > 0,
+            image_paths: imagePaths,
+        };
+    }
+    getFormattedChat(messages, template, params) {
+        const { messages: chat, has_image, image_paths, } = this._formatImageChat(messages);
+        const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+        let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml';
+        if (template)
+            tmpl = template; // Force replace if provided
+        const jsonSchema = getJsonSchema(params === null || params === void 0 ? void 0 : params.response_format);
+        const result = this.ctx.getFormattedChat(chat, tmpl, {
+            jinja: useJinja,
+            json_schema: jsonSchema,
+            tools: params === null || params === void 0 ? void 0 : params.tools,
+            parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
+            tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
+        });
+        if (!useJinja) {
+            return {
+                type: 'llama-chat',
+                prompt: result,
+                has_image,
+                image_paths,
+            };
+        }
+        const jinjaResult = result;
+        jinjaResult.type = 'jinja';
+        jinjaResult.has_image = has_image;
+        jinjaResult.image_paths = image_paths;
+        return jinjaResult;
+    }
+    completion(options, callback) {
+        const { messages, image_paths = options.image_paths } = this._formatImageChat(options.messages);
+        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages, image_paths: options.image_paths || image_paths }), callback || (() => { }));
+    }
+    stopCompletion() {
+        return this.ctx.stopCompletion();
+    }
+    tokenize(text) {
+        return this.ctx.tokenize(text);
+    }
+    detokenize(tokens) {
+        return this.ctx.detokenize(tokens);
+    }
+    embedding(text) {
+        return this.ctx.embedding(text);
+    }
+    saveSession(path) {
+        return this.ctx.saveSession(path);
+    }
+    loadSession(path) {
+        return this.ctx.loadSession(path);
+    }
+    release() {
+        return this.ctx.release();
+    }
+    applyLoraAdapters(adapters) {
+        return this.ctx.applyLoraAdapters(adapters);
+    }
+    removeLoraAdapters(adapters) {
+        return this.ctx.removeLoraAdapters(adapters);
+    }
+    getLoadedLoraAdapters() {
+        return this.ctx.getLoadedLoraAdapters();
+    }
+    initMultimodal(options) {
+        return this.ctx.initMultimodal(options);
+    }
+    isMultimodalEnabled() {
+        return this.ctx.isMultimodalEnabled();
+    }
+    releaseMultimodal() {
+        return this.ctx.releaseMultimodal();
+    }
+}
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-
+    const nativeCtx = new mods[variant].LlamaContext(options);
+    return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;
 exports.initLlama = binding_1.loadModule;
package/lib/index.ts
CHANGED
@@ -1,5 +1,16 @@
 import { loadModule, LlamaModelOptions } from './binding'
-import type {
+import type {
+  Module,
+  LlamaContext,
+  LibVariant,
+  ChatMessage,
+  LlamaCompletionOptions,
+  LlamaCompletionToken,
+  LlamaCompletionResult,
+  TokenizeResult,
+  EmbeddingResult,
+  CompletionResponseFormat,
+} from './binding'
 
 export * from './binding'
 
@@ -39,13 +50,202 @@ export function addNativeLogListener(
   }
 }
 
+const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
+  if (responseFormat?.type === 'json_schema') {
+    return responseFormat.json_schema?.schema
+  }
+  if (responseFormat?.type === 'json_object') {
+    return responseFormat.schema || {}
+  }
+  return null
+}
+
+class LlamaContextWrapper {
+  ctx: any
+
+  constructor(nativeCtx: any) {
+    this.ctx = nativeCtx
+  }
+
+  getSystemInfo(): string {
+    return this.ctx.getSystemInfo()
+  }
+
+  getModelInfo(): object {
+    return this.ctx.getModelInfo()
+  }
+
+  isJinjaSupported(): boolean {
+    const { minja } = this.ctx.getModelInfo().chatTemplates
+    return !!minja?.toolUse || !!minja?.default
+  }
+
+  isLlamaChatSupported(): boolean {
+    return !!this.ctx.getModelInfo().chatTemplates.llamaChat
+  }
+
+  _formatImageChat(messages: ChatMessage[] | undefined): {
+    messages: ChatMessage[] | undefined
+    has_image: boolean
+    image_paths?: string[]
+  } {
+    if (!messages)
+      return {
+        messages,
+        has_image: false,
+      }
+    const imagePaths: string[] = []
+    return {
+      messages: messages.map((msg) => {
+        if (Array.isArray(msg.content)) {
+          const content = msg.content.map((part) => {
+            // Handle multimodal content
+            if (part.type === 'image_url') {
+              let path = part.image_url?.url || ''
+              imagePaths.push(path)
+              return {
+                type: 'text',
+                text: '<__image__>',
+              }
+            }
+            return part
+          })
+
+          return {
+            ...msg,
+            content,
+          }
+        }
+        return msg
+      }),
+      has_image: imagePaths.length > 0,
+      image_paths: imagePaths,
+    }
+  }
+
+  getFormattedChat(
+    messages: ChatMessage[],
+    template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object {
+    const {
+      messages: chat,
+      has_image,
+      image_paths,
+    } = this._formatImageChat(messages)
+
+    const useJinja = this.isJinjaSupported() && params?.jinja
+    let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+    if (template) tmpl = template // Force replace if provided
+    const jsonSchema = getJsonSchema(params?.response_format)
+
+    const result = this.ctx.getFormattedChat(chat, tmpl, {
+      jinja: useJinja,
+      json_schema: jsonSchema,
+      tools: params?.tools,
+      parallel_tool_calls: params?.parallel_tool_calls,
+      tool_choice: params?.tool_choice,
+    })
+
+    if (!useJinja) {
+      return {
+        type: 'llama-chat',
+        prompt: result as string,
+        has_image,
+        image_paths,
+      }
+    }
+    const jinjaResult = result
+    jinjaResult.type = 'jinja'
+    jinjaResult.has_image = has_image
+    jinjaResult.image_paths = image_paths
+    return jinjaResult
+  }
+
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult> {
+    const { messages, image_paths = options.image_paths } =
+      this._formatImageChat(options.messages)
+    return this.ctx.completion({
+      ...options,
+      messages,
+      image_paths: options.image_paths || image_paths,
+    }, callback || (() => {}))
+  }
+
+  stopCompletion(): void {
+    return this.ctx.stopCompletion()
+  }
+
+  tokenize(text: string): Promise<TokenizeResult> {
+    return this.ctx.tokenize(text)
+  }
+
+  detokenize(tokens: number[]): Promise<string> {
+    return this.ctx.detokenize(tokens)
+  }
+
+  embedding(text: string): Promise<EmbeddingResult> {
+    return this.ctx.embedding(text)
+  }
+
+  saveSession(path: string): Promise<void> {
+    return this.ctx.saveSession(path)
+  }
+
+  loadSession(path: string): Promise<void> {
+    return this.ctx.loadSession(path)
+  }
+
+  release(): Promise<void> {
+    return this.ctx.release()
+  }
+
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void {
+    return this.ctx.applyLoraAdapters(adapters)
+  }
+
+  removeLoraAdapters(adapters: { path: string }[]): void {
+    return this.ctx.removeLoraAdapters(adapters)
+  }
+
+  getLoadedLoraAdapters(): { path: string; scaled: number }[] {
+    return this.ctx.getLoadedLoraAdapters()
+  }
+
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+  }): Promise<boolean> {
+    return this.ctx.initMultimodal(options)
+  }
+
+  isMultimodalEnabled(): Promise<boolean> {
+    return this.ctx.isMultimodalEnabled()
+  }
+
+  releaseMultimodal(): Promise<void> {
+    return this.ctx.releaseMultimodal()
+  }
+}
+
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
-): Promise<
+): Promise<LlamaContextWrapper> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
-
+
+  const nativeCtx = new mods[variant].LlamaContext(options)
+  return new LlamaContextWrapper(nativeCtx)
 }
 
 export const initLlama = loadModule
package/package.json
CHANGED
@@ -1,10 +1,11 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.16",
+  "version": "0.4.0",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
+    "postinstall": "node scripts/bootstrap.js",
     "test": "jest",
     "build": "tsc",
     "prepack": "yarn build",
package/src/EmbeddingWorker.cpp
CHANGED
@@ -6,7 +6,7 @@ EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}
 
 void EmbeddingWorker::Execute() {
-
+  llama_kv_self_clear(_sess->context());
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   auto vocab = llama_model_get_vocab(_sess->model());