@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/CMakeLists.txt CHANGED
@@ -69,12 +69,17 @@ endif()
 
 set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
 
+set(LLAMA_CURL OFF CACHE BOOL "Build curl")
+
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
+add_subdirectory("src/llama.cpp/tools/mtmd")
 
 include_directories(
   ${CMAKE_JS_INC}
   "src/llama.cpp"
+  "src/llama.cpp/src"
+  "src/tools/mtmd"
 )
 
 file(
@@ -101,7 +106,7 @@ file(
 
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
-target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common)
+target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common mtmd ${CMAKE_THREAD_LIBS_INIT})
 
 add_custom_target(copy_assets ALL DEPENDS ${PROJECT_NAME})
 
package/bin/** CHANGED
Binary files (prebuilt llama-node.node and node.lib binaries) not shown.
package/lib/binding.ts CHANGED
@@ -1,8 +1,17 @@
 import * as path from 'path'
 
+
+export type MessagePart = {
+  type: string,
+  text?: string,
+  image_url?: {
+    url?: string
+  }
+}
+
 export type ChatMessage = {
   role: string
-  content: string
+  content?: string | MessagePart[]
 }
 
 export type LlamaModelOptions = {
@@ -36,6 +45,10 @@ export type LlamaModelOptions = {
   | 'iq4_nl'
   | 'q5_0'
   | 'q5_1'
+  /**
+   * Enable context shifting to handle prompts larger than context size
+   */
+  ctx_shift?: boolean
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
@@ -89,6 +102,13 @@ export type LlamaCompletionOptions = {
   grammar_lazy?: boolean
   grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
+  /**
+   * Path(s) to image file(s) to process before generating text.
+   * When provided, the image(s) will be processed and added to the context.
+   * Requires multimodal support to be enabled via initMultimodal.
+   * Supports both file paths and base64 data URLs.
+   */
+  image_paths?: string | string[]
 }
 
 export type LlamaCompletionResult = {
@@ -96,6 +116,7 @@ export type LlamaCompletionResult = {
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean
+  context_full: boolean
   timings: {
     prompt_n: number
     prompt_ms: number
@@ -149,9 +170,30 @@ export interface LlamaContext {
   applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
   removeLoraAdapters(adapters: { path: string }[]): void
   getLoadedLoraAdapters(): { path: string; scaled: number }[]
+  /**
+   * Initialize multimodal support with a mmproj file
+   * @param mmproj_path Path to the multimodal projector file
+   * @returns Promise resolving to true if initialization was successful
+   */
+  initMultimodal(options: { path: string; use_gpu?: boolean }): Promise<boolean>
+
+  /**
+   * Check if multimodal support is enabled
+   * @returns Promise resolving to true if multimodal is enabled
+   */
+  isMultimodalEnabled(): Promise<boolean>
+
+  /**
+   * Release multimodal support
+   */
+  releaseMultimodal(): Promise<void>
+
   // static
   loadModelInfo(path: string, skip: string[]): Promise<Object>
-  toggleNativeLog(enable: boolean, callback: (level: string, text: string) => void): void
+  toggleNativeLog(
+    enable: boolean,
+    callback: (level: string, text: string) => void,
+  ): void
 }
 
 export interface Module {
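Taken together, the new MessagePart type and the widened ChatMessage.content accept OpenAI-style multimodal messages. A minimal sketch (not from the package; the image path is a placeholder) of a message that conforms to the updated types:

import type { ChatMessage } from '@fugood/llama.node'

// Image parts use the `image_url` shape; per the JSDoc above, the URL may be
// a file path or a base64 data URL, and requires initMultimodal() first.
const messages: ChatMessage[] = [
  {
    role: 'user',
    content: [
      { type: 'text', text: 'Describe this image.' },
      { type: 'image_url', image_url: { url: '/path/to/photo.jpg' } },
    ],
  },
]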
package/lib/index.js CHANGED
@@ -51,12 +51,143 @@ function addNativeLogListener(listener) {
     },
   };
 }
+const getJsonSchema = (responseFormat) => {
+    var _a;
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_schema') {
+        return (_a = responseFormat.json_schema) === null || _a === void 0 ? void 0 : _a.schema;
+    }
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_object') {
+        return responseFormat.schema || {};
+    }
+    return null;
+};
+class LlamaContextWrapper {
+    constructor(nativeCtx) {
+        this.ctx = nativeCtx;
+    }
+    getSystemInfo() {
+        return this.ctx.getSystemInfo();
+    }
+    getModelInfo() {
+        return this.ctx.getModelInfo();
+    }
+    isJinjaSupported() {
+        const { minja } = this.ctx.getModelInfo().chatTemplates;
+        return !!(minja === null || minja === void 0 ? void 0 : minja.toolUse) || !!(minja === null || minja === void 0 ? void 0 : minja.default);
+    }
+    isLlamaChatSupported() {
+        return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
+    }
+    _formatImageChat(messages) {
+        if (!messages)
+            return {
+                messages,
+                has_image: false,
+            };
+        const imagePaths = [];
+        return {
+            messages: messages.map((msg) => {
+                if (Array.isArray(msg.content)) {
+                    const content = msg.content.map((part) => {
+                        var _a;
+                        // Handle multimodal content
+                        if (part.type === 'image_url') {
+                            let path = ((_a = part.image_url) === null || _a === void 0 ? void 0 : _a.url) || '';
+                            imagePaths.push(path);
+                            return {
+                                type: 'text',
+                                text: '<__image__>',
+                            };
+                        }
+                        return part;
+                    });
+                    return Object.assign(Object.assign({}, msg), { content });
+                }
+                return msg;
+            }),
+            has_image: imagePaths.length > 0,
+            image_paths: imagePaths,
+        };
+    }
+    getFormattedChat(messages, template, params) {
+        const { messages: chat, has_image, image_paths, } = this._formatImageChat(messages);
+        const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+        let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml';
+        if (template)
+            tmpl = template; // Force replace if provided
+        const jsonSchema = getJsonSchema(params === null || params === void 0 ? void 0 : params.response_format);
+        const result = this.ctx.getFormattedChat(chat, tmpl, {
+            jinja: useJinja,
+            json_schema: jsonSchema,
+            tools: params === null || params === void 0 ? void 0 : params.tools,
+            parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
+            tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
+        });
+        if (!useJinja) {
+            return {
+                type: 'llama-chat',
+                prompt: result,
+                has_image,
+                image_paths,
+            };
+        }
+        const jinjaResult = result;
+        jinjaResult.type = 'jinja';
+        jinjaResult.has_image = has_image;
+        jinjaResult.image_paths = image_paths;
+        return jinjaResult;
+    }
+    completion(options, callback) {
+        const { messages, image_paths = options.image_paths } = this._formatImageChat(options.messages);
+        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages, image_paths: options.image_paths || image_paths }), callback || (() => { }));
+    }
+    stopCompletion() {
+        return this.ctx.stopCompletion();
+    }
+    tokenize(text) {
+        return this.ctx.tokenize(text);
+    }
+    detokenize(tokens) {
+        return this.ctx.detokenize(tokens);
+    }
+    embedding(text) {
+        return this.ctx.embedding(text);
+    }
+    saveSession(path) {
+        return this.ctx.saveSession(path);
+    }
+    loadSession(path) {
+        return this.ctx.loadSession(path);
+    }
+    release() {
+        return this.ctx.release();
+    }
+    applyLoraAdapters(adapters) {
+        return this.ctx.applyLoraAdapters(adapters);
+    }
+    removeLoraAdapters(adapters) {
+        return this.ctx.removeLoraAdapters(adapters);
+    }
+    getLoadedLoraAdapters() {
+        return this.ctx.getLoadedLoraAdapters();
+    }
+    initMultimodal(options) {
+        return this.ctx.initMultimodal(options);
+    }
+    isMultimodalEnabled() {
+        return this.ctx.isMultimodalEnabled();
+    }
+    releaseMultimodal() {
+        return this.ctx.releaseMultimodal();
+    }
+}
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-    return new mods[variant].LlamaContext(options);
+    const nativeCtx = new mods[variant].LlamaContext(options);
+    return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;
 exports.initLlama = binding_1.loadModule;
package/lib/index.ts CHANGED
@@ -1,5 +1,16 @@
 import { loadModule, LlamaModelOptions } from './binding'
-import type { Module, LlamaContext, LibVariant } from './binding'
+import type {
+  Module,
+  LlamaContext,
+  LibVariant,
+  ChatMessage,
+  LlamaCompletionOptions,
+  LlamaCompletionToken,
+  LlamaCompletionResult,
+  TokenizeResult,
+  EmbeddingResult,
+  CompletionResponseFormat,
+} from './binding'
 
 export * from './binding'
 
@@ -39,13 +50,202 @@ export function addNativeLogListener(
   }
 }
 
+const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
+  if (responseFormat?.type === 'json_schema') {
+    return responseFormat.json_schema?.schema
+  }
+  if (responseFormat?.type === 'json_object') {
+    return responseFormat.schema || {}
+  }
+  return null
+}
+
+class LlamaContextWrapper {
+  ctx: any
+
+  constructor(nativeCtx: any) {
+    this.ctx = nativeCtx
+  }
+
+  getSystemInfo(): string {
+    return this.ctx.getSystemInfo()
+  }
+
+  getModelInfo(): object {
+    return this.ctx.getModelInfo()
+  }
+
+  isJinjaSupported(): boolean {
+    const { minja } = this.ctx.getModelInfo().chatTemplates
+    return !!minja?.toolUse || !!minja?.default
+  }
+
+  isLlamaChatSupported(): boolean {
+    return !!this.ctx.getModelInfo().chatTemplates.llamaChat
+  }
+
+  _formatImageChat(messages: ChatMessage[] | undefined): {
+    messages: ChatMessage[] | undefined
+    has_image: boolean
+    image_paths?: string[]
+  } {
+    if (!messages)
+      return {
+        messages,
+        has_image: false,
+      }
+    const imagePaths: string[] = []
+    return {
+      messages: messages.map((msg) => {
+        if (Array.isArray(msg.content)) {
+          const content = msg.content.map((part) => {
+            // Handle multimodal content
+            if (part.type === 'image_url') {
+              let path = part.image_url?.url || ''
+              imagePaths.push(path)
+              return {
+                type: 'text',
+                text: '<__image__>',
+              }
+            }
+            return part
+          })
+
+          return {
+            ...msg,
+            content,
+          }
+        }
+        return msg
+      }),
+      has_image: imagePaths.length > 0,
+      image_paths: imagePaths,
+    }
+  }
+
+  getFormattedChat(
+    messages: ChatMessage[],
+    template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object {
+    const {
+      messages: chat,
+      has_image,
+      image_paths,
+    } = this._formatImageChat(messages)
+
+    const useJinja = this.isJinjaSupported() && params?.jinja
+    let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+    if (template) tmpl = template // Force replace if provided
+    const jsonSchema = getJsonSchema(params?.response_format)
+
+    const result = this.ctx.getFormattedChat(chat, tmpl, {
+      jinja: useJinja,
+      json_schema: jsonSchema,
+      tools: params?.tools,
+      parallel_tool_calls: params?.parallel_tool_calls,
+      tool_choice: params?.tool_choice,
+    })
+
+    if (!useJinja) {
+      return {
+        type: 'llama-chat',
+        prompt: result as string,
+        has_image,
+        image_paths,
+      }
+    }
+    const jinjaResult = result
+    jinjaResult.type = 'jinja'
+    jinjaResult.has_image = has_image
+    jinjaResult.image_paths = image_paths
+    return jinjaResult
+  }
+
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult> {
+    const { messages, image_paths = options.image_paths } =
+      this._formatImageChat(options.messages)
+    return this.ctx.completion({
+      ...options,
+      messages,
+      image_paths: options.image_paths || image_paths,
+    }, callback || (() => {}))
+  }
+
+  stopCompletion(): void {
+    return this.ctx.stopCompletion()
+  }
+
+  tokenize(text: string): Promise<TokenizeResult> {
+    return this.ctx.tokenize(text)
+  }
+
+  detokenize(tokens: number[]): Promise<string> {
+    return this.ctx.detokenize(tokens)
+  }
+
+  embedding(text: string): Promise<EmbeddingResult> {
+    return this.ctx.embedding(text)
+  }
+
+  saveSession(path: string): Promise<void> {
+    return this.ctx.saveSession(path)
+  }
+
+  loadSession(path: string): Promise<void> {
+    return this.ctx.loadSession(path)
+  }
+
+  release(): Promise<void> {
+    return this.ctx.release()
+  }
+
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void {
+    return this.ctx.applyLoraAdapters(adapters)
+  }
+
+  removeLoraAdapters(adapters: { path: string }[]): void {
+    return this.ctx.removeLoraAdapters(adapters)
+  }
+
+  getLoadedLoraAdapters(): { path: string; scaled: number }[] {
+    return this.ctx.getLoadedLoraAdapters()
+  }
+
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+  }): Promise<boolean> {
+    return this.ctx.initMultimodal(options)
+  }
+
+  isMultimodalEnabled(): Promise<boolean> {
+    return this.ctx.isMultimodalEnabled()
+  }
+
+  releaseMultimodal(): Promise<void> {
+    return this.ctx.releaseMultimodal()
+  }
+}
+
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
-): Promise<LlamaContext> => {
+): Promise<LlamaContextWrapper> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
-  return new mods[variant].LlamaContext(options)
+
+  const nativeCtx = new mods[variant].LlamaContext(options)
+  return new LlamaContextWrapper(nativeCtx)
 }
 
 export const initLlama = loadModule
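For orientation, a hedged end-to-end sketch of the 0.4.0 wrapper API. The model and mmproj paths are hypothetical, and the `model` option is assumed from LlamaModelOptions; method names and option shapes follow binding.ts and the wrapper above.

import { loadModel } from '@fugood/llama.node'

const main = async () => {
  const ctx = await loadModel({ model: '/models/llm.gguf' })

  // Multimodal support must be initialized before image messages can be
  // evaluated; resolves to true on success per the LlamaContext typing.
  await ctx.initMultimodal({ path: '/models/mmproj.gguf', use_gpu: true })

  // The wrapper rewrites image_url parts to '<__image__>' text markers and
  // passes the collected paths to the native layer as image_paths.
  const result = await ctx.completion({
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'What is in this picture?' },
          { type: 'image_url', image_url: { url: '/tmp/cat.png' } },
        ],
      },
    ],
  })
  console.log(result)

  await ctx.releaseMultimodal()
  await ctx.release()
}

main()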
package/package.json CHANGED
@@ -1,10 +1,11 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.16",
+  "version": "0.4.0",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
+    "postinstall": "node scripts/bootstrap.js",
     "test": "jest",
     "build": "tsc",
     "prepack": "yarn build",
package/src/EmbeddingWorker.cpp CHANGED
@@ -6,7 +6,7 @@ EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}
 
 void EmbeddingWorker::Execute() {
-  llama_kv_cache_clear(_sess->context());
+  llama_kv_self_clear(_sess->context());
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   auto vocab = llama_model_get_vocab(_sess->model());