@fugood/llama.node 0.3.16 → 0.4.0

This diff shows the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/src/llama-batch.cpp

@@ -189,7 +189,7 @@ llama_ubatch llama_sbatch::split_seq(size_t n_ubatch) {
     return ubatch;
 }
 
-void llama_sbatch::from_batch(const llama_batch & batch, size_t n_embd, bool simple_split, bool logits_all) {
+llama_sbatch::llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple_split, bool logits_all) {
     GGML_ASSERT(batch.n_tokens >= 0);
     this->batch = &batch;
     this->n_embd = n_embd;
@@ -203,6 +203,7 @@ void llama_sbatch::from_batch(const llama_batch & batch, size_t n_embd, bool sim
     for (size_t i = 0; i < n_tokens; ++i) {
         ids[i] = i;
     }
+
     if (simple_split) {
         seq.resize(1);
         llama_sbatch_seq & s = seq[0];
@@ -212,6 +213,7 @@ void llama_sbatch::from_batch(const llama_batch & batch, size_t n_embd, bool sim
         s.length = n_tokens;
         return;
     }
+
     std::sort(ids.begin(), ids.end(),
         [&batch](size_t a, size_t b) {
             int32_t n_seq_a = batch.n_seq_id ? batch.n_seq_id[a] : 1;
@@ -239,6 +241,7 @@ void llama_sbatch::from_batch(const llama_batch & batch, size_t n_embd, bool sim
             return n_seq_a > n_seq_b;
         }
     );
+
     // init seq
     llama_sbatch_seq * last_seq = nullptr;
 
@@ -262,6 +265,7 @@ void llama_sbatch::from_batch(const llama_batch & batch, size_t n_embd, bool sim
         seq.push_back(new_seq);
         last_seq = &seq.back();
     }
+
     // keep shared prompts first at the end, then sort by length descending.
     std::sort(seq.begin(), seq.end(),
         [](llama_sbatch_seq & a, llama_sbatch_seq & b) {
package/src/llama.cpp/src/llama-batch.h

@@ -70,7 +70,8 @@ struct llama_sbatch {
     // sequence-wise split
     llama_ubatch split_seq(size_t n_ubatch);
 
-    void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
+    llama_sbatch() = default;
+    llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
 };
 
 // temporary allocate memory for the input batch if needed
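
This is a source-breaking change for embedders that default-constructed a llama_sbatch and then called from_batch. A minimal call-site migration sketch (variable names are illustrative, not taken from this diff):

    // before (0.3.16 vendored llama.cpp): two-step initialization
    //   llama_sbatch sbatch;
    //   sbatch.from_batch(batch, n_embd, /*simple_split=*/true, /*logits_all=*/false);
    //
    // after (0.4.0 vendored llama.cpp): initialization happens in the constructor;
    // llama_sbatch() = default remains for members that are assigned later.
    llama_sbatch sbatch(batch, n_embd, /*simple_split=*/true, /*logits_all=*/false);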
package/src/llama.cpp/src/llama-chat.cpp

@@ -35,6 +35,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
     { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
     { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
+    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
     { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
     { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
     { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
@@ -50,8 +51,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
     { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
     { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
-    { "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
-    { "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 },
+    { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 },
+    { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 },
     { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE },
     { "minicpm", LLM_CHAT_TEMPLATE_MINICPM },
     { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 },
@@ -59,6 +60,10 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "granite", LLM_CHAT_TEMPLATE_GRANITE },
     { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
     { "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
+    { "yandex", LLM_CHAT_TEMPLATE_YANDEX },
+    { "bailing", LLM_CHAT_TEMPLATE_BAILING },
+    { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
+    { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
 };
 
 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -78,7 +83,9 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
     if (tmpl_contains("<|im_start|>")) {
         return tmpl_contains("<|im_sep|>")
             ? LLM_CHAT_TEMPLATE_PHI_4
-            : LLM_CHAT_TEMPLATE_CHATML;
+            : tmpl_contains("<end_of_utterance>")
+                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
+                : LLM_CHAT_TEMPLATE_CHATML;
     } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
         if (tmpl_contains("[SYSTEM_PROMPT]")) {
             return LLM_CHAT_TEMPLATE_MISTRAL_V7;
@@ -116,8 +123,12 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         }
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
         return LLM_CHAT_TEMPLATE_PHI_3;
+    } else if (tmpl_contains("[gMASK]<sop>")) {
+        return LLM_CHAT_TEMPLATE_CHATGLM_4;
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
         return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
+    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
+        return LLM_CHAT_TEMPLATE_GLMEDGE;
     } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
         return LLM_CHAT_TEMPLATE_ZEPHYR;
     } else if (tmpl_contains("bos_token + message['role']")) {
@@ -146,9 +157,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_LLAMA_3;
     } else if (tmpl_contains("[gMASK]sop")) {
         // chatglm3-6b
-        return LLM_CHAT_TEMPLATE_CHATGML_3;
-    } else if (tmpl_contains("[gMASK]<sop>")) {
-        return LLM_CHAT_TEMPLATE_CHATGML_4;
+        return LLM_CHAT_TEMPLATE_CHATGLM_3;
     } else if (tmpl_contains(LU8("<用户>"))) {
         // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
         return LLM_CHAT_TEMPLATE_MINICPM;
@@ -168,6 +177,12 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_GIGACHAT;
     } else if (tmpl_contains("<|role_start|>")) {
         return LLM_CHAT_TEMPLATE_MEGREZ;
+    } else if (tmpl_contains(" Ассистент:")) {
+        return LLM_CHAT_TEMPLATE_YANDEX;
+    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
+        return LLM_CHAT_TEMPLATE_BAILING;
+    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
+        return LLM_CHAT_TEMPLATE_LLAMA4;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
@@ -188,19 +203,20 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "<|im_start|>assistant\n";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
         // Official mistral 'v7' template
         // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
+        // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
+        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
         for (auto message : chat) {
             std::string role(message->role);
             std::string content(message->content);
             if (role == "system") {
-                ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
+                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
             } else if (role == "user") {
-                ss << "[INST] " << content << "[/INST]";
-            }
-            else {
-                ss << " " << content << "</s>";
+                ss << "[INST]" << trailing_space << content << "[/INST]";
+            } else {
+                ss << trailing_space << content << "</s>";
             }
         }
     } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
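
To make the v7 vs. v7-tekken difference concrete, tracing the branch above over the three-message chat {system: "S", user: "U", assistant: "A"} yields (derived by reading the code, not an output captured in this diff):

    mistral-v7:        [SYSTEM_PROMPT] S[/SYSTEM_PROMPT][INST] U[/INST] A</s>
    mistral-v7-tekken: [SYSTEM_PROMPT]S[/SYSTEM_PROMPT][INST]U[/INST]A</s>

The only difference is the space after each control token, which the tekken tokenizer variant omits.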
@@ -423,7 +439,7 @@
         if (add_ass) {
             ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_3) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
         // chatglm3-6b
         ss << "[gMASK]" << "sop";
         for (auto message : chat) {
@@ -433,14 +449,14 @@
         if (add_ass) {
             ss << "<|assistant|>";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGML_4) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
         ss << "[gMASK]" << "<sop>";
         for (auto message : chat) {
             std::string role(message->role);
             ss << "<|" << role << "|>" << "\n" << message->content;
         }
         if (add_ass) {
-            ss << "<|assistant|>";
+            ss << "<|assistant|>\n";
         }
     } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
         for (auto message : chat) {
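
Besides renaming the misspelled CHATGML_* enumerators to CHATGLM_*, this hunk changes GLM-4 output: the generation prompt now ends with a newline. Tracing a single user message "Hi" with add_ass = true (derived from the code):

    0.3.16: [gMASK]<sop><|user|>\nHi<|assistant|>
    0.4.0:  [gMASK]<sop><|user|>\nHi<|assistant|>\n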
@@ -567,6 +583,66 @@
         if (add_ass) {
             ss << "<|role_start|>assistant<|role_end|>";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
+        // Yandex template ("\n\n" is defined as EOT token)
+
+        ss << "<s>";
+
+        for (size_t i = 0; i < chat.size(); i++) {
+            std::string role(chat[i]->role);
+            if (role == "user") {
+                ss << " Пользователь: " << chat[i]->content << "\n\n";
+            } else if (role == "assistant") {
+                ss << " Ассистент: " << chat[i]->content << "\n\n";
+            }
+        }
+
+        // Add generation prompt if needed
+        if (add_ass) {
+            ss << " Ассистент:[SEP]";
+        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) {
+        // Bailing (Ling) template
+        for (auto message : chat) {
+            std::string role(message->role);
+
+            if (role == "user") {
+                role = "HUMAN";
+            } else {
+                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
+            }
+
+            ss << "<role>" << role << "</role>" << message->content;
+        }
+
+        if (add_ass) {
+            ss << "<role>ASSISTANT</role>";
+        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
+        // Llama 4
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
+        }
+        if (add_ass) {
+            ss << "<|header_start|>assistant<|header_end|>\n\n";
+        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
+        // SmolVLM
+        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << message->content << "\n\n";
+            } else if (role == "user") {
+                ss << "User: " << message->content << "<end_of_utterance>\n";
+            } else {
+                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
+            }
+        }
+        if (add_ass) {
+            ss << "Assistant:";
+        }
     } else {
         // template not supported
         return -1;
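
The new built-in templates can be exercised through the public C API without loading a model: llama_chat_apply_template accepts a built-in template name such as "llama4" directly, and falls back to the marker-based detection above for raw Jinja strings. A minimal sketch, assuming the llama_chat_apply_template signature in this version's include/llama.h:

    #include <cstdio>
    #include <vector>

    #include "llama.h"

    int main() {
        const llama_chat_message chat[] = {
            { "system", "You are helpful." },
            { "user",   "Hello!"           },
        };
        const size_t n_msg = sizeof(chat) / sizeof(chat[0]);

        std::vector<char> buf(1024);
        int32_t n = llama_chat_apply_template("llama4", chat, n_msg,
                                              /*add_ass=*/true,
                                              buf.data(), (int32_t) buf.size());
        if (n < 0) {
            fprintf(stderr, "template not supported\n");
            return 1;
        }
        if (n > (int32_t) buf.size()) {
            // return value is the required size; resize and retry
            buf.resize(n);
            n = llama_chat_apply_template("llama4", chat, n_msg, true,
                                          buf.data(), (int32_t) buf.size());
        }
        printf("%.*s\n", n, buf.data());
        return 0;
    }

With add_ass = true this should render each message as <|header_start|>ROLE<|header_end|>\n\nCONTENT<|eot|> and append the assistant header, matching the LLAMA4 branch above.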
@@ -585,4 +661,3 @@ int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
     }
     return (int32_t) LLM_CHAT_TEMPLATES.size();
 }
-
package/src/llama.cpp/src/llama-chat.h

@@ -14,6 +14,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MISTRAL_V3,
     LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
     LLM_CHAT_TEMPLATE_MISTRAL_V7,
+    LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN,
     LLM_CHAT_TEMPLATE_PHI_3,
     LLM_CHAT_TEMPLATE_PHI_4,
     LLM_CHAT_TEMPLATE_FALCON_3,
@@ -29,8 +30,8 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_DEEPSEEK_3,
     LLM_CHAT_TEMPLATE_COMMAND_R,
     LLM_CHAT_TEMPLATE_LLAMA_3,
-    LLM_CHAT_TEMPLATE_CHATGML_3,
-    LLM_CHAT_TEMPLATE_CHATGML_4,
+    LLM_CHAT_TEMPLATE_CHATGLM_3,
+    LLM_CHAT_TEMPLATE_CHATGLM_4,
     LLM_CHAT_TEMPLATE_GLMEDGE,
     LLM_CHAT_TEMPLATE_MINICPM,
     LLM_CHAT_TEMPLATE_EXAONE_3,
@@ -38,6 +39,10 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_GRANITE,
     LLM_CHAT_TEMPLATE_GIGACHAT,
     LLM_CHAT_TEMPLATE_MEGREZ,
+    LLM_CHAT_TEMPLATE_YANDEX,
+    LLM_CHAT_TEMPLATE_BAILING,
+    LLM_CHAT_TEMPLATE_LLAMA4,
+    LLM_CHAT_TEMPLATE_SMOLVLM,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
 
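
The enum growth is visible through the stable C API as well: llama_chat_builtin_templates copies up to len template names into a caller-provided array and returns the total count, so the usual two-call pattern (a sketch, assuming the declaration in include/llama.h) now reports entries such as "mistral-v7-tekken", "yandex", "bailing", "llama4", and "smolvlm":

    #include <cstdio>
    #include <vector>

    #include "llama.h"

    int main() {
        // first call with no buffer: the return value is the total number
        // of built-in templates
        const int32_t n = llama_chat_builtin_templates(nullptr, 0);

        std::vector<const char *> names(n);
        llama_chat_builtin_templates(names.data(), names.size());

        for (const char * name : names) {
            printf("%s\n", name); // e.g. chatglm3, ..., llama4, ..., smolvlm
        }
        return 0;
    }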