@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -78,3 +78,12 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
78
78
 
79
79
  // function to be used by test-arg-parser
80
80
  common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
81
+ bool common_has_curl();
82
+
83
+ struct common_remote_params {
84
+ std::vector<std::string> headers;
85
+ long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
86
+ long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
87
+ };
88
+ // get remote file content, returns <http_code, raw_response_body>
89
+ std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
@@ -6,6 +6,15 @@
6
6
 
7
7
  #include <optional>
8
8
 
9
+ static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
10
+ auto time = std::chrono::system_clock::to_time_t(now);
11
+ auto local_time = *std::localtime(&time);
12
+ std::ostringstream ss;
13
+ ss << std::put_time(&local_time, format.c_str());
14
+ auto res = ss.str();
15
+ return res;
16
+ }
17
+
9
18
  typedef minja::chat_template common_chat_template;
10
19
 
11
20
  struct common_chat_templates {
@@ -24,6 +33,7 @@ struct templates_params {
24
33
  std::string grammar;
25
34
  bool add_generation_prompt = true;
26
35
  bool extract_reasoning = true;
36
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
27
37
  };
28
38
 
29
39
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
@@ -125,7 +135,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
125
135
  msgs.push_back(msg);
126
136
  }
127
137
  } catch (const std::exception & e) {
128
- throw std::runtime_error("Failed to parse messages: " + std::string(e.what()) + "; messages = " + messages.dump(2));
138
+ // @ngxson : disable otherwise it's bloating the API response
139
+ // printf("%s\n", std::string("; messages = ") + messages.dump(2));
140
+ throw std::runtime_error("Failed to parse messages: " + std::string(e.what()));
129
141
  }
130
142
 
131
143
  return msgs;
@@ -937,78 +949,83 @@ static void expect_tool_parameters(const std::string & name, const json & parame
937
949
  }
938
950
  }
939
951
 
940
- static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
952
+ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
941
953
  auto builtin_tools = json::array();
942
954
  common_chat_params data;
943
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
944
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
945
- std::vector<std::string> tool_rules;
955
+ if (!inputs.tools.is_null()) {
956
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
957
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
958
+ std::vector<std::string> tool_rules;
946
959
 
947
- auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
948
- if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
949
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
950
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
951
- expect_tool_parameters(name, parameters, {"query"});
952
- } else if (name == "python" || name == "code_interpreter") {
953
- // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
954
- expect_tool_parameters(name, parameters, {"code"});
955
- } else {
956
- return false;
957
- }
960
+ auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
961
+ if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
962
+ // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
963
+ // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
964
+ expect_tool_parameters(name, parameters, {"query"});
965
+ } else if (name == "python" || name == "code_interpreter") {
966
+ // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
967
+ expect_tool_parameters(name, parameters, {"code"});
968
+ } else {
969
+ return false;
970
+ }
958
971
 
959
- std::vector<std::string> kvs;
960
- for (const auto & [key, value] : parameters.at("properties").items()) {
961
- kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
962
- }
972
+ std::vector<std::string> kvs;
973
+ for (const auto & [key, value] : parameters.at("properties").items()) {
974
+ kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
975
+ }
963
976
 
964
- tool_rules.push_back(
965
- builder.add_rule(
966
- name + "-call",
967
- "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
968
- builtin_tools.push_back(name);
977
+ tool_rules.push_back(
978
+ builder.add_rule(
979
+ name + "-call",
980
+ "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
981
+ builtin_tools.push_back(name);
969
982
 
970
- return true;
971
- };
983
+ return true;
984
+ };
972
985
 
973
- foreach_function(inputs.tools, [&](const json & tool) {
974
- const auto & function = tool.at("function");
975
- std::string name = function.at("name");
976
- auto parameters = function.at("parameters");
977
- builder.resolve_refs(parameters);
986
+ foreach_function(inputs.tools, [&](const json & tool) {
987
+ const auto & function = tool.at("function");
988
+ std::string name = function.at("name");
989
+ auto parameters = function.at("parameters");
990
+ builder.resolve_refs(parameters);
978
991
 
979
- // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
980
- if (allow_python_tag_builtin_tools) {
981
- handle_builtin_tool(name, parameters);
992
+ // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
993
+ if (allow_python_tag_builtin_tools) {
994
+ handle_builtin_tool(name, parameters);
995
+ }
996
+ tool_rules.push_back(
997
+ builder.add_rule(
998
+ name + "-call",
999
+ "\"{\" space "
1000
+ "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
1001
+ " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
1002
+ " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
1003
+ "\"}\" space"));
1004
+ });
1005
+ // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
1006
+ data.grammar_triggers.push_back({
1007
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
1008
+ "\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
1009
+ });
1010
+ if (!builtin_tools.empty()) {
1011
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1012
+ data.preserved_tokens.push_back("<|python_tag|>");
982
1013
  }
983
- tool_rules.push_back(
984
- builder.add_rule(
985
- name + "-call",
986
- "\"{\" space "
987
- "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
988
- " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
989
- " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
990
- "\"}\" space"));
1014
+ // Allow a few empty lines on top of the usual constrained json schema space rule.
1015
+ builder.add_rule("root", string_join(tool_rules, " | "));
1016
+ data.additional_stops.push_back("<|eom_id|>");
991
1017
  });
992
- // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
993
- data.grammar_triggers.push_back({
994
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
995
- "\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
996
- });
997
- if (!builtin_tools.empty()) {
998
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
999
- data.preserved_tokens.push_back("<|python_tag|>");
1000
- }
1001
- // Allow a few empty lines on top of the usual constrained json schema space rule.
1002
- builder.add_rule("root", string_join(tool_rules, " | "));
1003
- });
1004
- data.additional_stops.push_back("<|eom_id|>");
1018
+ data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
1019
+ ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
1020
+ : COMMON_CHAT_FORMAT_LLAMA_3_X;
1021
+ } else {
1022
+ data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
1023
+ }
1005
1024
  data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
1025
+ {"date_string", format_time(inputs.now, "%d %b %Y")},
1006
1026
  {"tools_in_user_message", false},
1007
1027
  {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
1008
1028
  });
1009
- data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
1010
- ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
1011
- : COMMON_CHAT_FORMAT_LLAMA_3_X;
1012
1029
  return data;
1013
1030
  }
1014
1031
  static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
@@ -1148,7 +1165,7 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
1148
1165
  LOG_DBG("%s\n", __func__);
1149
1166
  common_chat_params data;
1150
1167
  data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
1151
- {"datetime", "Jan 29 2025 13:00:00 GMT"},
1168
+ {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
1152
1169
  {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
1153
1170
  });
1154
1171
  if (inputs.tools.is_array() && !inputs.tools.empty()) {
@@ -1283,55 +1300,59 @@ static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & in
1283
1300
  static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
1284
1301
  // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
1285
1302
  common_chat_params data;
1286
- json tools = inputs.tools.is_null() ? inputs.tools : json::array();
1287
- std::string python_code_argument_name;
1288
- auto has_raw_python = false;
1289
1303
 
1290
- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1291
- data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1292
- std::vector<std::string> tool_rules;
1293
- foreach_function(inputs.tools, [&](const json & tool) {
1294
- const auto & function = tool.at("function");
1295
- const auto & parameters = function.at("parameters");
1296
- std::string name = function.at("name");
1297
- if (name == "python" || name == "ipython") {
1298
- if (!parameters.contains("type")) {
1299
- throw std::runtime_error("Missing type in python tool");
1300
- }
1301
- has_raw_python = true;
1302
- const auto & type = parameters.at("type");
1303
- if (type == "object") {
1304
- auto properties = parameters.at("properties");
1305
- for (auto it = properties.begin(); it != properties.end(); ++it) {
1306
- if (it.value().at("type") == "string") {
1307
- if (!python_code_argument_name.empty()) {
1308
- throw std::runtime_error("Multiple string arguments found in python tool");
1304
+ if (!inputs.tools.is_null()) {
1305
+ std::string python_code_argument_name;
1306
+ auto has_raw_python = false;
1307
+
1308
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1309
+ data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1310
+ std::vector<std::string> tool_rules;
1311
+ foreach_function(inputs.tools, [&](const json & tool) {
1312
+ const auto & function = tool.at("function");
1313
+ const auto & parameters = function.at("parameters");
1314
+ std::string name = function.at("name");
1315
+ if (name == "python" || name == "ipython") {
1316
+ if (!parameters.contains("type")) {
1317
+ throw std::runtime_error("Missing type in python tool");
1318
+ }
1319
+ has_raw_python = true;
1320
+ const auto & type = parameters.at("type");
1321
+ if (type == "object") {
1322
+ auto properties = parameters.at("properties");
1323
+ for (auto it = properties.begin(); it != properties.end(); ++it) {
1324
+ if (it.value().at("type") == "string") {
1325
+ if (!python_code_argument_name.empty()) {
1326
+ throw std::runtime_error("Multiple string arguments found in python tool");
1327
+ }
1328
+ python_code_argument_name = it.key();
1309
1329
  }
1310
- python_code_argument_name = it.key();
1311
1330
  }
1331
+ if (python_code_argument_name.empty()) {
1332
+ throw std::runtime_error("No string argument found in python tool");
1333
+ }
1334
+ } else if (type != "string") {
1335
+ throw std::runtime_error("Invalid type in python tool: " + type.dump());
1312
1336
  }
1313
- if (python_code_argument_name.empty()) {
1314
- throw std::runtime_error("No string argument found in python tool");
1315
- }
1316
- } else if (type != "string") {
1317
- throw std::runtime_error("Invalid type in python tool: " + type.dump());
1318
1337
  }
1338
+ tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
1339
+ });
1340
+ if (has_raw_python) {
1341
+ tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
1342
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1343
+ data.preserved_tokens.push_back("<|python_tag|>");
1319
1344
  }
1320
- tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
1345
+ auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
1346
+ builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
1347
+ data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
1321
1348
  });
1322
- if (has_raw_python) {
1323
- tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
1324
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1325
- data.preserved_tokens.push_back("<|python_tag|>");
1326
- }
1327
- auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
1328
- builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
1329
- data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
1330
- });
1349
+ data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
1350
+ } else {
1351
+ data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
1352
+ }
1331
1353
 
1332
1354
  data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
1333
1355
  // TODO: if (has_raw_python)
1334
- data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
1335
1356
  return data;
1336
1357
  }
1337
1358
  static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
@@ -1591,6 +1612,7 @@ static common_chat_params common_chat_templates_apply_jinja(
1591
1612
  params.extract_reasoning = inputs.extract_reasoning;
1592
1613
  params.tool_choice = inputs.tool_choice;
1593
1614
  params.grammar = inputs.grammar;
1615
+ params.now = inputs.now;
1594
1616
  if (!inputs.json_schema.empty()) {
1595
1617
  params.json_schema = json::parse(inputs.json_schema);
1596
1618
  }
@@ -1622,7 +1644,7 @@ static common_chat_params common_chat_templates_apply_jinja(
1622
1644
  }
1623
1645
 
1624
1646
  // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1625
- if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
1647
+ if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null() && params.tools.is_array() && params.json_schema.is_null()) {
1626
1648
  return common_chat_params_init_hermes_2_pro(tmpl, params);
1627
1649
  }
1628
1650
 
@@ -1642,21 +1664,21 @@ static common_chat_params common_chat_templates_apply_jinja(
1642
1664
  return common_chat_params_init_firefunction_v2(tmpl, params);
1643
1665
  }
1644
1666
 
1645
- // Plain handler (no tools)
1646
- if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
1647
- return common_chat_params_init_without_tools(tmpl, params);
1648
- }
1649
-
1650
1667
  // Functionary v3.1 (w/ tools)
1651
1668
  if (src.find("<|start_header_id|>") != std::string::npos
1652
1669
  && src.find("<function=") != std::string::npos) {
1653
1670
  return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
1654
1671
  }
1655
1672
 
1656
- // Llama 3.1, 3.2, 3.3 (w/ tools)
1673
+ // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
1657
1674
  if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
1658
1675
  auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
1659
- return common_chat_params_init_llama_3_1_tool_calls(tmpl, params, allow_python_tag_builtin_tools);
1676
+ return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
1677
+ }
1678
+
1679
+ // Plain handler (no tools)
1680
+ if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
1681
+ return common_chat_params_init_without_tools(tmpl, params);
1660
1682
  }
1661
1683
 
1662
1684
  // Mistral Nemo (w/ tools)
@@ -3,6 +3,7 @@
3
3
  #pragma once
4
4
 
5
5
  #include "common.h"
6
+ #include <chrono>
6
7
  #include <string>
7
8
  #include <vector>
8
9
 
@@ -71,6 +72,7 @@ struct common_chat_templates_inputs {
71
72
  common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
72
73
  bool parallel_tool_calls = false;
73
74
  bool extract_reasoning = true;
75
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
74
76
  };
75
77
 
76
78
  struct common_chat_params {