@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -19,6 +19,8 @@ static std::string normalize_newlines(const std::string & s) {
 #endif
 }
 
+#define U8C(x) (const char*)(u8##x)
+
 static common_chat_msg simple_msg(const std::string & role, const std::string & content) {
     common_chat_msg msg;
     msg.role = role;
@@ -35,6 +37,8 @@ int main(void) {
         {"assistant", " I am an assistant "},
         {"user", "Another question"},
     };
+
+    // std::string wrong = /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}";
     struct TestCase {
         std::string name;
         std::string template_str;
@@ -177,8 +181,8 @@ int main(void) {
         },
         {
             /* .name= */ "ChatGLM4",
-            /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+            /* .template_str= */ U8C("[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}"),
+            /* .expected_output= */ "[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>\n",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
@@ -193,8 +197,8 @@ int main(void) {
         },
         {
             /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
-            /* .template_str= */ u8"{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}",
-            /* .expected_output= */ u8"You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>",
+            /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
+            /* .expected_output= */ U8C("You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>"),
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "",
@@ -202,7 +206,7 @@ int main(void) {
         {
             /* .name= */ "DeepSeek-V2",
             /* .template_str= */ "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
-            /* .expected_output= */ u8"You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:",
+            /* .expected_output= */ U8C("You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:"),
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
             /* .eos_token= */ "<|end▁of▁sentence|>",
@@ -256,7 +260,7 @@ int main(void) {
         },
         {
             /* .name= */ "Infinigence/Megrez-3B-Instruct",
-            /* .template_str= */ u8"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}",
+            /* .template_str= */ U8C("{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"),
             /* .expected_output= */ "<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
             /* .expected_output_jinja= */ "",
             /* .bos_token= */ "",
@@ -270,6 +274,22 @@ int main(void) {
             /* .bos_token= */ "",
             /* .eos_token= */ "",
         },
+        {
+            /* .name= */ "yandex/YandexGPT-5-Lite-8B-instruct",
+            /* .template_str= */ "<s>{%- set names = {'assistant': ' Ассистент:', 'user': ' Пользователь:'} %}\n{%- set tools_prefix = 'Тебе доступны следующие функции:' %}\n{%- macro __render_tool(tool) %}\n {%- set name = tool.function.name %}\n {%- set description = tool.function.description|default('') %}\n {%- set parameters = tool.function.parameters|tojson %}\n {{- '\\n' }}function {{ '{' }}'name':'{{ name }}',\n {%- if tool.function.description %}'description':'{{ description }}',{% endif %}\n'parameters':{{ parameters }}\n {{- '}' }}\n{%- endmacro %}\n{%- macro __render_tools(tools) %}\n {{- tools_prefix }}\n {%- for tool in tools %}\n {{- __render_tool(tool) }}\n {%- endfor %}\n {{- '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_tool_message(message) %}\n {{- '\\n\\nРезультат вызова' }} {{ message.name }}: {{ message.content }} {{ '\\n\\n' }}\n{%- endmacro %}\n{%- if tools -%}\n {{- __render_tools(tools) }}\n{%- endif -%}\n{%- macro __render_user_message(message) %}\n{{ names.user }} {{ message.content + '\\n\\n' }}\n{%- endmacro %}\n{%- macro __render_assistant_message(message) %}\n {{- names.assistant }}\n {%- set call = message['function_call'] %}\n {%- if call %}\n {{- '\\n[TOOL_CALL_START]' }}{{ call.name }}{{ '\\n' }}{{ call.arguments|tojson }}\n {%- else %}\n {{- ' ' + message.content + '\\n\\n' }}\n {%- endif %}\n{%- endmacro %}\n{%- if not add_generation_prompt is defined %}\n{%- set add_generation_prompt = false %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {{- __render_user_message(message) }}\n {%- endif %}\n {%- if message.role == 'assistant' and not loop.last %}\n {{- __render_assistant_message(message) }}\n {%- endif %}\n {%- if message.role == 'tool' %}\n {{- __render_tool_message(message) }}\n {%- endif %}\n {%- if loop.last %}\n {{- ' Ассистент:[SEP]' }}\n {%- endif %}\n{%- endfor %}\n",
+            /* .expected_output= */ "<s> Пользователь: Hello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
+            /* .expected_output_jinja= */ "<s> Пользователь: You are a helpful assistant\nHello\n\n Ассистент: Hi there\n\n Пользователь: Who are you\n\n Ассистент: I am an assistant \n\n Пользователь: Another question\n\n Ассистент:[SEP]",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
+        {
+            /* .name= */ "inclusionAI/Ling-lite",
+            /* .template_str */ "{% for message in messages %}{% set role = message['role'] | lower %}{% if role == 'user' %}{% set role = 'HUMAN' %}{% endif %}{% set role = role | upper %}{{ '<role>' + role + '</role>' + message['content'] }}{% endfor %}{% if add_generation_prompt %}{{ '<role>ASSISTANT</role>' }}{% endif %}",
+            /* .expected_output= */ "<role>SYSTEM</role>You are a helpful assistant<role>HUMAN</role>Hello<role>ASSISTANT</role>Hi there<role>HUMAN</role>Who are you<role>ASSISTANT</role> I am an assistant <role>HUMAN</role>Another question<role>ASSISTANT</role>",
+            /* .expected_output_jinja= */ "",
+            /* .bos_token= */ "",
+            /* .eos_token= */ "",
+        },
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
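Note on the U8C macro added at the top of this file: presumably it exists because, under C++20, u8"..." literals have type const char8_t[] and no longer convert implicitly to const char* or std::string, which the template strings above rely on; the cast keeps the file building under both C++17 and C++20. A minimal standalone sketch of the problem and the workaround (not part of the diff):

    #include <string>

    // same definition as introduced in the hunk above
    #define U8C(x) (const char*)(u8##x)

    int main() {
        // std::string bad = u8"可用工具";  // ill-formed under C++20: const char8_t* does not convert to std::string
        std::string ok = U8C("可用工具");    // cast makes the literal usable as const char* in both standards
        return ok.empty();
    }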
@@ -11,8 +11,9 @@
 #include <string>
 
 #include "chat.h"
-#include "llama-grammar.h"
-#include "unicode.h"
+
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
 
 using json = nlohmann::ordered_json;
 
@@ -569,6 +570,7 @@ static void test_template_output_parsers() {
     {
         // Not supported yet
         auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
     }
     {
@@ -665,6 +667,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
         std::vector<std::string> end_tokens{ "<|im_end|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(
             COMMON_CHAT_FORMAT_HERMES_2_PRO,
@@ -793,6 +796,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
                       common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
@@ -815,6 +819,7 @@ static void test_template_output_parsers() {
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
 
         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
@@ -824,8 +829,12 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-                     common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+                      common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
+                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
@@ -851,6 +860,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
         std::vector<std::string> end_tokens{ "<|eot_id|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
 
         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
@@ -862,6 +872,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);
 
@@ -891,6 +902,7 @@ static void test_template_output_parsers() {
         auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
 
+        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);
 
@@ -1,5 +1,5 @@
-#include "unicode.h"
-#include "llama-grammar.h"
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
 
 #include <cstdio>
 #include <cstdlib>
@@ -2,10 +2,11 @@
 #undef NDEBUG
 #endif
 
-#include "unicode.h"
-#include "llama-grammar.h"
 #include "json-schema-to-grammar.h"
 
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
+
 #include <cassert>
 #include <string>
 #include <vector>
@@ -2,7 +2,6 @@
 # undef NDEBUG
 #endif
 
-#include "unicode.h"
 #include "sampling.h"
 
 #include <cassert>
@@ -84,7 +83,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
 
         fprintf(stderr,
                 "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following "
-                "command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf "
+                "command: ./test-gbnf-validator test-grammar-integration.grammar.gbnf "
                 "test-grammar-integration.string.txt\n\n");
     } else {
         fprintf(stdout, "✅︎\n");
@@ -1086,6 +1085,65 @@ static void test_json_schema() {
     });
 }
 
+static void one_hot(llama_token_data_array & tok_arr, llama_token selected) {
+    auto n_vocab = tok_arr.size;
+
+    tok_arr.selected = -1;
+    tok_arr.sorted = false;
+    for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
+        tok_arr.data[token_id].id = token_id;
+        tok_arr.data[token_id].logit = 0.0f;
+    }
+
+    tok_arr.data[selected].logit = 100.0f;
+}
+
+static void test_sampler_chain(void) {
+    auto sparams = llama_sampler_chain_default_params();
+    sparams.no_perf = false;
+    llama_sampler * sampler = llama_sampler_chain_init(sparams);
+
+    const auto grammar_data = R"(%llguidance {}
+start: /[A-Z ]*/)";
+
+    llama_sampler_chain_add(sampler, llama_sampler_init_llg(vocab, "lark", grammar_data));
+    llama_sampler_chain_add(sampler, llama_sampler_init_dist(42));
+
+    auto input = "ALL YOUR BASE ARE BELONG TO US";
+    auto tokens = common_tokenize(vocab, input, false, false);
+
+    auto n_vocab = llama_vocab_n_tokens(vocab);
+
+    std::vector<llama_token_data> cur;
+    cur.reserve(n_vocab);
+    for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
+        cur.emplace_back(llama_token_data{ token_id, 0.0f, 0.0f });
+    }
+    auto tok_arr = llama_token_data_array{ cur.data(), cur.size(), -1, false };
+
+    for (const auto token : tokens) {
+        one_hot(tok_arr, token);
+
+        fprintf(stderr, "applying token: %d\n", token);
+        llama_sampler_apply(sampler, &tok_arr);
+
+        auto idx = tok_arr.selected;
+        fprintf(stderr, " -> %d %f\n", cur[idx].id, cur[idx].logit);
+        assert(cur[tok_arr.selected].id == token);
+        llama_sampler_accept(sampler, token);
+    }
+
+    auto tok_eos = llama_vocab_eot(vocab);
+    if (tok_eos == LLAMA_TOKEN_NULL) {
+        tok_eos = llama_vocab_eos(vocab);
+    }
+
+    one_hot(tok_arr, tok_eos);
+
+    llama_sampler_apply(sampler, &tok_arr);
+    assert(cur[tok_arr.selected].id == tok_eos);
+}
+
 int main(int argc, const char ** argv) {
     fprintf(stdout, "Running llguidance integration tests...\n");
 
@@ -1135,6 +1193,9 @@ int main(int argc, const char ** argv) {
     test_special_chars();
     test_quantifiers();
     test_json_schema();
+
+    test_sampler_chain();
+
     fprintf(stdout, "All tests passed.\n");
     return 0;
 }
@@ -3,7 +3,9 @@
 #endif
 
 #include "llama.h"
-#include "llama-grammar.h"
+
+// TODO: shold not include libllama sources
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 
@@ -4,7 +4,7 @@
 
 #include "json-schema-to-grammar.h"
 
-#include "llama-grammar.h"
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 #include <fstream>
@@ -597,6 +597,22 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
         )"""
     });
 
+    test({
+        SUCCESS,
+        "maxItems 0",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "maxItems": 0
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space "]" space
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )"""
+    });
+
     test({
         SUCCESS,
         "maxItems 1",
@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "llama-grammar.h"
+
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 #include <stdexcept>
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include "mtmd.h"
+
+int main(void) {
+    printf("\n\nTesting libmtmd C API...\n");
+    printf("--------\n\n");
+
+    struct mtmd_context_params params = mtmd_context_params_default();
+    printf("Default image marker: %s\n", params.image_marker);
+
+    mtmd_input_chunks * chunks = mtmd_test_create_input_chunks();
+
+    if (!chunks) {
+        fprintf(stderr, "Failed to create input chunks\n");
+        return 1;
+    }
+
+    size_t n_chunks = mtmd_input_chunks_size(chunks);
+    printf("Number of chunks: %zu\n", n_chunks);
+    assert(n_chunks > 0);
+
+    for (size_t i = 0; i < n_chunks; i++) {
+        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
+        assert(chunk != NULL);
+        enum mtmd_input_chunk_type type = mtmd_input_chunk_get_type(chunk);
+        printf("Chunk %zu type: %d\n", i, type);
+
+        if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+            size_t n_tokens;
+            const llama_token * tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
+            printf("  Text chunk with %zu tokens\n", n_tokens);
+            assert(tokens != NULL);
+            assert(n_tokens > 0);
+            for (size_t j = 0; j < n_tokens; j++) {
+                assert(tokens[j] >= 0);
+                printf("    > Token %zu: %d\n", j, tokens[j]);
+            }
+
+        } else if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            const mtmd_image_tokens * image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+            size_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+            size_t nx = mtmd_image_tokens_get_nx(image_tokens);
+            size_t ny = mtmd_image_tokens_get_ny(image_tokens);
+            const char * id = mtmd_image_tokens_get_id(image_tokens);
+            assert(n_tokens > 0);
+            assert(nx > 0);
+            assert(ny > 0);
+            assert(id != NULL);
+            printf("  Image chunk with %zu tokens\n", n_tokens);
+            printf("  Image size: %zu x %zu\n", nx, ny);
+            printf("  Image ID: %s\n", id);
+        }
+    }
+
+    // Free the chunks
+    mtmd_input_chunks_free(chunks);
+
+    printf("\n\nDONE: test libmtmd C API...\n");
+
+    return 0;
+}
@@ -57,7 +57,8 @@ static helper_ctx_data helper_get_ctx_data(
         enum ggml_opt_loss_type loss_type = GGML_OPT_LOSS_TYPE_SUM) {
     std::vector<ggml_opt_dataset_t> datasets(ndata);
     for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
-        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(ne_datapoint, ne_label, ndata, ndata_shard);
+        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+            GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);
 
         float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
         float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -74,7 +75,8 @@ static helper_ctx_data helper_get_ctx_data(
         datasets[ndata_shard-1] = dataset;
     }
 
-    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(1, 0, ndata, /*ndata_shard =*/ 1);
+    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
+        GGML_TYPE_F32, GGML_TYPE_F32, 1, 0, ndata, /*ndata_shard =*/ 1);
 
     float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));
 
@@ -113,7 +115,7 @@ static helper_ctx_data helper_get_ctx_data(
 
     struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(weights, "weights");
-    ggml_set_param(ctx_static, weights);
+    ggml_set_param(weights);
 
     struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);
 
@@ -127,8 +129,11 @@ static helper_ctx_data helper_get_ctx_data(
     GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
     const int32_t opt_period = nbatch_logical / nbatch_physical;
 
-    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, ctx_compute, inputs, outputs, loss_type);
-    opt_params.opt_period = opt_period;
+    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
+    opt_params.ctx_compute = ctx_compute;
+    opt_params.inputs = inputs;
+    opt_params.outputs = outputs;
+    opt_params.opt_period = opt_period;
     if (!optimizer_defaults) {
         opt_params.get_opt_pars = helper_get_test_opt_pars;
     }
@@ -264,8 +269,9 @@ static std::pair<int, int> test_grad(ggml_backend_sched_t backend_sched, ggml_ba
 
     for (int idata = 0; idata < ndata; ++idata) {
         const float idataf = idata;
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
         ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-        ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+        ggml_opt_eval(cd.opt_ctx, cd.result);
         ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
     }
 
@@ -334,8 +340,9 @@ static std::pair<int, int> test_forward_backward(
     } else {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-            ggml_opt_forward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
     }
@@ -367,7 +374,8 @@ static std::pair<int, int> test_forward_backward(
     float w0;
     ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
     for (int i = 0; i < 10; ++i) {
-        ggml_opt_forward_backward(cd.opt_ctx, nullptr);
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
+        ggml_opt_eval(cd.opt_ctx, cd.result);
     }
     ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));
 
@@ -387,8 +395,9 @@ static std::pair<int, int> test_forward_backward(
     } else {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-            ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
         }
     }
@@ -492,14 +501,16 @@ static std::pair<int, int> test_idata_split(ggml_backend_sched_t backend_sched,
     int idata = 0;
     for (; idata < idata_split; ++idata) {
         const float idataf = idata;
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
         ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-        ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+        ggml_opt_eval(cd.opt_ctx, cd.result);
         ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
     }
     for (; idata < ndata; ++idata) {
         const float idataf = idata;
+        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
         ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
-        ggml_opt_forward(cd.opt_ctx, cd.result2);
+        ggml_opt_eval(cd.opt_ctx, cd.result2);
         ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
     }
 }
@@ -573,7 +584,6 @@ static std::pair<int, int> test_gradient_accumulation(
 
     struct helper_ctx_data cd = helper_get_ctx_data(
         backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);
-    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);
 
     std::vector<float> grad_history(ndata);
     for (int64_t idata = 0; idata < ndata; ++idata) {
@@ -584,15 +594,17 @@ static std::pair<int, int> test_gradient_accumulation(
     if (nbatch_physical == 1) {
         for (int idata = 0; idata < ndata; ++idata) {
             const float idataf = idata;
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
-            ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
             ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
         }
     } else if (nbatch_physical == 2) {
         for (int idata = 0; idata < ndata; idata += 2) {
             const float idataf[2] = {float(idata + 0), float(idata + 1)};
+            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
             ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
-            ggml_opt_forward_backward(cd.opt_ctx, cd.result);
+            ggml_opt_eval(cd.opt_ctx, cd.result);
 
             grad_history[idata + 0] = 0.0f;
             ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
@@ -617,7 +629,7 @@ static std::pair<int, int> test_gradient_accumulation(
         }
         subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
         subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
-        subtest_ok = subtest_ok && almost_equal(grad_history[5], 0.0, atol);
+        subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
     } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
         if (nbatch_physical == 1) {
             subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
@@ -630,7 +642,7 @@ static std::pair<int, int> test_gradient_accumulation(
         }
         subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
         subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
-        subtest_ok = subtest_ok && almost_equal(grad_history[5], 0.0/ndata, atol);
+        subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
     } else {
         GGML_ASSERT(false);
     }
@@ -692,7 +704,8 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
     std::mt19937 gen(12345);
     std::normal_distribution<float> nd{0.0f, 0.1f};
 
-    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(1, 1, ndata_regression, ndata_regression);
+    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
+        GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);
 
     float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
     float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
@@ -733,15 +746,14 @@ static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, g
 
     struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(a, "a");
-    ggml_set_param(ctx_static, a);
+    ggml_set_param(a);
 
     struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
     ggml_set_name(b, "b");
-    ggml_set_param(ctx_static, b);
+    ggml_set_param(b);
 
     struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
     ggml_set_name(f, "f");
-    ggml_set_param(ctx_static, f);
 
     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
     const float a0 = 1.0f;
@@ -853,7 +865,7 @@ int main(void) {
         backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());
 
         ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
-            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false);
+            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);
 
         printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
         printf("  Device description: %s\n", ggml_backend_dev_description(devs[i]));
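The test-opt hunks above track an API change in ggml-opt that runs through this diff: ggml_opt_dataset_init now takes explicit data/label tensor types, ggml_opt_default_params takes only the scheduler and loss type (compute context, inputs and outputs become struct fields), and the ggml_opt_forward / ggml_opt_forward_backward pair is replaced by ggml_opt_alloc followed by ggml_opt_eval. A minimal sketch of one step under the new pattern, using only calls that appear in the hunks; opt_ctx, inputs, result, batch, ne_datapoint, ne_label and ndata are placeholder names mirroring the test fixture, not a definitive recipe:

    // datasets now declare their data and label tensor types explicitly (new signature)
    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
        GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, /*ndata_shard =*/ 1);

    // one optimization step: allocate graphs (pass false for a forward-only step),
    // stage this step's inputs, then evaluate
    ggml_opt_alloc(opt_ctx, /*backward =*/ true);
    ggml_backend_tensor_set(inputs, batch, 0, ggml_nbytes(inputs));
    ggml_opt_eval(opt_ctx, result);  // runs forward (and backward when allocated), accumulating into result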
@@ -1,8 +1,10 @@
 #include "ggml.h"
+#include "ggml-cpu.h"
 #include "llama.h"
-#include "llama-model.h"
 #include "common.h"
 
+#include "../src/llama-model.h"
+
 #include <algorithm>
 #include <cassert>
 #include <cinttypes>