@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/tests/test-chat.cpp
@@ -1,985 +0,0 @@
- // Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
- //
- // Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
- // e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
- //
- // cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
- //
- #include <fstream>
- #include <iostream>
- #include <json.hpp>
- #include <string>
-
- #include "chat.h"
-
- #include "../src/unicode.h"
- #include "../src/llama-grammar.h"
-
- using json = nlohmann::ordered_json;
-
-
- template <class T> static void assert_equals(const T & expected, const T & actual) {
-     if (expected != actual) {
-         std::cerr << "Expected: " << expected << std::endl;
-         std::cerr << "Actual: " << actual << std::endl;
-         std::cerr << std::flush;
-         throw std::runtime_error("Test failed");
-     }
- }
-
- static std::string read_file(const std::string & path) {
-     std::cerr << "# Reading: " << path << '\n' << std::flush;
-     std::ifstream fs(path, std::ios_base::binary);
-     if (!fs.is_open()) {
-         fs = std::ifstream("../" + path, std::ios_base::binary);
-         if (!fs.is_open()) {
-             throw std::runtime_error("Failed to open file: " + path);
-         }
-     }
-     fs.seekg(0, std::ios_base::end);
-     auto size = fs.tellg();
-     fs.seekg(0);
-     std::string out;
-     out.resize(static_cast<size_t>(size));
-     fs.read(out.data(), static_cast<std::streamsize>(size));
-     return out;
- }
-
- static common_chat_templates_ptr read_templates(const std::string & path) {
-     return common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, read_file(path)));
- }
-
- static std::unique_ptr<llama_grammar> build_grammar(const std::string & grammar_str) {
-     return std::unique_ptr<llama_grammar>(
-         llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0));
- }
-
- // TODO: extract to common helper (copied from test-grammar-integration.cpp)
- static bool match_string(const std::string & input, llama_grammar * grammar) {
-     const auto cpts = unicode_cpts_from_utf8(input);
-
-     auto & stacks_cur = llama_grammar_get_stacks(grammar);
-
-     for (const auto & cpt : cpts) {
-         llama_grammar_accept(grammar, cpt);
-
-         if (stacks_cur.empty()) {
-             // no stacks means that the grammar failed to match at this point
-             return false;
-         }
-     }
-
-     if (std::any_of(stacks_cur.begin(), stacks_cur.end(), [](const auto & stack) { return stack.empty(); })) {
-         // An empty stack means that the grammar has been completed
-         return true;
-     }
-
-     return false;
- }
-
- static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual) {
-     assert_equals(expected.role, actual.role);
-     assert_equals(expected.content, actual.content);
-     assert_equals(expected.content_parts.size(), actual.content_parts.size());
-     for (size_t i = 0; i < expected.content_parts.size(); i++) {
-         const auto & expected_part = expected.content_parts[i];
-         const auto & actual_part = actual.content_parts[i];
-         assert_equals(expected_part.type, actual_part.type);
-         assert_equals(expected_part.text, actual_part.text);
-     }
-     assert_equals(expected.reasoning_content, actual.reasoning_content);
-     assert_equals(expected.tool_calls.size(), actual.tool_calls.size());
-     for (size_t i = 0; i < expected.tool_calls.size(); i++) {
-         const auto & expected_tool_call = expected.tool_calls[i];
-         const auto & actual_tool_call = actual.tool_calls[i];
-         assert_equals(expected_tool_call.name, actual_tool_call.name);
-         assert_equals(json::parse(expected_tool_call.arguments).dump(), json::parse(actual_tool_call.arguments).dump());
-         assert_equals(expected_tool_call.id, actual_tool_call.id);
-     }
- }
-
- common_chat_tool special_function_tool {
-     /* .name = */ "special_function",
-     /* .description = */ "I'm special",
-     /* .parameters = */ R"({
-         "type": "object",
-         "properties": {
-             "arg1": {
-                 "type": "integer",
-                 "description": "The arg."
-             }
-         },
-         "required": ["arg1"]
-     })",
- };
- common_chat_tool python_tool {
-     /* .name = */ "python",
-     /* .description = */ "an ipython interpreter",
-     /* .parameters = */ R"({
-         "type": "object",
-         "properties": {
-             "code": {
-                 "type": "string",
-                 "description": "Python code to execute."
-             }
-         },
-         "required": ["code"]
-     })",
- };
- common_chat_tool code_interpreter_tool {
-     /* .name = */ "code_interpreter",
-     /* .description = */ "an ipython interpreter",
-     /* .parameters = */ R"({
-         "type": "object",
-         "properties": {
-             "code": {
-                 "type": "string",
-                 "description": "Python code to execute."
-             }
-         },
-         "required": ["code"]
-     })",
- };
- std::vector<common_chat_tool> tools { special_function_tool, python_tool };
- std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
-
- struct delta_data {
-     std::string delta;
-     common_chat_params params;
- };
-
- static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                              const common_chat_msg & user_message,
-                              const common_chat_msg & delta_message,
-                              const std::vector<common_chat_tool> & tools,
-                              const common_chat_tool_choice & tool_choice,
-                              bool think = false) {
-     common_chat_templates_inputs inputs;
-     inputs.parallel_tool_calls = true;
-     inputs.messages.push_back(user_message);
-     inputs.tools = tools;
-     inputs.tool_choice = tool_choice;
-     inputs.extract_reasoning = think;
-     auto params_prefix = common_chat_templates_apply(tmpls, inputs);
-
-     inputs.messages.push_back(delta_message);
-     inputs.add_generation_prompt = false;
-     auto params_full = common_chat_templates_apply(tmpls, inputs);
-
-     std::string prefix = params_prefix.prompt;
-     std::string full = params_full.prompt;
-
-     if (full == prefix) {
-         throw std::runtime_error("Full message is the same as the prefix");
-     }
-
-     size_t common_prefix_length = 0;
-     for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
-         if (prefix[i] != full[i]) {
-             break;
-         }
-         if (prefix[i] == '<') {
-             // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
-             // but it removes thinking tags for past messages.
-             // The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, we avoid consuming the leading <.
-             continue;
-         }
-         common_prefix_length = i + 1;
-     }
-     auto delta = full.substr(common_prefix_length);
-
-     // Strip end tokens
-     for (const auto & end_token : end_tokens) {
-         // rfind to find the last occurrence
-         auto pos = delta.rfind(end_token);
-         if (pos != std::string::npos) {
-             delta = delta.substr(0, pos);
-             break;
-         }
-     }
-     return { delta, params_full };
- }
-
- /*
-   Applies the template to 1 user message w/ add_generation_prompt=true, then w/ the test message w/ add_generation_prompt=false,
-   gets the diff, removes any end tokens and parses the result w/ the grammar, checking that
-   the parsed message is the same as the test_message
- */
- static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                            const common_chat_msg & test_message,
-                            const std::vector<common_chat_tool> & tools = {},
-                            const std::string & expected_delta = "",
-                            bool expect_grammar_triggered = true,
-                            bool test_grammar_if_triggered = true,
-                            bool think = false) {
-     common_chat_msg user_message;
-     user_message.role = "user";
-     user_message.content = "Hello, world!";
-
-     for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
-         auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice, think);
-         if (!expected_delta.empty()) {
-             assert_equals(expected_delta, data.delta);
-         }
-
-         if (expect_grammar_triggered) {
-             const auto msg = common_chat_parse(data.delta, data.params.format);
-             assert_msg_equals(test_message, msg);
-         }
-
-         if (!test_message.tool_calls.empty()) {
-             GGML_ASSERT(!data.params.grammar.empty());
-         }
-         if (!data.params.grammar.empty()) {
-             auto grammar = build_grammar(data.params.grammar);
-             if (!grammar) {
-                 throw std::runtime_error("Failed to build grammar");
-             }
-             auto earliest_trigger_pos = std::string::npos;
-             auto constrained = data.delta;
-             for (const auto & trigger : data.params.grammar_triggers) {
-                 size_t pos = std::string::npos;
-                 std::smatch match;
-                 switch (trigger.type) {
-                     case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
-                     {
-                         const auto & word = trigger.value;
-                         pos = constrained.find(word);
-                         break;
-                     }
-                     case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
-                     {
-                         const auto & pattern = trigger.value;
-                         if (std::regex_search(constrained, match, std::regex(pattern))) {
-                             pos = match.position();
-                         }
-                         break;
-                     }
-                     case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START:
-                     {
-                         const auto & pattern = trigger.value;
-                         if (std::regex_search(constrained, match, std::regex(pattern)) && match.position() == 0) {
-                             pos = 0;
-                         }
-                         break;
-                     }
-                     default:
-                         throw std::runtime_error("Unknown trigger type");
-                 }
-                 if (pos == std::string::npos) {
-                     continue;
-                 }
-                 if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) {
-                     earliest_trigger_pos = pos;
-                 }
-             }
-             auto grammar_triggered = false;
-             if (earliest_trigger_pos != std::string::npos) {
-                 constrained = constrained.substr(earliest_trigger_pos);
-                 grammar_triggered = true;
-             }
-             if (data.params.grammar_lazy) {
-                 assert_equals(expect_grammar_triggered, grammar_triggered);
-             }
-
-             if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
-                 throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
-                     "\n\nConstrained: " + constrained +
-                     "\n\nGrammar: " + data.params.grammar);
-             }
-         }
-     }
- }
-
- const common_chat_msg message_user {
-     "user",
-     "Hey there!",
-     /* .content_parts = */ {},
-     /* .tool_calls = */ {},
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
-
- const common_chat_msg message_user_parts {
-     "user",
-     /* .content = */ "",
-     /* .content_parts = */ {
-         { "text", "Hey" },
-         { "text", "there" },
-     },
-     /* .tool_calls = */ {},
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist {
-     "assistant",
-     "Hello, world!\nWhat's up?",
-     /* .content_parts = */ {},
-     /* .tool_calls = */ {},
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_thoughts_unparsed_think {
-     "assistant",
-     "<think>I'm thinking</think>Hello, world!\nWhat's up?",
-     /* .content_parts = */ {},
-     /* .tool_calls = */ {},
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_thoughts_unparsed_r7b {
-     "assistant",
-     "<|START_THINKING|>I'm thinking<|END_THINKING|>Hello, world!\nWhat's up?",
-     /* .content_parts = */ {},
-     /* .tool_calls = */ {},
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_thoughts {
-     "assistant",
-     "Hello, world!\nWhat's up?",
-     /* .content_parts = */ {},
-     /* .tool_calls = */ {},
-     /* .reasoning_content = */ "I'm thinking",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const std::vector<common_chat_tool_call> tool_calls {
-     { "special_function", "{\"arg1\": 1}", /* .id = */ "" },
- };
- const std::vector<common_chat_tool_call> tool_calls_idx {
-     { "special_function", "{\"arg1\": 1}", /* .id = */ "0" },
- };
- const std::vector<common_chat_tool_call> tool_calls_id {
-     { "special_function", "{\"arg1\": 1}", /* .id = */ "123456789" },
- };
-
- const common_chat_msg message_assist_call {
-     "assistant",
-     "",
-     /* .content_parts = */ {},
-     tool_calls,
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_call_thoughts = {
-     "assistant",
-     /* .content = */ "",
-     /* .content_parts = */ {},
-     tool_calls,
-     /* .reasoning_content = */ "I'm\nthinking",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_call_thoughts_unparsed = {
-     "assistant",
-     /* .content = */ "<think>I'm\nthinking</think>",
-     /* .content_parts = */ {},
-     tool_calls,
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_call_id {
-     "assistant",
-     "",
-     /* .content_parts = */ {},
-     tool_calls_id,
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_call_idx {
-     "assistant",
-     "",
-     /* .content_parts = */ {},
-     tool_calls_idx,
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_call_python {
-     "assistant",
-     "",
-     /* .content_parts = */ {},
-     { { "python", "{\"code\": \"print('hey')\"}", /* .id = */ "" } },
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
- const common_chat_msg message_assist_call_code_interpreter {
-     "assistant",
-     "",
-     /* .content_parts = */ {},
-     { { "code_interpreter", "{\"code\": \"print('hey')\"}", /* .id = */ "" } },
-     /* .reasoning_content = */ "",
-     /* .tool_name = */ "",
-     /* .tool_call_id = */ "",
- };
-
- static void test_msgs_oaicompat_json_conversion() {
-     std::vector<common_chat_msg> msgs{
-         message_user,
-         message_user_parts,
-         message_assist_call,
-         message_assist_call_thoughts,
-         message_assist_call_thoughts_unparsed,
-         message_assist_call_id,
-         message_assist_call_idx,
-         message_assist_call_python,
-         message_assist_call_code_interpreter,
-     };
-     for (const auto & msg : msgs) {
-         auto oai_json = common_chat_msgs_to_json_oaicompat<json>({msg});
-         auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
-         assert_equals((size_t) 1, msgs2.size());
-         auto msg2 = msgs2[0];
-         assert_msg_equals(msg, msg2);
-     }
-     assert_equals(
-         std::string(
-             "[\n"
-             "  {\n"
-             "    \"role\": \"user\",\n"
-             "    \"content\": [\n"
-             "      {\n"
-             "        \"type\": \"text\",\n"
-             "        \"text\": \"Hey\"\n"
-             "      },\n"
-             "      {\n"
-             "        \"type\": \"text\",\n"
-             "        \"text\": \"there\"\n"
-             "      }\n"
-             "    ]\n"
-             "  }\n"
-             "]"
-         ),
-         common_chat_msgs_to_json_oaicompat<json>({message_user_parts}).dump(2));
-
-     assert_equals(
-         std::string(
-             "[\n"
-             "  {\n"
-             "    \"role\": \"assistant\",\n"
-             "    \"content\": null,\n"
-             "    \"tool_calls\": [\n"
-             "      {\n"
-             "        \"type\": \"function\",\n"
-             "        \"function\": {\n"
-             "          \"name\": \"python\",\n"
-             "          \"arguments\": \"{\\\"code\\\": \\\"print('hey')\\\"}\"\n"
-             "        }\n"
-             "      }\n"
-             "    ]\n"
-             "  }\n"
-             "]"
-         ),
-         common_chat_msgs_to_json_oaicompat<json>({message_assist_call_python}).dump(2));
-
-     auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
-     assert_equals<size_t>(1, res.size());
-     assert_equals<std::string>(res[0].role, "assistant");
-     assert_equals(true, res[0].content.empty());
-     assert_equals(true, res[0].tool_calls.empty());
-
-     try {
-         common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\"}]"));
-         throw std::runtime_error("Expected exception");
-     } catch (const std::exception & e) {
-         if (std::string(e.what()).find("'content'") == std::string::npos) {
-             throw std::runtime_error("Expected exception about missing 'content'");
-         }
-     }
- }
-
- static void test_tools_oaicompat_json_conversion() {
-     std::vector<common_chat_tool> tools{
-         special_function_tool,
-         python_tool,
-         code_interpreter_tool,
-     };
-
-     for (const auto & tool : tools) {
-         auto oai_json = common_chat_tools_to_json_oaicompat<json>({tool});
-         auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
-         assert_equals((size_t) 1, tools2.size());
-         auto tool2 = tools2[0];
-         assert_equals(tool.name, tool2.name);
-         assert_equals(tool.description, tool2.description);
-         assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2));
-     }
-
-     assert_equals(
-         std::string(
-             "[\n"
-             "  {\n"
-             "    \"type\": \"function\",\n"
-             "    \"function\": {\n"
-             "      \"name\": \"special_function\",\n"
-             "      \"description\": \"I'm special\",\n"
-             "      \"parameters\": {\n"
-             "        \"type\": \"object\",\n"
-             "        \"properties\": {\n"
-             "          \"arg1\": {\n"
-             "            \"type\": \"integer\",\n"
-             "            \"description\": \"The arg.\"\n"
-             "          }\n"
-             "        },\n"
-             "        \"required\": [\n"
-             "          \"arg1\"\n"
-             "        ]\n"
-             "      }\n"
-             "    }\n"
-             "  }\n"
-             "]"
-         ),
-         common_chat_tools_to_json_oaicompat<json>({special_function_tool}).dump(2));
- }
-
- static void test_template_output_parsers() {
-
-     common_chat_templates_inputs inputs_no_tools;
-     inputs_no_tools.messages = {message_user};
-     inputs_no_tools.extract_reasoning = false;
-
-     common_chat_templates_inputs inputs_no_tools_think;
-     inputs_no_tools_think.messages = {message_user};
-     inputs_no_tools_think.extract_reasoning = true;
-
-     common_chat_templates_inputs inputs_tools;
-     inputs_tools.messages = {message_user};
-     inputs_tools.tools = {special_function_tool};
-     inputs_tools.extract_reasoning = false;
-
-     common_chat_templates_inputs inputs_tools_think;
-     inputs_tools_think.messages = {message_user};
-     inputs_tools_think.tools = {special_function_tool};
-     inputs_tools_think.extract_reasoning = true;
-
-     common_chat_templates_inputs inputs_tools_builtin;
-     inputs_tools_builtin.messages = {message_user};
-     inputs_tools_builtin.tools = {python_tool};
-     inputs_tools_builtin.extract_reasoning = false;
-
-     {
-         // Not supported yet
-         auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-     }
-     {
-         auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
-         std::vector<std::string> end_tokens{ "<|END_OF_TURN_TOKEN|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);
-
-         assert_msg_equals(message_assist,
-                           common_chat_parse(
-                               "Hello, world!\nWhat's up?",
-                               COMMON_CHAT_FORMAT_COMMAND_R7B));
-         assert_msg_equals(message_assist,
-                           common_chat_parse(
-                               "Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                               COMMON_CHAT_FORMAT_COMMAND_R7B));
-         assert_msg_equals(message_assist,
-                           common_chat_parse(
-                               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                               COMMON_CHAT_FORMAT_COMMAND_R7B));
-         assert_msg_equals(message_assist_thoughts_unparsed_r7b,
-                           common_chat_parse(
-                               "<|START_THINKING|>I'm thinking<|END_THINKING|>"
-                               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                               COMMON_CHAT_FORMAT_COMMAND_R7B));
-         assert_msg_equals(message_assist_thoughts_unparsed_r7b,
-                           common_chat_parse(
-                               "<|START_THINKING|>I'm thinking<|END_THINKING|>"
-                               "Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                               COMMON_CHAT_FORMAT_COMMAND_R7B));
-
-         assert_msg_equals(message_assist_thoughts,
-                           common_chat_parse(
-                               "<|START_THINKING|>I'm thinking<|END_THINKING|>"
-                               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                               COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING));
-
-         test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
-                        "<|START_THINKING|><|END_THINKING|>"
-                        "<|START_ACTION|>[\n"
-                        "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
-                        "]<|END_ACTION|>");
-         test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                        "<|START_RESPONSE|>Hello, world!\n"
-                        "What's up?<|END_RESPONSE|>",
-                        /* expect_grammar_triggered= */ false);
-     }
-     {
-         auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja");
-         std::vector<std::string> end_tokens{ "<end_of_turn>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_GENERIC,
-                       common_chat_templates_apply(
-                           read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(),
-                           inputs_tools)
-                           .format);
-
-         // Generic tool calls doesn't generate / parse content-only messages symmetrically.
-
-         assert_msg_equals(message_assist,
-                           common_chat_parse("{\n"
-                                             "  \"response\": \"Hello, world!\\nWhat's up?\"\n"
-                                             "}",
-                                             common_chat_templates_apply(tmpls.get(), inputs_tools).format));
-         test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                        "{\n"
-                        "  \"tool_calls\": [\n"
-                        "    {\n"
-                        "      \"name\": \"special_function\",\n"
-                        "      \"arguments\": {\n"
-                        "        \"arg1\": 1\n"
-                        "      },\n"
-                        "      \"id\": \"123456789\"\n"
-                        "    }\n"
-                        "  ]\n"
-                        "}");
-     }
-     {
-         auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja");
-         std::vector<std::string> end_tokens{ "</s>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-         test_templates(
-             tmpls.get(), end_tokens, message_assist_call_id, tools,
-             "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
-     }
-     {
-         auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
-         std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(
-             COMMON_CHAT_FORMAT_HERMES_2_PRO,
-             common_chat_templates_apply(
-                 read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(),
-                 inputs_tools)
-                 .format);
-         assert_equals(
-             COMMON_CHAT_FORMAT_HERMES_2_PRO,
-             common_chat_templates_apply(
-                 read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(),
-                 inputs_tools)
-                 .format);
-
-         // Test parsing
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<tool_call>\n"
-             "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "</tool_call>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<function=special_function>{\"arg1\": 1}</function>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<function name=\"special_function\">\n"
-             "{\"arg1\": 1}\n"
-             "</function>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<tool>\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "</tool>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<tools>\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "</tools>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<response>\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "</response>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "```xml\n"
-             "<response>\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "</response>\n"
-             "```",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "```xml\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "```",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "```\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "```",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "```\n"
-             "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "```",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "```json\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "```",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "```json\n"
-             "\n"
-             "  <function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n"
-             "  </function_call> \n"
-             "``` ",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<json>\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "</json>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<xml>\n"
-             "  {\n"
-             "    \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n"
-             "  }\n"
-             "</xml>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "<JSON>\n"
-             "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-             "</JSON>",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_call, common_chat_parse(
-             "{\n  \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-
-         assert_msg_equals(message_assist_thoughts_unparsed_think,
-                           common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
-                                             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_thoughts_unparsed_think,
-                           common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
-                                             COMMON_CHAT_FORMAT_HERMES_2_PRO));
-         assert_msg_equals(message_assist_thoughts,
-                           common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
-                                             COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
-         assert_msg_equals(message_assist_thoughts,
-                           common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
-                                             COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
-
-         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                        "<tool_call>\n"
-                        "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                        "</tool_call>");
-         test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
-                        "<tool_call>\n"
-                        "{\"name\": \"python\", \"arguments\": {\"code\": \"print('hey')\"}}\n"
-                        "</tool_call>");
-     }
-     {
-         auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
-         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                       common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
-         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                       common_chat_templates_apply(
-                           read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(),
-                           inputs_tools_builtin)
-                           .format);
-
-         // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
-         test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools,
-                        "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
-         test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
-                        "<|python_tag|>python.call(code=\"print('hey')\")");
-         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                        "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
-     }
-     {
-         auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja");
-         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                        "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
-     }
-     {
-         auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
-         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                       common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-                       common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                       common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                        "<function=special_function>{\"arg1\": 1}</function>");
-     }
-     {
-         auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja");
-         std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-         test_templates(tmpls.get(), end_tokens, message_assist, {},
-                        "all\n"
-                        "Hello, world!\n"
-                        "What's up?",
-                        /* expect_grammar_triggered= */ false);
-         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                        "special_function\n"
-                        "{\"arg1\": 1}");
-     }
-     {
-         auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
-         std::vector<std::string> end_tokens{ "<|eot_id|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                        " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
-     }
-     {
-         // Original DeepSeek R1 template. Leaves <|tool▁calls▁begin|> and others unclosed. Our logic fixes the prompt.
-         auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
-         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);
-
-         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
880
- test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
881
- assert_msg_equals(message_assist_thoughts_unparsed_think,
882
- common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
883
- COMMON_CHAT_FORMAT_DEEPSEEK_R1));
884
- assert_msg_equals(message_assist_thoughts,
885
- common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
886
- COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
887
-         assert_msg_equals(message_assist_thoughts,
-             // Latest template update (as of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
-             common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
-                 COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
-         // test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-         //     "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
-         //     "```json\n"
-         //     "{\"arg1\": 1}\n"
-         //     // Look what's not here: <|tool▁calls▁end|> (also missing the <|end▁of▁sentence|>, but that is removed lazily by the test's delta logic)
-         //     "```<|tool▁call▁end|>",
-         //     /* expect_grammar_triggered= */ true,
-         //     /* test_grammar_if_triggered= */ false);
-     }
-     {
-         // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
-         auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
-         std::vector<std::string> end_tokens{ "<|end▁of▁sentence|>" };
-
-         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-         assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING, common_chat_templates_apply(tmpls.get(), inputs_tools_think).format);
-
-         test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-         test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-         assert_msg_equals(message_assist_thoughts_unparsed_think,
-             common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
-                 COMMON_CHAT_FORMAT_DEEPSEEK_R1));
-         assert_msg_equals(message_assist_thoughts,
-             common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
-                 COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
-
-         assert_msg_equals(message_assist_call_thoughts_unparsed,
-             common_chat_parse(
-                 "<think>I'm\nthinking</think>\n\n"
-                 "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
-                 "```json\n"
-                 "{\"arg1\": 1}\n"
-                 "```<|tool▁call▁end|><|tool▁calls▁end|>",
-                 COMMON_CHAT_FORMAT_DEEPSEEK_R1));
-         assert_msg_equals(message_assist_call_thoughts,
-             common_chat_parse(
-                 "<think>I'm\nthinking</think>\n\n"
-                 "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
-                 "```json\n"
-                 "{\"arg1\": 1}\n"
-                 "```<|tool▁call▁end|><|tool▁calls▁end|>",
-                 COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING));
-         test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-             "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
-             "```json\n"
-             "{\"arg1\": 1}\n"
-             "```<|tool▁call▁end|><|tool▁calls▁end|>");
-     }
- }
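For context, every removed parser assertion above funnels through common_chat_parse(text, format), which turns raw model output into a structured message. The following is a minimal sketch of that call pattern, assuming the common_chat_msg / common_chat_tool_call layout implied by the assertions above (the tool_calls field names are inferred from the removed test's expectations, not verified against this release):

    // Sketch only: header name and struct fields are assumptions based on the
    // removed test code, which included "chat.h" from llama.cpp's common/ dir.
    #include <cassert>
    #include <iostream>

    #include "chat.h"

    int main() {
        // Parse a Hermes-2-Pro style tool call, as the message_assist_call
        // assertions above did.
        common_chat_msg msg = common_chat_parse(
            "<tool_call>\n"
            "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
            "</tool_call>",
            COMMON_CHAT_FORMAT_HERMES_2_PRO);

        assert(msg.tool_calls.size() == 1);                   // one call extracted
        assert(msg.tool_calls[0].name == "special_function"); // name pulled from JSON
        std::cout << msg.tool_calls[0].arguments << '\n';     // {"arg1": 1}
        return 0;
    }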
-
- int main(int argc, char ** argv) {
-     // try {
- #ifndef _WIN32
-     if (argc > 1) {
-         common_chat_templates_inputs inputs;
-         common_chat_msg msg;
-         msg.role = "user";
-         msg.content = "Hey";
-         inputs.messages = {msg};
-         inputs.tools = { special_function_tool };
-
-         std::cout << "| Template | Format |\n";
-         std::cout << "|----------|--------|\n";
-
-         for (int i = 1; i < argc; i++) {
-             try {
-                 std::string path = argv[i];
-                 if (path.rfind(".jinja") != path.size() - 6) {
-                     std::cerr << "Skipping non-jinja file: " << path << '\n';
-                     continue;
-                 }
-                 auto tmpls = read_templates(path);
-                 auto parts = string_split(path, "/");
-                 auto name = parts[parts.size() - 1];
-                 auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
-                 std::cout << "| " << name << " | " << format << " |\n";
-             } catch (const std::exception & e) {
-                 std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
-             }
-         }
-     } else
- #endif
-     {
-         test_msgs_oaicompat_json_conversion();
-         test_tools_oaicompat_json_conversion();
-         test_template_output_parsers();
-         std::cout << "\n[chat] All tests passed!" << '\n';
-     }
-     return 0;
-     // } catch (const std::exception & e) {
-     //     std::cerr << "Error: " << e.what() << '\n';
-     //     return 1;
-     // }
- }
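As the removed main() shows, the test binary doubled as a small template inspector on non-Windows builds: given one or more .jinja paths it printed a | Template | Format | table mapping each template file to its detected chat format, and with no arguments it ran the three test suites. A hypothetical invocation (the binary name is assumed from the test source name, not confirmed by this diff):

    ./test-chat models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja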