@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/src/llama.cpp/src/unicode-data.cpp

@@ -2311,7 +2311,7 @@ const std::unordered_set<uint32_t> unicode_set_whitespace = {
 0x003000,
 };
 
-// list is always in ascending order, to enable binary searh
+// list is always in ascending order, to enable binary search
 const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase = {
 {0x000041, 0x000061},
 {0x000042, 0x000062},
@@ -3748,7 +3748,7 @@ const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase
 {0x01E921, 0x01E943},
 };
 
-// list is always in ascending order, to enable binary searh
+// list is always in ascending order, to enable binary search
 const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase = {
 {0x000061, 0x000041},
 {0x000062, 0x000042},
package/src/llama.cpp/src/unicode.cpp

@@ -71,15 +71,15 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
     throw std::invalid_argument("failed to convert utf8 to codepoint");
 }
 
-//static std::vector<uint16_t> unicode_cpt_to_utf16(uint32_t cp) {
+//static std::vector<uint16_t> unicode_cpt_to_utf16(uint32_t cpt) {
 //    std::vector<uint16_t> result;
-//    if (/* 0x0000 <= cp && */ cp <= 0xffff) {
-//        result.emplace_back(cp);
+//    if (/* 0x0000 <= cpt && */ cpt <= 0xffff) {
+//        result.emplace_back(cpt);
 //        return result;
 //    }
-//    if (0x10000 <= cp && cp <= 0x10ffff) {
-//        result.emplace_back(0xd800 | ((cp - 0x10000) >> 10));
-//        result.emplace_back(0xdc00 | ((cp - 0x10000) & 0x03ff));
+//    if (0x10000 <= cpt && cpt <= 0x10ffff) {
+//        result.emplace_back(0xd800 | ((cpt - 0x10000) >> 10));
+//        result.emplace_back(0xdc00 | ((cpt - 0x10000) & 0x03ff));
 //        return result;
 //    }
 //    throw std::invalid_argument("failed to convert codepoint to utf16");
@@ -120,8 +120,8 @@ uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) {
 //    return result;
 //}
 
-static std::vector<codepoint_flags> unicode_cpt_flags_array() {
-    std::vector<codepoint_flags> cpt_flags(MAX_CODEPOINTS, codepoint_flags::UNDEFINED);
+static std::vector<unicode_cpt_flags> unicode_cpt_flags_array() {
+    std::vector<unicode_cpt_flags> cpt_flags(MAX_CODEPOINTS, unicode_cpt_flags::UNDEFINED);
 
     assert (unicode_ranges_flags.begin()[0].first == 0);
     assert (unicode_ranges_flags.begin()[unicode_ranges_flags.size()-1].first == MAX_CODEPOINTS);
@@ -201,7 +201,18 @@ static std::unordered_map<std::string, uint8_t> unicode_utf8_to_byte_map() {
 }
 
 static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
+#if defined(__clang__)
+    // disable C++17 deprecation warning for std::codecvt_utf8
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
     std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
+
+#if defined(__clang__)
+#    pragma clang diagnostic pop
+#endif
+
     return conv.from_bytes(s);
 }
 
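Note: std::codecvt_utf8 and std::wstring_convert were deprecated in C++17, so recent clang versions flag the conv line above under -Wdeprecated-declarations; the push/ignored/pop pragmas scope the suppression to just that conversion. A minimal standalone sketch of the merged result (the to_wide helper name is illustrative, not from the package):

#include <codecvt>
#include <locale>
#include <string>

static std::wstring to_wide(const std::string & s) {
#if defined(__clang__)
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
    // deprecated since C++17, but still the dependency-free way to widen UTF-8
    std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
#if defined(__clang__)
#    pragma clang diagnostic pop
#endif
    return conv.from_bytes(s);
}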
@@ -242,8 +253,8 @@ static std::vector<size_t> unicode_regex_split_custom_gpt2(const std::string & t
         return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
     };
 
-    auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
-        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
+    auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags {
+        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{};
     };
 
     size_t _prev_end = offset_ini;
@@ -360,8 +371,8 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
         return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
     };
 
-    auto _get_flags = [&] (const size_t pos) -> codepoint_flags {
-        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags(cpts[pos]) : codepoint_flags{};
+    auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags {
+        return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{};
     };
 
     size_t _prev_end = offset_ini;
@@ -561,29 +572,29 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
 // interface
 //
 
-std::string unicode_cpt_to_utf8(uint32_t cp) {
+std::string unicode_cpt_to_utf8(uint32_t cpt) {
     std::string result;
 
-    if (/* 0x00 <= cp && */ cp <= 0x7f) {
-        result.push_back(cp);
+    if (/* 0x00 <= cpt && */ cpt <= 0x7f) {
+        result.push_back(cpt);
         return result;
     }
-    if (0x80 <= cp && cp <= 0x7ff) {
-        result.push_back(0xc0 | ((cp >> 6) & 0x1f));
-        result.push_back(0x80 | (cp & 0x3f));
+    if (0x80 <= cpt && cpt <= 0x7ff) {
+        result.push_back(0xc0 | ((cpt >> 6) & 0x1f));
+        result.push_back(0x80 | (cpt & 0x3f));
         return result;
     }
-    if (0x800 <= cp && cp <= 0xffff) {
-        result.push_back(0xe0 | ((cp >> 12) & 0x0f));
-        result.push_back(0x80 | ((cp >> 6) & 0x3f));
-        result.push_back(0x80 | (cp & 0x3f));
+    if (0x800 <= cpt && cpt <= 0xffff) {
+        result.push_back(0xe0 | ((cpt >> 12) & 0x0f));
+        result.push_back(0x80 | ((cpt >> 6) & 0x3f));
+        result.push_back(0x80 | (cpt & 0x3f));
         return result;
     }
-    if (0x10000 <= cp && cp <= 0x10ffff) {
-        result.push_back(0xf0 | ((cp >> 18) & 0x07));
-        result.push_back(0x80 | ((cp >> 12) & 0x3f));
-        result.push_back(0x80 | ((cp >> 6) & 0x3f));
-        result.push_back(0x80 | (cp & 0x3f));
+    if (0x10000 <= cpt && cpt <= 0x10ffff) {
+        result.push_back(0xf0 | ((cpt >> 18) & 0x07));
+        result.push_back(0x80 | ((cpt >> 12) & 0x3f));
+        result.push_back(0x80 | ((cpt >> 6) & 0x3f));
+        result.push_back(0x80 | (cpt & 0x3f));
         return result;
     }
 
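Note: the four branches above are standard UTF-8 encoding (1 to 4 bytes depending on codepoint range); only the parameter name changes from cp to cpt. A worked check of the two multi-byte branches, assuming unicode.h from src/llama.cpp/src is on the include path:

// U+00E9 'é' takes the 2-byte branch: 0xC0 | (0xE9 >> 6) = 0xC3, 0x80 | (0xE9 & 0x3F) = 0xA9
// U+20AC '€' takes the 3-byte branch: 0xE2 0x82 0xAC
#include <cassert>
#include "unicode.h"

int main() {
    assert(unicode_cpt_to_utf8(0x00E9) == "\xC3\xA9");
    assert(unicode_cpt_to_utf8(0x20AC) == "\xE2\x82\xAC");
    return 0;
}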
@@ -613,19 +624,19 @@ std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8) {
     return result;
 }
 
-codepoint_flags unicode_cpt_flags(const uint32_t cp) {
-    static const codepoint_flags undef(codepoint_flags::UNDEFINED);
+unicode_cpt_flags unicode_cpt_flags_from_cpt(const uint32_t cpt) {
+    static const unicode_cpt_flags undef(unicode_cpt_flags::UNDEFINED);
     static const auto cpt_flags = unicode_cpt_flags_array();
-    return cp < cpt_flags.size() ? cpt_flags[cp] : undef;
+    return cpt < cpt_flags.size() ? cpt_flags[cpt] : undef;
 }
 
-codepoint_flags unicode_cpt_flags(const std::string & utf8) {
-    static const codepoint_flags undef(codepoint_flags::UNDEFINED);
+unicode_cpt_flags unicode_cpt_flags_from_utf8(const std::string & utf8) {
+    static const unicode_cpt_flags undef(unicode_cpt_flags::UNDEFINED);
     if (utf8.empty()) {
         return undef; // undefined
     }
     size_t offset = 0;
-    return unicode_cpt_flags(unicode_cpt_from_utf8(utf8, offset));
+    return unicode_cpt_flags_from_cpt(unicode_cpt_from_utf8(utf8, offset));
 }
 
 std::string unicode_byte_to_utf8(uint8_t byte) {
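Note: the two unicode_cpt_flags() overloads become distinctly named functions, matching the renamed struct and avoiding any ambiguity between uint32_t and std::string callers. A minimal usage sketch under the new names (include path assumed to be src/llama.cpp/src):

#include <cassert>
#include "unicode.h"

int main() {
    // look up flags by raw codepoint ...
    const unicode_cpt_flags by_cpt = unicode_cpt_flags_from_cpt(0x0020); // U+0020 SPACE
    assert(by_cpt.is_whitespace);

    // ... or from the first codepoint of a UTF-8 string
    const unicode_cpt_flags by_utf8 = unicode_cpt_flags_from_utf8(" ");
    assert(by_utf8.is_whitespace);
    return 0;
}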
@@ -638,41 +649,41 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8) {
     return map.at(utf8);
 }
 
-uint32_t unicode_tolower(uint32_t cp) {
+uint32_t unicode_tolower(uint32_t cpt) {
     // binary search
-    auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cp,
+    auto it = std::lower_bound(unicode_map_lowercase.begin(), unicode_map_lowercase.end(), cpt,
         [](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
             return pair.first < value;
         });
-    if (it != unicode_map_lowercase.end() && it->first == cp) {
+    if (it != unicode_map_lowercase.end() && it->first == cpt) {
         return it->second;
     }
-    return cp;  // Return the original code point if no lowercase mapping is found
+    return cpt;  // Return the original code point if no lowercase mapping is found
 }
 
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
     // unicode categories
     static const std::map<std::string, int> k_ucat_enum = {
-        { "\\p{N}", codepoint_flags::NUMBER },
-        { "\\p{L}", codepoint_flags::LETTER },
-        { "\\p{P}", codepoint_flags::PUNCTUATION },
+        { "\\p{N}", unicode_cpt_flags::NUMBER },
+        { "\\p{L}", unicode_cpt_flags::LETTER },
+        { "\\p{P}", unicode_cpt_flags::PUNCTUATION },
     };
 
     static const std::map<int, int> k_ucat_cpt = {
-        { codepoint_flags::NUMBER,      0xD1 },
-        { codepoint_flags::LETTER,      0xD2 },
-        { codepoint_flags::PUNCTUATION, 0xD3 },
+        { unicode_cpt_flags::NUMBER,      0xD1 },
+        { unicode_cpt_flags::LETTER,      0xD2 },
+        { unicode_cpt_flags::PUNCTUATION, 0xD3 },
     };
 
     static const std::map<int, std::string> k_ucat_map = {
-        { codepoint_flags::NUMBER,      "\x30-\x39" }, // 0-9
-        { codepoint_flags::LETTER,      "\x41-\x5A\x61-\x7A" }, // A-Za-z
-        { codepoint_flags::PUNCTUATION, "\x21-\x23\x25-\x2A\x2C-\x2F\x3A-\x3B\x3F-\x40\\\x5B-\\\x5D\x5F\\\x7B\\\x7D" }, // !-#%-*,-/:-;?-@\[-\]_\{\}
+        { unicode_cpt_flags::NUMBER,      "\x30-\x39" }, // 0-9
+        { unicode_cpt_flags::LETTER,      "\x41-\x5A\x61-\x7A" }, // A-Za-z
+        { unicode_cpt_flags::PUNCTUATION, "\x21-\x23\x25-\x2A\x2C-\x2F\x3A-\x3B\x3F-\x40\\\x5B-\\\x5D\x5F\\\x7B\\\x7D" }, // !-#%-*,-/:-;?-@\[-\]_\{\}
     };
 
     // compute collapsed codepoints only if needed by at least one regex
     bool need_collapse = false;
-    for (auto & regex_expr : regex_exprs) {
+    for (const auto & regex_expr : regex_exprs) {
         // search for unicode categories
         for (const auto & ucat : k_ucat_enum) {
             if (std::string::npos != regex_expr.find(ucat.first)) {
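Note: the "ascending order" comment fixed in unicode-data.cpp is the precondition for the std::lower_bound call in unicode_tolower above: the predicate requires the keys to be sorted. A self-contained sketch of the same lookup pattern over a toy three-entry table (toy data, not the real mapping):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <utility>

int main() {
    // stand-in for unicode_map_lowercase: keys must be in ascending order
    static const std::initializer_list<std::pair<uint32_t, uint32_t>> lower = {
        {0x41, 0x61}, {0x42, 0x62}, {0x43, 0x63},  // 'A'->'a', 'B'->'b', 'C'->'c'
    };
    const uint32_t cpt = 0x42;
    auto it = std::lower_bound(lower.begin(), lower.end(), cpt,
        [](const std::pair<uint32_t, uint32_t> & pair, uint32_t value) {
            return pair.first < value;
        });
    // lower_bound lands on the first key >= cpt; equality means a mapping exists
    assert(it != lower.end() && it->first == cpt && it->second == 0x62);
    return 0;
}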
@@ -698,7 +709,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
             continue;
         }
 
-        const auto flags = unicode_cpt_flags(cpts[i]);
+        const auto flags = unicode_cpt_flags_from_cpt(cpts[i]);
 
         if (flags.is_whitespace) {
             //NOTE: C++ std::regex \s does not mach 0x85, Rust and Python regex does.
@@ -714,7 +725,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
 
     std::vector<size_t> bpe_offsets = { cpts.size() };
 
-    for (auto & regex_expr : regex_exprs) {
+    for (const auto & regex_expr : regex_exprs) {
         // first, see if we have an efficient custom regex implementation
         auto tmp = unicode_regex_split_custom(text, regex_expr, bpe_offsets);
 
@@ -728,7 +739,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
         // if a unicode category is used in the regex, we use the collapsed text and replace the unicode category
         // with the corresponding collapsed representation
         bool use_collapsed = false;
-        for (auto & ucat : k_ucat_enum) {
+        for (const auto & ucat : k_ucat_enum) {
             if (std::string::npos != regex_expr.find(ucat.first)) {
                 use_collapsed = true;
                 break;
@@ -794,7 +805,7 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
             // std::wregex \s does not mach non-ASCII whitespaces, using 0x0B as fallback
             std::wstring wtext(cpts.begin(), cpts.end());
             for (size_t i = 0; i < wtext.size(); ++i) {
-                if (wtext[i] > 0x7F && unicode_cpt_flags(wtext[i]).is_whitespace) {
+                if (wtext[i] > 0x7F && unicode_cpt_flags_from_cpt(wtext[i]).is_whitespace) {
                     wtext[i] = 0x0B;
                 }
             }
package/src/llama.cpp/src/unicode.h

@@ -4,9 +4,7 @@
 #include <string>
 #include <vector>
 
-// TODO: prefix all symbols with "llama_"
-
-struct codepoint_flags {
+struct unicode_cpt_flags {
     enum {
         UNDEFINED = 0x0001,
         NUMBER    = 0x0002,  // regex: \p{N}
@@ -35,7 +33,7 @@ struct codepoint_flags {
     uint16_t is_nfd : 1;
 
     // decode from uint16
-    inline codepoint_flags(const uint16_t flags=0) {
+    inline unicode_cpt_flags(const uint16_t flags = 0) {
         *reinterpret_cast<uint16_t*>(this) = flags;
     }
 
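Note: the constructor above writes a raw uint16_t over the struct's bit-fields, so the generated flag tables can be stored as plain integers and decoded on construction. A reduced sketch of that decode pattern with two made-up fields (the real struct packs more, ending with is_nfd as shown; bit-field layout is implementation-defined, though the low-bit-first order assumed here matches GCC/Clang/MSVC on common targets):

#include <cassert>
#include <cstdint>
#include <cstring>

struct flags16 {  // illustrative stand-in for unicode_cpt_flags
    enum { UNDEFINED = 0x0001, NUMBER = 0x0002 };
    uint16_t is_undefined : 1;
    uint16_t is_number    : 1;
    // decode from uint16; memcpy instead of the header's reinterpret_cast,
    // to sidestep the aliasing question in a standalone example
    flags16(uint16_t flags = 0) {
        std::memcpy(this, &flags, sizeof(flags));
    }
};

int main() {
    const flags16 f(flags16::NUMBER);
    assert(!f.is_undefined && f.is_number);
    return 0;
}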
@@ -50,18 +48,19 @@
 
 size_t unicode_len_utf8(char src);
 
-std::string unicode_cpt_to_utf8(uint32_t cp);
-uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset);
+std::string unicode_cpt_to_utf8  (uint32_t cpt);
+uint32_t    unicode_cpt_from_utf8(const std::string & utf8, size_t & offset);
+
 std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
 
 std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
 
-codepoint_flags unicode_cpt_flags(const uint32_t cp);
-codepoint_flags unicode_cpt_flags(const std::string & utf8);
+unicode_cpt_flags unicode_cpt_flags_from_cpt (uint32_t cpt);
+unicode_cpt_flags unicode_cpt_flags_from_utf8(const std::string & utf8);
 
 std::string unicode_byte_to_utf8(uint8_t byte);
-uint8_t unicode_utf8_to_byte(const std::string & utf8);
+uint8_t     unicode_utf8_to_byte(const std::string & utf8);
 
-uint32_t unicode_tolower(uint32_t cp);
+uint32_t unicode_tolower(uint32_t cpt);
 
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
package/src/llama.cpp/tests/CMakeLists.txt

@@ -84,56 +84,66 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
 
-# build test-tokenizer-1-bpe target once and add many tests
-add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
-target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
-install(TARGETS test-tokenizer-1-bpe RUNTIME)
-
-# TODO: disabled due to slowness
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
-
-# build test-tokenizer-1-spm target once and add many tests
-add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
-target_link_libraries(test-tokenizer-1-spm PRIVATE common)
-install(TARGETS test-tokenizer-1-spm RUNTIME)
-
-llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
-#llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
-
-# llama_target_and_test(test-double-float.cpp) # SLOW
+
+if (NOT WIN32)
+    # these tests are disabled on Windows because they use internal functions not exported with LLAMA_API
+    llama_target_and_test(test-sampling.cpp)
+    llama_target_and_test(test-grammar-parser.cpp)
+    llama_target_and_test(test-grammar-integration.cpp)
+    llama_target_and_test(test-llama-grammar.cpp)
+    # TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
+    if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
+        llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+        target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
+    endif()
+
+
+    # build test-tokenizer-1-bpe target once and add many tests
+    add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
+    target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
+    install(TARGETS test-tokenizer-1-bpe RUNTIME)
+
+    # TODO: disabled due to slowness
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
+    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
+
+    # build test-tokenizer-1-spm target once and add many tests
+    add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
+    target_link_libraries(test-tokenizer-1-spm PRIVATE common)
+    install(TARGETS test-tokenizer-1-spm RUNTIME)
+
+    llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
+    #llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
+
+    # llama_target_and_test(test-double-float.cpp) # SLOW
+endif()
+
 llama_target_and_test(test-log.cpp)
 llama_target_and_test(test-arg-parser.cpp)
-llama_target_and_test(test-quantize-fns.cpp)
-llama_target_and_test(test-quantize-perf.cpp)
-llama_target_and_test(test-sampling.cpp)
 llama_target_and_test(test-chat-template.cpp)
 
-llama_target_and_test(test-grammar-parser.cpp)
-llama_target_and_test(test-llama-grammar.cpp)
-llama_target_and_test(test-grammar-integration.cpp)
-llama_target_and_test(test-grad0.cpp)
-llama_target_and_test(test-barrier.cpp)
 # llama_target_and_test(test-opt.cpp) # SLOW
+llama_target_and_test(test-gguf.cpp)
 llama_target_and_test(test-backend-ops.cpp)
 
-llama_target_and_test(test-rope.cpp)
-
 llama_target_and_test(test-model-load-cancel.cpp LABEL "model")
 llama_target_and_test(test-autorelease.cpp LABEL "model")
 
-# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
-if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
-    llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
-    target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
+if (NOT GGML_BACKEND_DL)
+    # these tests use the backends directly and cannot be built with dynamic loading
+    llama_target_and_test(test-barrier.cpp)
+    llama_target_and_test(test-quantize-fns.cpp)
+    llama_target_and_test(test-quantize-perf.cpp)
+    llama_target_and_test(test-rope.cpp)
 endif()
 
+
 # dummy executable - not installed
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
 add_executable(${TEST_TARGET} test-c.c)
package/src/llama.cpp/tests/test-arg-parser.cpp

@@ -10,12 +10,12 @@
 #include <cassert>
 
 int main(void) {
-    gpt_params params;
+    common_params params;
 
     printf("test-arg-parser: make sure there is no duplicated arguments in any examples\n\n");
     for (int ex = 0; ex < LLAMA_EXAMPLE_COUNT; ex++) {
         try {
-            auto ctx_arg = gpt_params_parser_init(params, (enum llama_example)ex);
+            auto ctx_arg = common_params_parser_init(params, (enum llama_example)ex);
             std::unordered_set<std::string> seen_args;
             std::unordered_set<std::string> seen_env_vars;
             for (const auto & opt : ctx_arg.options) {
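Note: the vendored llama.cpp renames the gpt_-prefixed helpers in common/ to common_-prefixed ones (gpt_params → common_params, gpt_params_parse → common_params_parse, gpt_params_parser_init → common_params_parser_init), which drives most of the mechanical churn in this file. A minimal caller under the new names, mirroring only calls this test exercises; the include names follow llama.cpp's common/ layout and are assumptions here:

#include "arg.h"     // common_params_parse, common_params_parser_init (assumed header)
#include "common.h"  // common_params (assumed header)

int main(int argc, char ** argv) {
    common_params params;
    // parse CLI arguments the way examples tagged LLAMA_EXAMPLE_COMMON do;
    // returns false on unknown flags or malformed values
    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
        return 1;
    }
    // params.model, params.n_predict, params.n_batch, params.cpuparams.n_threads, ... are now populated
    return 0;
}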
@@ -58,45 +58,45 @@ int main(void) {
 
     // missing value
     argv = {"binary_name", "-m"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     // wrong value (int)
     argv = {"binary_name", "-ngl", "hello"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     // wrong value (enum)
     argv = {"binary_name", "-sm", "hello"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     // non-existence arg in specific example (--draft cannot be used outside llama-speculative)
     argv = {"binary_name", "--draft", "123"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SERVER));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_EMBEDDING));
 
 
     printf("test-arg-parser: test valid usage\n\n");
 
     argv = {"binary_name", "-m", "model_file.gguf"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "model_file.gguf");
 
     argv = {"binary_name", "-t", "1234"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.cpuparams.n_threads == 1234);
 
     argv = {"binary_name", "--verbose"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.verbosity > 1);
 
     argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "abc.gguf");
     assert(params.n_predict == 6789);
     assert(params.n_batch == 9090);
 
     // --draft cannot be used outside llama-speculative
     argv = {"binary_name", "--draft", "123"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SPECULATIVE));
-    assert(params.n_draft == 123);
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SPECULATIVE));
+    assert(params.speculative.n_max == 123);
 
     // skip this part on windows, because setenv is not supported
 #ifdef _WIN32
@@ -106,12 +106,12 @@ int main(void) {
 
     setenv("LLAMA_ARG_THREADS", "blah", true);
     argv = {"binary_name"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     setenv("LLAMA_ARG_MODEL", "blah.gguf", true);
     setenv("LLAMA_ARG_THREADS", "1010", true);
     argv = {"binary_name"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "blah.gguf");
     assert(params.cpuparams.n_threads == 1010);
 
@@ -121,7 +121,7 @@ int main(void) {
     setenv("LLAMA_ARG_MODEL", "blah.gguf", true);
     setenv("LLAMA_ARG_THREADS", "1010", true);
     argv = {"binary_name", "-m", "overwritten.gguf"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "overwritten.gguf");
     assert(params.cpuparams.n_threads == 1010);
 #endif // _WIN32