@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
@@ -0,0 +1,288 @@
1
+ // Tests common_regex (esp. its partial final matches support).
2
+
3
+ #include "common.h"
4
+ #include "regex-partial.h"
5
+
6
+ #include <sstream>
7
+ #include <iostream>
8
+ #include <optional>
9
+
10
// Checks that `actual` equals `expected`.
//
// On a mismatch both values are written to stderr and std::runtime_error("Test failed")
// is thrown. T must support operator!= and streaming with operator<<.
template <class T> static void assert_equals(const T & expected, const T & actual) {
    if (!(expected != actual)) {
        return;
    }
    std::cerr << "Expected: " << expected << '\n'
              << " Actual: " << actual << '\n'
              << std::flush;
    throw std::runtime_error("Test failed");
}
18
+
19
+ struct test_case {
20
+ std::string pattern;
21
+ struct input_output {
22
+ std::string input;
23
+ common_regex_match output;
24
+ };
25
+ std::vector<input_output> inputs_outputs;
26
+ };
27
+
28
+ static std::string common_regex_match_type_name(common_regex_match_type type) {
29
+ switch (type) {
30
+ case COMMON_REGEX_MATCH_TYPE_NONE:
31
+ return "COMMON_REGEX_MATCH_TYPE_NONE";
32
+ case COMMON_REGEX_MATCH_TYPE_PARTIAL:
33
+ return "COMMON_REGEX_MATCH_TYPE_PARTIAL";
34
+ case COMMON_REGEX_MATCH_TYPE_FULL:
35
+ return "COMMON_REGEX_MATCH_TYPE_FULL";
36
+ }
37
+ return "?";
38
+ }
39
+
40
+ static void test_regex() {
41
+ printf("[%s]\n", __func__);
42
+ auto test = [](const test_case & test_case) {
43
+ common_regex cr(test_case.pattern);
44
+ std::cout << "Testing pattern: /" << test_case.pattern << "/\n";
45
+ // std::cout << " partial rev: " << cr.reversed_partial_pattern.str() << '\n';
46
+ for (const auto & input_output : test_case.inputs_outputs) {
47
+ std::cout << " Input: " << input_output.input << '\n';
48
+ auto m = cr.search(input_output.input, 0);
49
+ if (m != input_output.output) {
50
+ auto match_to_str = [&](const std::optional<common_regex_match> & m) {
51
+ std::ostringstream ss;
52
+ if (m->type == COMMON_REGEX_MATCH_TYPE_NONE) {
53
+ ss << "<no match>";
54
+ } else {
55
+ GGML_ASSERT(!input_output.output.groups.empty());
56
+ std::vector<std::string> parts;
57
+ for (const auto & g : m->groups) {
58
+ parts.push_back("{" + std::to_string(g.begin) + ", " + std::to_string(g.end) + "}");
59
+ }
60
+ ss << "{" << common_regex_match_type_name(m->type) << ", {" << string_join(parts, ", ") << "}}";
61
+ }
62
+ return ss.str();
63
+ };
64
+ std::cout << " Expected: " << match_to_str(input_output.output) << '\n';
65
+ std::cout << " Got: " << match_to_str(m) << '\n';
66
+ std::cout << " Inverted pattern: /" << regex_to_reversed_partial_regex(test_case.pattern) << "/\n";
67
+
68
+ throw std::runtime_error("Test failed");
69
+ }
70
+ }
71
+ };
72
+ test({
73
+ "a",
74
+ {
75
+ {"a", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 1}}}},
76
+ {"b", {COMMON_REGEX_MATCH_TYPE_NONE, {}}},
77
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 1}}}},
78
+ {"ba", {COMMON_REGEX_MATCH_TYPE_FULL, {{1, 2}}}},
79
+ }
80
+ });
81
+ test({
82
+ "abcd",
83
+ {
84
+ {"abcd", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
85
+ {"abcde", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
86
+ {"abc", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
87
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
88
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
89
+ {"d", {}},
90
+ {"bcd", {}},
91
+ {"cde", {}},
92
+ {"cd", {}},
93
+ {"yeah ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{5, 7}}}},
94
+ {"abbie", {}},
95
+ {"", {}},
96
+ }
97
+ });
98
+ test({
99
+ ".*?ab",
100
+ {
101
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
102
+ {"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
103
+ {"dab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
104
+ {"dabc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
105
+ {"da", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
106
+ {"d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
107
+ }
108
+ });
109
+ test({
110
+ "a.*?b",
111
+ {
112
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
113
+ {"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
114
+ {"a b", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
115
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
116
+ {"argh", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
117
+ {"d", {}},
118
+ {"b", {}},
119
+ }
120
+ });
121
+ test({
122
+ "ab(?:cd){2,4}ef",
123
+ {
124
+ // {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, 0, {}}},
125
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
126
+ {"abcd", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
127
+ {"abcde", {}},
128
+ {"abcdef", {}},
129
+ {"abcdcd", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
130
+ {"abcdcde", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 7}}}},
131
+ {"abcdcdef", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}}}},
132
+ {"abcdcdcdcdef", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 12}}}},
133
+ {"abcdcdcdcdcdef", {}},
134
+ {"abcde", {}},
135
+ {"yea", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{2, 3}}}},
136
+ }
137
+ });
138
+ test({
139
+ "a(?:rte| pure )fact",
140
+ {
141
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
142
+ {"art", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
143
+ {"artefa", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
144
+ {"fact", {}},
145
+ {"an arte", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{3, 7}}}},
146
+ {"artefact", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}}}},
147
+ {"an artefact", {COMMON_REGEX_MATCH_TYPE_FULL, {{3, 11}}}},
148
+ {"a pure", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
149
+ {"a pure fact", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 11}}}},
150
+ {"it's a pure fact", {COMMON_REGEX_MATCH_TYPE_FULL, {{5, 16}}}},
151
+ {"" , {}},
152
+ {"pure", {}},
153
+ {"pure fact", {}},
154
+ }
155
+ });
156
+ test({
157
+ "abc",
158
+ {
159
+ {" abcc", {COMMON_REGEX_MATCH_TYPE_FULL, {{1, 4}}}},
160
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
161
+ {"abc", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
162
+ {" ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{1, 3}}}},
163
+ {"a", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 1}}}},
164
+ {"b", {}},
165
+ {"c", {}},
166
+ {"", {}},
167
+ }
168
+ });
169
+
170
+ test({
171
+ "(?:abc)?\\s*def",
172
+ {
173
+ {"ab", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
174
+ {"abc", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
175
+ {"abc ", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 4}}}},
176
+ {"abc d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
177
+ {"abc de", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
178
+ {"abc def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
179
+ {"abc defg", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
180
+ {"abc defgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 7}}}},
181
+ {"abcde", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 5}}}},
182
+ {"abcdefgh", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 6}}}},
183
+ {" d", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 2}}}},
184
+ {"def", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 3}}}},
185
+ }
186
+ });
187
+
188
+ test({
189
+ "a+b",
190
+ {
191
+ {"aaab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 4}}}},
192
+ {"aaa", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 3}}}},
193
+ {"ab", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 2}}}},
194
+ }
195
+ });
196
+
197
+ test({
198
+ "(?:"
199
+ "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
200
+ "(" // match 2 (open_tag)
201
+ "<tool_call>"
202
+ "|<function_call>"
203
+ "|<tool>"
204
+ "|<tools>"
205
+ "|<response>"
206
+ "|<json>"
207
+ "|<xml>"
208
+ "|<JSON>"
209
+ ")?"
210
+ "(\\s*\\{\\s*\"name\"\\s*:)" // match 3 (named tool call)
211
+ ")"
212
+ "|<function=([^>]+)>" // match 4 (function name)
213
+ "|<function name=\"([^\"]+)\">", // match 5 (function name again)
214
+ {
215
+ {"{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 8}, {54, 54}, {54, 54}, {0, 8}, {54, 54}, {54, 54}}}},
216
+ {"<tool_call> {\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 18}}}},
217
+ {"<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 17}}}},
218
+ {"Let's call something\n<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{21, 38}}}},
219
+ {"Ok then<tool_call>{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 24}}}},
220
+ {"{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{0, 6}}}},
221
+ {"Ok then{\"name", {COMMON_REGEX_MATCH_TYPE_PARTIAL, {{7, 13}}}},
222
+ {"<tool_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 20}, {66, 66}, {0, 11}, {11, 20}, {66, 66}, {66, 66}}}},
223
+ {"<function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 24}, {70, 70}, {0, 15}, {15, 24}, {70, 70}, {70, 70}}}},
224
+ {"<function name=\"special_function\"> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 34}, {89, 89}, {89, 89}, {89, 89}, {89, 89}, {16, 32}}}},
225
+ {"<function=all>", {COMMON_REGEX_MATCH_TYPE_FULL, {{0, 14}, {14, 14}, {14, 14}, {14, 14}, {10, 13}, {14, 14}}}},
226
+
227
+ }
228
+ });
229
+ }
230
+
231
+ static void test_regex_to_reversed_partial_regex() {
232
+ printf("[%s]\n", __func__);
233
+
234
+ assert_equals<std::string>(
235
+ "((?:(?:c)?b)?a)[\\s\\S]*",
236
+ regex_to_reversed_partial_regex("abc"));
237
+
238
+ assert_equals<std::string>(
239
+ "(a+)[\\s\\S]*",
240
+ regex_to_reversed_partial_regex("a+"));
241
+
242
+ assert_equals<std::string>(
243
+ "(a*)[\\s\\S]*",
244
+ regex_to_reversed_partial_regex("a*"));
245
+
246
+ assert_equals<std::string>(
247
+ "(a?)[\\s\\S]*",
248
+ regex_to_reversed_partial_regex("a?"));
249
+
250
+ assert_equals<std::string>(
251
+ "([a-z])[\\s\\S]*",
252
+ regex_to_reversed_partial_regex("[a-z]"));
253
+
254
+ assert_equals<std::string>(
255
+ "((?:\\w+)?[a-z])[\\s\\S]*",
256
+ regex_to_reversed_partial_regex("[a-z]\\w+"));
257
+
258
+ assert_equals<std::string>(
259
+ "((?:a|b))[\\s\\S]*",
260
+ regex_to_reversed_partial_regex("(?:a|b)"));
261
+ assert_equals<std::string>(
262
+ "((?:(?:(?:d)?c)?b)?a)[\\s\\S]*",
263
+ regex_to_reversed_partial_regex("abcd"));
264
+ assert_equals<std::string>(
265
+ "((?:b)?a*)[\\s\\S]*", // TODO: ((?:b)?a*+).* ??
266
+ regex_to_reversed_partial_regex("a*b"));
267
+ assert_equals<std::string>(
268
+ "((?:(?:b)?a)?.*)[\\s\\S]*",
269
+ regex_to_reversed_partial_regex(".*?ab"));
270
+ assert_equals<std::string>(
271
+ "((?:(?:b)?.*)?a)[\\s\\S]*",
272
+ regex_to_reversed_partial_regex("a.*?b"));
273
+ assert_equals<std::string>(
274
+ "((?:(?:d)?(?:(?:c)?b))?a)[\\s\\S]*",
275
+ regex_to_reversed_partial_regex("a(bc)d"));
276
+ assert_equals<std::string>(
277
+ "((?:(?:(?:c)?b|(?:e)?d))?a)[\\s\\S]*",
278
+ regex_to_reversed_partial_regex("a(bc|de)"));
279
+ assert_equals<std::string>(
280
+ "((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)[\\s\\S]*",
281
+ regex_to_reversed_partial_regex("ab{2,4}c"));
282
+ }
283
+
284
+ int main() {
285
+ test_regex_to_reversed_partial_regex();
286
+ test_regex();
287
+ std::cout << "All tests passed.\n";
288
+ }
@@ -360,7 +360,7 @@ int main(void) {
360
360
  test_dry({0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, {0, 1, 2, 3, 4, 0, 1}, {0.2f, 0.2f, 0.2f, 0.2f, 0.2f}, 1.0f, 1.1f, 4, 7, {});
361
361
 
362
362
  test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.571429f, 0.428571f, 0.0f, 0.0f}, 1.00f);
363
- test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.00f);
363
+ test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 0.00f); // top_n_sigma == 0 now represents a no-op rather than greedy decoding as of PR#13345
364
364
  test_top_n_sigma({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 3.00f);
365
365
 
366
366
  test_sampler_queue(10000, "k", 10000, 1.0f, 1.0f);
@@ -1,8 +1,9 @@
1
1
  #include "llama.h"
2
2
  #include "common.h"
3
- #include "unicode.h"
4
3
  #include "console.h"
5
4
 
5
+ #include "../src/unicode.h"
6
+
6
7
  #include <cassert>
7
8
  #include <codecvt>
8
9
  #include <cstdio>
@@ -1,8 +1,9 @@
1
1
  #include "llama.h"
2
2
  #include "common.h"
3
- #include "unicode.h"
4
3
  #include "console.h"
5
4
 
5
+ #include "../src/unicode.h"
6
+
6
7
  #include <cassert>
7
8
  #include <codecvt>
8
9
  #include <cstdio>
@@ -0,0 +1,39 @@
1
# dependencies

find_package(Threads REQUIRED)

# third-party

# ...

# flags

llama_add_compile_flags()

# tools

# No tool subdirectory is added when EMSCRIPTEN is set.
if (NOT EMSCRIPTEN)
    add_subdirectory(batched-bench)
    add_subdirectory(gguf-split)
    add_subdirectory(imatrix)
    add_subdirectory(llama-bench)
    add_subdirectory(main)
    add_subdirectory(perplexity)
    add_subdirectory(quantize)
    if (LLAMA_BUILD_SERVER)
        add_subdirectory(server)
    endif()
    add_subdirectory(run)
    add_subdirectory(tokenize)
    add_subdirectory(tts)
    add_subdirectory(mtmd)
    if (GGML_RPC)
        add_subdirectory(rpc)
    endif()
    if (NOT GGML_BACKEND_DL)
        # these examples use the backends directly and cannot be built with dynamic loading
        add_subdirectory(cvector-generator)
        add_subdirectory(export-lora)
    endif()
endif()
@@ -38,7 +38,7 @@ int main(int argc, char ** argv) {
38
38
 
39
39
  llama_model_params model_params = common_model_params_to_llama(params);
40
40
 
41
- llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
41
+ llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params);
42
42
 
43
43
  if (model == NULL) {
44
44
  fprintf(stderr , "%s: error: unable to load model\n" , __func__);
@@ -123,8 +123,8 @@ int main(int argc, char ** argv) {
123
123
 
124
124
  common_batch_clear(batch);
125
125
 
126
- for (int i = 0; i < pp; ++i) {
127
- for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) {
126
+ for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) {
127
+ for (int i = 0; i < pp; ++i) {
128
128
  common_batch_add(batch, 0, i, { j }, false);
129
129
  }
130
130
  }
@@ -421,7 +421,7 @@ int main(int argc, char ** argv) {
421
421
 
422
422
  g_verbose = (params.verbosity > 1);
423
423
  try {
424
- lora_merge_ctx ctx(params.model, params.lora_adapters, params.out_file, params.cpuparams.n_threads);
424
+ lora_merge_ctx ctx(params.model.path, params.lora_adapters, params.out_file, params.cpuparams.n_threads);
425
425
  ctx.run_merge();
426
426
  } catch (const std::exception & err) {
427
427
  fprintf(stderr, "%s\n", err.what());
@@ -408,8 +408,6 @@ static void gguf_merge(const split_params & split_params) {
408
408
  exit(EXIT_FAILURE);
409
409
  }
410
410
 
411
- std::ofstream fout(split_params.output.c_str(), std::ios::binary);
412
- fout.exceptions(std::ofstream::failbit); // fail fast on write errors
413
411
 
414
412
  auto * ctx_out = gguf_init_empty();
415
413
 
@@ -453,7 +451,6 @@ static void gguf_merge(const split_params & split_params) {
453
451
  gguf_free(ctx_gguf);
454
452
  ggml_free(ctx_meta);
455
453
  gguf_free(ctx_out);
456
- fout.close();
457
454
  exit(EXIT_FAILURE);
458
455
  }
459
456
 
@@ -466,7 +463,6 @@ static void gguf_merge(const split_params & split_params) {
466
463
  gguf_free(ctx_gguf);
467
464
  ggml_free(ctx_meta);
468
465
  gguf_free(ctx_out);
469
- fout.close();
470
466
  exit(EXIT_FAILURE);
471
467
  }
472
468
 
@@ -479,7 +475,6 @@ static void gguf_merge(const split_params & split_params) {
479
475
  gguf_free(ctx_gguf);
480
476
  ggml_free(ctx_meta);
481
477
  gguf_free(ctx_out);
482
- fout.close();
483
478
  exit(EXIT_FAILURE);
484
479
  }
485
480
 
@@ -500,9 +495,11 @@ static void gguf_merge(const split_params & split_params) {
500
495
 
501
496
  fprintf(stderr, "\033[3Ddone\n");
502
497
  }
503
-
504
- // placeholder for the meta data
505
- {
498
+ std::ofstream fout;
499
+ if (!split_params.dry_run) {
500
+ fout.open(split_params.output.c_str(), std::ios::binary);
501
+ fout.exceptions(std::ofstream::failbit); // fail fast on write errors
502
+ // placeholder for the meta data
506
503
  auto meta_size = gguf_get_meta_size(ctx_out);
507
504
  ::zeros(fout, meta_size);
508
505
  }
@@ -518,7 +515,9 @@ static void gguf_merge(const split_params & split_params) {
518
515
  ggml_free(ctx_metas[i]);
519
516
  }
520
517
  gguf_free(ctx_out);
521
- fout.close();
518
+ if (!split_params.dry_run) {
519
+ fout.close();
520
+ }
522
521
  exit(EXIT_FAILURE);
523
522
  }
524
523
  fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path);
@@ -540,10 +539,11 @@ static void gguf_merge(const split_params & split_params) {
540
539
  auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
541
540
  f_input.seekg(offset);
542
541
  f_input.read((char *)read_data.data(), n_bytes);
543
-
544
- // write tensor data + padding
545
- fout.write((const char *)read_data.data(), n_bytes);
546
- zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
542
+ if (!split_params.dry_run) {
543
+ // write tensor data + padding
544
+ fout.write((const char *)read_data.data(), n_bytes);
545
+ zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
546
+ }
547
547
  }
548
548
 
549
549
  gguf_free(ctx_gguf);
@@ -552,16 +552,15 @@ static void gguf_merge(const split_params & split_params) {
552
552
  fprintf(stderr, "\033[3Ddone\n");
553
553
  }
554
554
 
555
- {
555
+ if (!split_params.dry_run) {
556
556
  // go back to beginning of file and write the updated metadata
557
557
  fout.seekp(0);
558
558
  std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
559
559
  gguf_get_meta_data(ctx_out, data.data());
560
560
  fout.write((const char *)data.data(), data.size());
561
-
562
561
  fout.close();
563
- gguf_free(ctx_out);
564
562
  }
563
+ gguf_free(ctx_out);
565
564
 
566
565
  fprintf(stderr, "%s: %s merged from %d split with %d tensors.\n",
567
566
  __func__, split_params.output.c_str(), n_split, total_tensors);
@@ -24,7 +24,8 @@ static void print_usage(int, char ** argv) {
24
24
  LOG("\n %s \\\n"
25
25
  " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
26
26
  " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
27
- " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
27
+ " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
28
+ " [--parse-special]\n" , argv[0]);
28
29
  LOG("\n");
29
30
  }
30
31
 
@@ -46,7 +47,7 @@ private:
46
47
  common_params m_params;
47
48
  std::mutex m_mutex;
48
49
  int m_last_call = 0;
49
- std::vector<float> m_src1_data;
50
+ std::vector<char> m_src1_data;
50
51
  std::vector<char> m_ids; // the expert ids from ggml_mul_mat_id
51
52
  };
52
53
 
@@ -93,11 +94,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
93
94
  const bool is_host = ggml_backend_buffer_is_host(src1->buffer);
94
95
 
95
96
  if (!is_host) {
96
- m_src1_data.resize(ggml_nelements(src1));
97
- ggml_backend_tensor_get(src1, m_src1_data.data(), 0, ggml_nbytes(src1));
97
+ const size_t src1_nbytes = ggml_nbytes(src1);
98
+ m_src1_data.resize(src1_nbytes);
99
+ ggml_backend_tensor_get(src1, m_src1_data.data(), 0, src1_nbytes);
98
100
  }
99
101
 
100
- const float * data = is_host ? (const float *) src1->data : m_src1_data.data();
102
+ const char * data = is_host ? (const char *) src1->data : m_src1_data.data();
103
+ GGML_ASSERT(src1->nb[0] == ggml_element_size(src1));
101
104
 
102
105
  // this has been adapted to the new format of storing merged experts in a single 3d tensor
103
106
  // ref: https://github.com/ggml-org/llama.cpp/pull/6387
@@ -144,7 +147,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
144
147
 
145
148
  const int64_t i11 = idx % src1->ne[1];
146
149
  const int64_t i12 = row;
147
- const float * x = (const float *)((const char *)data + i11*src1->nb[1] + i12*src1->nb[2]);
150
+ const float * x = (const float *)(data + i11*src1->nb[1] + i12*src1->nb[2]);
148
151
 
149
152
  for (int j = 0; j < (int)src1->ne[0]; ++j) {
150
153
  e.values[e_start + j] += x[j]*x[j];
@@ -180,7 +183,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
180
183
  ++e.ncall;
181
184
  LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
182
185
  for (int row = 0; row < (int)src1->ne[1]; ++row) {
183
- const float * x = data + row * src1->ne[0];
186
+ const float * x = (const float *) (data + row * src1->nb[1]);
184
187
  for (int j = 0; j < (int)src1->ne[0]; ++j) {
185
188
  e.values[j] += x[j]*x[j];
186
189
  e.counts[j]++;
@@ -437,7 +440,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
437
440
  auto tim1 = std::chrono::high_resolution_clock::now();
438
441
  LOG_INF("%s: tokenizing the input ..\n", __func__);
439
442
 
440
- std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
443
+ std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special);
441
444
 
442
445
  auto tim2 = std::chrono::high_resolution_clock::now();
443
446
  LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
@@ -583,7 +586,6 @@ int main(int argc, char ** argv) {
583
586
  params.out_file = "imatrix.dat" ;
584
587
 
585
588
  params.n_ctx = 512;
586
- params.logits_all = true;
587
589
  params.escape = false;
588
590
 
589
591
  if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) {