@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,583 +0,0 @@
1
- #include "ggml.h"
2
- #include "gguf.h"
3
- #include "llama.h"
4
- #include "common.h"
5
-
6
- #include <algorithm>
7
- #include <cinttypes>
8
- #include <climits>
9
- #include <cstdio>
10
- #include <cstdlib>
11
- #include <stdexcept>
12
- #include <cstring>
13
- #include <fstream>
14
- #include <string>
15
- #include <vector>
16
-
17
- #if defined(_WIN32)
18
- #include <windows.h>
19
- #ifndef PATH_MAX
20
- #define PATH_MAX MAX_PATH
21
- #endif
22
- #include <io.h>
23
- #endif
24
-
25
- enum split_operation : uint8_t {
26
- OP_NONE,
27
- OP_SPLIT,
28
- OP_MERGE,
29
- };
30
-
31
- enum split_mode : uint8_t {
32
- MODE_NONE,
33
- MODE_TENSOR,
34
- MODE_SIZE,
35
- };
36
-
37
- struct split_params {
38
- split_operation operation = OP_NONE;
39
- split_mode mode = MODE_NONE;
40
- size_t n_bytes_split = 0;
41
- int n_split_tensors = 128;
42
- std::string input;
43
- std::string output;
44
- bool no_tensor_first_split = false;
45
- bool dry_run = false;
46
- };
47
-
48
- static void split_print_usage(const char * executable) {
49
- const split_params default_params;
50
- printf("\n");
51
- printf("usage: %s [options] GGUF_IN GGUF_OUT\n", executable);
52
- printf("\n");
53
- printf("Apply a GGUF operation on IN to OUT.");
54
- printf("\n");
55
- printf("options:\n");
56
- printf(" -h, --help show this help message and exit\n");
57
- printf(" --version show version and build info\n");
58
- printf(" --split split GGUF to multiple GGUF (enabled by default)\n");
59
- printf(" --merge merge multiple GGUF to a single GGUF\n");
60
- printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
61
- printf(" --split-max-size N(M|G) max size per split\n");
62
- printf(" --no-tensor-first-split do not add tensors to the first split (disabled by default)\n");
63
- printf(" --dry-run only print out a split plan and exit, without writing any new files\n");
64
- printf("\n");
65
- }
66
-
67
// Convert a size string such as "128M" or "4G" into a number of bytes.
// Supported suffixes: M (megabytes, 10^6 bytes) and G (gigabytes, 10^9 bytes).
// Throws std::invalid_argument on an empty string, an unknown suffix, or a
// missing/non-positive numeric part.
static size_t split_str_to_n_bytes(std::string str) {
    if (str.empty()) {
        // str.back() below would be UB on an empty string
        throw std::invalid_argument("error: size string must not be empty");
    }
    size_t n_bytes = 0;
    int n = 0; // initialized so a failed sscanf cannot leave it indeterminate
    if (str.back() == 'M') {
        if (sscanf(str.c_str(), "%d", &n) != 1) {
            throw std::invalid_argument("error: invalid size value: " + str);
        }
        n_bytes = (size_t)n * 1000 * 1000; // megabytes
    } else if (str.back() == 'G') {
        if (sscanf(str.c_str(), "%d", &n) != 1) {
            throw std::invalid_argument("error: invalid size value: " + str);
        }
        n_bytes = (size_t)n * 1000 * 1000 * 1000; // gigabytes
    } else {
        throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back()));
    }
    if (n <= 0) {
        throw std::invalid_argument("error: size must be a positive value");
    }
    return n_bytes;
}
85
-
86
// Parse the command line into `params`.
//
// Grammar: [--option ...] GGUF_IN GGUF_OUT — options first, then exactly two
// positional arguments. Throws std::invalid_argument on unknown options,
// conflicting options, a missing option value, or a wrong positional count.
// May call exit() directly for --help and --version.
static void split_params_parse_ex(int argc, const char ** argv, split_params & params) {
    std::string arg;
    const std::string arg_prefix = "--";
    bool invalid_param = false;

    int arg_idx = 1;
    // consume leading options; the loop stops at the first token that does not
    // start with "--" (NOTE(review): a bare "-h" therefore never enters this
    // loop and falls through to the positional check — confirm intent)
    for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
        arg = argv[arg_idx];
        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
            // accept underscores in option names as an alias for dashes
            std::replace(arg.begin(), arg.end(), '_', '-');
        }

        bool arg_found = false;
        if (arg == "-h" || arg == "--help") {
            split_print_usage(argv[0]);
            exit(0);
        } else if (arg == "--version") {
            fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
            exit(0);
        } else if (arg == "--dry-run") {
            arg_found = true;
            params.dry_run = true;
        } else if (arg == "--no-tensor-first-split") {
            arg_found = true;
            params.no_tensor_first_split = true;
        } else if (arg == "--merge") {
            arg_found = true;
            // --split and --merge are mutually exclusive
            if (params.operation != OP_NONE && params.operation != OP_MERGE) {
                throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
            }
            params.operation = OP_MERGE;
        } else if (arg == "--split") {
            arg_found = true;
            if (params.operation != OP_NONE && params.operation != OP_SPLIT) {
                throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
            }
            params.operation = OP_SPLIT;
        } else if (arg == "--split-max-tensors") {
            // takes one value argument; missing value is reported after the loop
            if (++arg_idx >= argc) {
                invalid_param = true;
                break;
            }
            arg_found = true;
            // tensor-count mode and byte-size mode are mutually exclusive
            if (params.mode != MODE_NONE && params.mode != MODE_TENSOR) {
                throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
            }
            params.mode = MODE_TENSOR;
            params.n_split_tensors = atoi(argv[arg_idx]);
        } else if (arg == "--split-max-size") {
            if (++arg_idx >= argc) {
                invalid_param = true;
                break;
            }
            arg_found = true;
            if (params.mode != MODE_NONE && params.mode != MODE_SIZE) {
                throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
            }
            params.mode = MODE_SIZE;
            params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
        }

        if (!arg_found) {
            throw std::invalid_argument("error: unknown argument: " + arg);
        }
    }

    // the operation is split if not specified
    if (params.operation == OP_NONE) {
        params.operation = OP_SPLIT;
    }
    // the split mode is by tensor if not specified
    if (params.mode == MODE_NONE) {
        params.mode = MODE_TENSOR;
    }

    // deferred report for an option whose value argument was missing
    if (invalid_param) {
        throw std::invalid_argument("error: invalid parameter for argument: " + arg);
    }

    // exactly two positionals must remain: input and output
    if (argc - arg_idx != 2) {
        throw std::invalid_argument("error: bad arguments");
    }

    params.input = argv[arg_idx++];
    params.output = argv[arg_idx++];
}
173
-
174
- static bool split_params_parse(int argc, const char ** argv, split_params & params) {
175
- bool result = true;
176
- try {
177
- split_params_parse_ex(argc, argv, params);
178
- }
179
- catch (const std::invalid_argument & ex) {
180
- fprintf(stderr, "%s\n", ex.what());
181
- split_print_usage(argv[0]);
182
- exit(EXIT_FAILURE);
183
- }
184
- return result;
185
- }
186
-
187
// Pad `file` with n zero bytes, written one byte at a time.
static void zeros(std::ofstream & file, size_t n) {
    const char nil = 0;
    size_t remaining = n;
    while (remaining > 0) {
        file.write(&nil, 1);
        --remaining;
    }
}
193
-
194
- struct split_strategy {
195
- const split_params params;
196
- std::ifstream & f_input;
197
- struct gguf_context * ctx_gguf;
198
- struct ggml_context * ctx_meta = NULL;
199
- const int n_tensors;
200
-
201
- // one ctx_out per one output file
202
- std::vector<struct gguf_context *> ctx_outs;
203
-
204
- // temporary buffer for reading in tensor data
205
- std::vector<uint8_t> read_buf;
206
-
207
- split_strategy(const split_params & params,
208
- std::ifstream & f_input,
209
- struct gguf_context * ctx_gguf,
210
- struct ggml_context * ctx_meta) :
211
- params(params),
212
- f_input(f_input),
213
- ctx_gguf(ctx_gguf),
214
- ctx_meta(ctx_meta),
215
- n_tensors(gguf_get_n_tensors(ctx_gguf)) {
216
-
217
- // because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
218
- int i_split = -1;
219
- struct gguf_context * ctx_out = NULL;
220
- auto new_ctx_out = [&](bool allow_no_tensors) {
221
- i_split++;
222
- if (ctx_out != NULL) {
223
- if (gguf_get_n_tensors(ctx_out) == 0 && !allow_no_tensors) {
224
- fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
225
- exit(EXIT_FAILURE);
226
- }
227
- ctx_outs.push_back(ctx_out);
228
- }
229
- ctx_out = gguf_init_empty();
230
- // Save all metadata in first split only
231
- if (i_split == 0) {
232
- gguf_set_kv(ctx_out, ctx_gguf);
233
- }
234
- gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
235
- gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, 0); // placeholder
236
- gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
237
- };
238
-
239
- // initialize ctx_out for the first split
240
- new_ctx_out(false);
241
-
242
- // skip first split if no_tensor_first_split is set
243
- if (params.no_tensor_first_split) {
244
- new_ctx_out(true);
245
- }
246
-
247
- // process tensors one by one
248
- size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
249
- for (int i = 0; i < n_tensors; ++i) {
250
- struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
251
- // calculate the "imaginary" size = the current size + next tensor size
252
- size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
253
- size_t next_tensors_size = curr_tensors_size + n_bytes;
254
- if (should_split(i, next_tensors_size)) {
255
- new_ctx_out(false);
256
- curr_tensors_size = n_bytes;
257
- } else {
258
- curr_tensors_size = next_tensors_size;
259
- }
260
- gguf_add_tensor(ctx_out, t);
261
- }
262
-
263
- // push the last ctx_out
264
- ctx_outs.push_back(ctx_out);
265
-
266
- // set the correct n_split for all ctx_out
267
- for (auto & ctx : ctx_outs) {
268
- gguf_set_val_u16(ctx, LLM_KV_SPLIT_COUNT, ctx_outs.size());
269
- }
270
- }
271
-
272
- ~split_strategy() {
273
- for (auto & ctx_out : ctx_outs) {
274
- gguf_free(ctx_out);
275
- }
276
- }
277
-
278
- bool should_split(int i_tensor, size_t next_size) {
279
- if (params.mode == MODE_SIZE) {
280
- // split by max size per file
281
- return next_size > params.n_bytes_split;
282
- } else if (params.mode == MODE_TENSOR) {
283
- // split by number of tensors per file
284
- return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
285
- }
286
- // should never happen
287
- GGML_ABORT("invalid mode");
288
- }
289
-
290
- void print_info() {
291
- printf("n_split: %zu\n", ctx_outs.size());
292
- int i_split = 0;
293
- for (auto & ctx_out : ctx_outs) {
294
- // re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
295
- size_t total_size = gguf_get_meta_size(ctx_out);
296
- for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
297
- struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_out, i));
298
- total_size += ggml_nbytes(t);
299
- }
300
- total_size = total_size / 1000 / 1000; // convert to megabytes
301
- printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
302
- i_split++;
303
- }
304
- }
305
-
306
- void write() {
307
- int i_split = 0;
308
- int n_split = ctx_outs.size();
309
- for (auto & ctx_out : ctx_outs) {
310
- // construct file path
311
- char split_path[PATH_MAX] = {0};
312
- llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
313
-
314
- // open the output file
315
- printf("Writing file %s ... ", split_path);
316
- fflush(stdout);
317
- std::ofstream fout = std::ofstream(split_path, std::ios::binary);
318
- fout.exceptions(std::ofstream::failbit); // fail fast on write errors
319
-
320
- // write metadata
321
- std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
322
- gguf_get_meta_data(ctx_out, data.data());
323
- fout.write((const char *)data.data(), data.size());
324
-
325
- // write tensors
326
- for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
327
- // read tensor meta and prepare buffer
328
- const char * t_name = gguf_get_tensor_name(ctx_out, i);
329
- struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
330
- auto n_bytes = ggml_nbytes(t);
331
- read_buf.resize(n_bytes);
332
-
333
- // calculate offset
334
- auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
335
- auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
336
-
337
- // copy tensor from input to output file
338
- copy_file_to_file(f_input, fout, offset, n_bytes);
339
- zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
340
- }
341
-
342
- printf("done\n");
343
- // close the file
344
- fout.close();
345
- i_split++;
346
- }
347
- }
348
-
349
- void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
350
- // TODO: detect OS and use copy_file_range() here for better performance
351
- if (read_buf.size() < len) {
352
- read_buf.resize(len);
353
- }
354
- f_in.seekg(in_offset);
355
- f_in.read((char *)read_buf.data(), len);
356
- f_out.write((const char *)read_buf.data(), len);
357
- }
358
- };
359
-
360
- static void gguf_split(const split_params & split_params) {
361
- struct ggml_context * ctx_meta = NULL;
362
-
363
- struct gguf_init_params params = {
364
- /*.no_alloc = */ true,
365
- /*.ctx = */ &ctx_meta,
366
- };
367
-
368
- std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
369
- if (!f_input.is_open()) {
370
- fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_params.input.c_str());
371
- exit(EXIT_FAILURE);
372
- }
373
-
374
- auto * ctx_gguf = gguf_init_from_file(split_params.input.c_str(), params);
375
- if (!ctx_gguf) {
376
- fprintf(stderr, "%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
377
- exit(EXIT_FAILURE);
378
- }
379
-
380
- // prepare the strategy
381
- split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
382
- int n_split = strategy.ctx_outs.size();
383
- strategy.print_info();
384
-
385
- if (!split_params.dry_run) {
386
- // write all output splits
387
- strategy.write();
388
- }
389
-
390
- // done, clean up
391
- gguf_free(ctx_gguf);
392
- f_input.close();
393
-
394
- fprintf(stderr, "%s: %d gguf split written with a total of %d tensors.\n",
395
- __func__, n_split, strategy.n_tensors);
396
- }
397
-
398
- static void gguf_merge(const split_params & split_params) {
399
- fprintf(stderr, "%s: %s -> %s\n",
400
- __func__, split_params.input.c_str(),
401
- split_params.output.c_str());
402
- int n_split = 1;
403
- int total_tensors = 0;
404
-
405
- // avoid overwriting existing output file
406
- if (std::ifstream(split_params.output.c_str())) {
407
- fprintf(stderr, "%s: output file %s already exists\n", __func__, split_params.output.c_str());
408
- exit(EXIT_FAILURE);
409
- }
410
-
411
-
412
- auto * ctx_out = gguf_init_empty();
413
-
414
- std::vector<uint8_t> read_data;
415
- std::vector<ggml_context *> ctx_metas;
416
- std::vector<gguf_context *> ctx_ggufs;
417
-
418
- char split_path[PATH_MAX] = {0};
419
- strncpy(split_path, split_params.input.c_str(), sizeof(split_path) - 1);
420
- char split_prefix[PATH_MAX] = {0};
421
-
422
- // First pass to find KV and tensors metadata
423
- for (int i_split = 0; i_split < n_split; i_split++) {
424
- struct ggml_context * ctx_meta = NULL;
425
-
426
- struct gguf_init_params params = {
427
- /*.no_alloc = */ true,
428
- /*.ctx = */ &ctx_meta,
429
- };
430
-
431
- if (i_split > 0) {
432
- llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
433
- }
434
- fprintf(stderr, "%s: reading metadata %s ...", __func__, split_path);
435
-
436
- auto * ctx_gguf = gguf_init_from_file(split_path, params);
437
- if (!ctx_gguf) {
438
- fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
439
- exit(EXIT_FAILURE);
440
- }
441
- ctx_ggufs.push_back(ctx_gguf);
442
- ctx_metas.push_back(ctx_meta);
443
-
444
- if (i_split == 0) {
445
- auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
446
- if (key_n_split < 0) {
447
- fprintf(stderr,
448
- "\n%s: input file does not contain %s metadata\n",
449
- __func__,
450
- LLM_KV_SPLIT_COUNT);
451
- gguf_free(ctx_gguf);
452
- ggml_free(ctx_meta);
453
- gguf_free(ctx_out);
454
- exit(EXIT_FAILURE);
455
- }
456
-
457
- n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
458
- if (n_split < 1) {
459
- fprintf(stderr,
460
- "\n%s: input file does not contain a valid split count %d\n",
461
- __func__,
462
- n_split);
463
- gguf_free(ctx_gguf);
464
- ggml_free(ctx_meta);
465
- gguf_free(ctx_out);
466
- exit(EXIT_FAILURE);
467
- }
468
-
469
- // Verify the file naming and extract split_prefix
470
- if (!llama_split_prefix(split_prefix, sizeof (split_prefix), split_path, i_split, n_split)) {
471
- fprintf(stderr, "\n%s: unexpected input file name: %s"
472
- " i_split=%d"
473
- " n_split=%d\n", __func__,
474
- split_path, i_split, n_split);
475
- gguf_free(ctx_gguf);
476
- ggml_free(ctx_meta);
477
- gguf_free(ctx_out);
478
- exit(EXIT_FAILURE);
479
- }
480
-
481
- // Do not trigger merge if we try to merge again the output
482
- gguf_set_val_u16(ctx_gguf, LLM_KV_SPLIT_COUNT, 0);
483
-
484
- // Set metadata from the first split
485
- gguf_set_kv(ctx_out, ctx_gguf);
486
- }
487
-
488
- auto n_tensors = gguf_get_n_tensors(ctx_gguf);
489
- for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
490
- const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
491
- struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
492
- gguf_add_tensor(ctx_out, t);
493
- }
494
- total_tensors += n_tensors;
495
-
496
- fprintf(stderr, "\033[3Ddone\n");
497
- }
498
- std::ofstream fout;
499
- if (!split_params.dry_run) {
500
- fout.open(split_params.output.c_str(), std::ios::binary);
501
- fout.exceptions(std::ofstream::failbit); // fail fast on write errors
502
- // placeholder for the meta data
503
- auto meta_size = gguf_get_meta_size(ctx_out);
504
- ::zeros(fout, meta_size);
505
- }
506
-
507
- // Write tensors data
508
- for (int i_split = 0; i_split < n_split; i_split++) {
509
- llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
510
- std::ifstream f_input(split_path, std::ios::binary);
511
- if (!f_input.is_open()) {
512
- fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_path);
513
- for (uint32_t i = 0; i < ctx_ggufs.size(); i++) {
514
- gguf_free(ctx_ggufs[i]);
515
- ggml_free(ctx_metas[i]);
516
- }
517
- gguf_free(ctx_out);
518
- if (!split_params.dry_run) {
519
- fout.close();
520
- }
521
- exit(EXIT_FAILURE);
522
- }
523
- fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path);
524
-
525
- auto * ctx_gguf = ctx_ggufs[i_split];
526
- auto * ctx_meta = ctx_metas[i_split];
527
-
528
- auto n_tensors = gguf_get_n_tensors(ctx_gguf);
529
- for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
530
- const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
531
- struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
532
-
533
- auto n_bytes = ggml_nbytes(t);
534
-
535
- if (read_data.size() < n_bytes) {
536
- read_data.resize(n_bytes);
537
- }
538
-
539
- auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
540
- f_input.seekg(offset);
541
- f_input.read((char *)read_data.data(), n_bytes);
542
- if (!split_params.dry_run) {
543
- // write tensor data + padding
544
- fout.write((const char *)read_data.data(), n_bytes);
545
- zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
546
- }
547
- }
548
-
549
- gguf_free(ctx_gguf);
550
- ggml_free(ctx_meta);
551
- f_input.close();
552
- fprintf(stderr, "\033[3Ddone\n");
553
- }
554
-
555
- if (!split_params.dry_run) {
556
- // go back to beginning of file and write the updated metadata
557
- fout.seekp(0);
558
- std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
559
- gguf_get_meta_data(ctx_out, data.data());
560
- fout.write((const char *)data.data(), data.size());
561
- fout.close();
562
- }
563
- gguf_free(ctx_out);
564
-
565
- fprintf(stderr, "%s: %s merged from %d split with %d tensors.\n",
566
- __func__, split_params.output.c_str(), n_split, total_tensors);
567
- }
568
-
569
- int main(int argc, const char ** argv) {
570
- split_params params;
571
- split_params_parse(argc, argv, params);
572
-
573
- switch (params.operation) {
574
- case OP_SPLIT: gguf_split(params);
575
- break;
576
- case OP_MERGE: gguf_merge(params);
577
- break;
578
- default: split_print_usage(argv[0]);
579
- exit(EXIT_FAILURE);
580
- }
581
-
582
- return 0;
583
- }
@@ -1,5 +0,0 @@
1
- set(TARGET llama-imatrix)
2
- add_executable(${TARGET} imatrix.cpp)
3
- install(TARGETS ${TARGET} RUNTIME)
4
- target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
5
- target_compile_features(${TARGET} PRIVATE cxx_std_17)