@fugood/llama.node 0.6.2 → 1.0.0-beta.1

This diff shows the changes between publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (378)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -10
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/patches/node-api-headers+1.1.0.patch +0 -26
  27. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  28. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  29. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  30. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  31. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  32. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  33. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  37. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  38. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  39. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  40. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  41. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  42. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  43. package/src/llama.cpp/cmake/common.cmake +0 -35
  44. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  45. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  46. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  47. package/src/llama.cpp/docs/build.md +0 -561
  48. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  49. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  50. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  51. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  53. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  54. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  55. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  56. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  57. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  58. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  59. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  61. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  62. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  63. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  64. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  68. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  70. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  71. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  72. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  73. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  74. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  75. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  76. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  77. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  79. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  80. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  81. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  82. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  83. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  84. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  85. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  86. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  87. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  88. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  89. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  90. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  91. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  92. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  93. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  94. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  95. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  96. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  97. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  98. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  99. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  100. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  101. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  102. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  103. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  104. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  105. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  106. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  107. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  108. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  109. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  110. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  111. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  112. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  113. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  114. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  115. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  116. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  117. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  118. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  119. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  120. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  121. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  122. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  123. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  125. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  127. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  128. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  129. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  130. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  131. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  132. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  135. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  136. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  137. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  138. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  139. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  140. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  141. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  142. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  143. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  144. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  145. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  146. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  147. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  148. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  149. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  150. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  151. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  153. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  155. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  157. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  159. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  161. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  163. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  167. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  169. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  170. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  172. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  173. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  175. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  177. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  181. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  183. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  185. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  186. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  187. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  189. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  191. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  193. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  195. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  196. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  198. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  199. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  200. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  204. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  205. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  206. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  207. package/src/llama.cpp/models/.editorconfig +0 -1
  208. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  212. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  214. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  217. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  228. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  231. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  233. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  234. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  242. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  250. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  253. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  256. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  258. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  261. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  262. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  263. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  264. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  265. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  266. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  267. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  268. package/src/llama.cpp/prompts/assistant.txt +0 -31
  269. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  270. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  271. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  273. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  274. package/src/llama.cpp/prompts/chat.txt +0 -28
  275. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  276. package/src/llama.cpp/prompts/dan.txt +0 -1
  277. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  278. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  279. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  280. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  281. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  284. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  285. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  286. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  287. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  289. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  290. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  291. package/src/llama.cpp/requirements.txt +0 -13
  292. package/src/llama.cpp/scripts/build-info.sh +0 -30
  293. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  294. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  295. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  296. package/src/llama.cpp/tests/get-model.cpp +0 -21
  297. package/src/llama.cpp/tests/get-model.h +0 -2
  298. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  299. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  300. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  301. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  302. package/src/llama.cpp/tests/test-c.c +0 -7
  303. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  304. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  305. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  306. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  307. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  308. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  309. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  310. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  311. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  312. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  313. package/src/llama.cpp/tests/test-log.cpp +0 -39
  314. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  315. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  316. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  317. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  318. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  319. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  320. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  321. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  322. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  323. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  324. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  325. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  326. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  327. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  328. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  329. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  330. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  331. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  332. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  333. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  334. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  335. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  336. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  337. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  338. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  339. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  340. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  341. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  342. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  343. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  344. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  345. package/src/llama.cpp/tools/main/main.cpp +0 -977
  346. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  347. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  348. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  349. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  350. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  351. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  353. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  354. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  355. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  356. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  357. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  358. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  360. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  361. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  362. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  363. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  364. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  365. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  367. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  368. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  369. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  370. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  371. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  372. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  373. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  374. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  375. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  376. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  377. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  378. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/tools/gguf-split/gguf-split.cpp
@@ -1,583 +0,0 @@
- #include "ggml.h"
- #include "gguf.h"
- #include "llama.h"
- #include "common.h"
-
- #include <algorithm>
- #include <cinttypes>
- #include <climits>
- #include <cstdio>
- #include <cstdlib>
- #include <stdexcept>
- #include <cstring>
- #include <fstream>
- #include <string>
- #include <vector>
-
- #if defined(_WIN32)
-     #include <windows.h>
-     #ifndef PATH_MAX
-         #define PATH_MAX MAX_PATH
-     #endif
-     #include <io.h>
- #endif
-
- enum split_operation : uint8_t {
-     OP_NONE,
-     OP_SPLIT,
-     OP_MERGE,
- };
-
- enum split_mode : uint8_t {
-     MODE_NONE,
-     MODE_TENSOR,
-     MODE_SIZE,
- };
-
- struct split_params {
-     split_operation operation = OP_NONE;
-     split_mode mode = MODE_NONE;
-     size_t n_bytes_split = 0;
-     int n_split_tensors = 128;
-     std::string input;
-     std::string output;
-     bool no_tensor_first_split = false;
-     bool dry_run = false;
- };
-
- static void split_print_usage(const char * executable) {
-     const split_params default_params;
-     printf("\n");
-     printf("usage: %s [options] GGUF_IN GGUF_OUT\n", executable);
-     printf("\n");
-     printf("Apply a GGUF operation on IN to OUT.");
-     printf("\n");
-     printf("options:\n");
-     printf(" -h, --help show this help message and exit\n");
-     printf(" --version show version and build info\n");
-     printf(" --split split GGUF to multiple GGUF (enabled by default)\n");
-     printf(" --merge merge multiple GGUF to a single GGUF\n");
-     printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
-     printf(" --split-max-size N(M|G) max size per split\n");
-     printf(" --no-tensor-first-split do not add tensors to the first split (disabled by default)\n");
-     printf(" --dry-run only print out a split plan and exit, without writing any new files\n");
-     printf("\n");
- }
-
- // return convert string, for example "128M" or "4G" to number of bytes
- static size_t split_str_to_n_bytes(std::string str) {
-     size_t n_bytes = 0;
-     int n;
-     if (str.back() == 'M') {
-         sscanf(str.c_str(), "%d", &n);
-         n_bytes = (size_t)n * 1000 * 1000; // megabytes
-     } else if (str.back() == 'G') {
-         sscanf(str.c_str(), "%d", &n);
-         n_bytes = (size_t)n * 1000 * 1000 * 1000; // gigabytes
-     } else {
-         throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back()));
-     }
-     if (n <= 0) {
-         throw std::invalid_argument("error: size must be a positive value");
-     }
-     return n_bytes;
- }
-
- static void split_params_parse_ex(int argc, const char ** argv, split_params & params) {
-     std::string arg;
-     const std::string arg_prefix = "--";
-     bool invalid_param = false;
-
-     int arg_idx = 1;
-     for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
-         arg = argv[arg_idx];
-         if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
-             std::replace(arg.begin(), arg.end(), '_', '-');
-         }
-
-         bool arg_found = false;
-         if (arg == "-h" || arg == "--help") {
-             split_print_usage(argv[0]);
-             exit(0);
-         } else if (arg == "--version") {
-             fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
-             fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
-             exit(0);
-         } else if (arg == "--dry-run") {
-             arg_found = true;
-             params.dry_run = true;
-         } else if (arg == "--no-tensor-first-split") {
-             arg_found = true;
-             params.no_tensor_first_split = true;
-         } else if (arg == "--merge") {
-             arg_found = true;
-             if (params.operation != OP_NONE && params.operation != OP_MERGE) {
-                 throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
-             }
-             params.operation = OP_MERGE;
-         } else if (arg == "--split") {
-             arg_found = true;
-             if (params.operation != OP_NONE && params.operation != OP_SPLIT) {
-                 throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
-             }
-             params.operation = OP_SPLIT;
-         } else if (arg == "--split-max-tensors") {
-             if (++arg_idx >= argc) {
-                 invalid_param = true;
-                 break;
-             }
-             arg_found = true;
-             if (params.mode != MODE_NONE && params.mode != MODE_TENSOR) {
-                 throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
-             }
-             params.mode = MODE_TENSOR;
-             params.n_split_tensors = atoi(argv[arg_idx]);
-         } else if (arg == "--split-max-size") {
-             if (++arg_idx >= argc) {
-                 invalid_param = true;
-                 break;
-             }
-             arg_found = true;
-             if (params.mode != MODE_NONE && params.mode != MODE_SIZE) {
-                 throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
-             }
-             params.mode = MODE_SIZE;
-             params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
-         }
-
-         if (!arg_found) {
-             throw std::invalid_argument("error: unknown argument: " + arg);
-         }
-     }
-
-     // the operation is split if not specified
-     if (params.operation == OP_NONE) {
-         params.operation = OP_SPLIT;
-     }
-     // the split mode is by tensor if not specified
-     if (params.mode == MODE_NONE) {
-         params.mode = MODE_TENSOR;
-     }
-
-     if (invalid_param) {
-         throw std::invalid_argument("error: invalid parameter for argument: " + arg);
-     }
-
-     if (argc - arg_idx != 2) {
-         throw std::invalid_argument("error: bad arguments");
-     }
-
-     params.input = argv[arg_idx++];
-     params.output = argv[arg_idx++];
- }
-
- static bool split_params_parse(int argc, const char ** argv, split_params & params) {
-     bool result = true;
-     try {
-         split_params_parse_ex(argc, argv, params);
-     }
-     catch (const std::invalid_argument & ex) {
-         fprintf(stderr, "%s\n", ex.what());
-         split_print_usage(argv[0]);
-         exit(EXIT_FAILURE);
-     }
-     return result;
- }
-
- static void zeros(std::ofstream & file, size_t n) {
-     char zero = 0;
-     for (size_t i = 0; i < n; ++i) {
-         file.write(&zero, 1);
-     }
- }
-
- struct split_strategy {
-     const split_params params;
-     std::ifstream & f_input;
-     struct gguf_context * ctx_gguf;
-     struct ggml_context * ctx_meta = NULL;
-     const int n_tensors;
-
-     // one ctx_out per one output file
-     std::vector<struct gguf_context *> ctx_outs;
-
-     // temporary buffer for reading in tensor data
-     std::vector<uint8_t> read_buf;
-
-     split_strategy(const split_params & params,
-             std::ifstream & f_input,
-             struct gguf_context * ctx_gguf,
-             struct ggml_context * ctx_meta) :
-         params(params),
-         f_input(f_input),
-         ctx_gguf(ctx_gguf),
-         ctx_meta(ctx_meta),
-         n_tensors(gguf_get_n_tensors(ctx_gguf)) {
-
-         // because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
-         int i_split = -1;
-         struct gguf_context * ctx_out = NULL;
-         auto new_ctx_out = [&](bool allow_no_tensors) {
-             i_split++;
-             if (ctx_out != NULL) {
-                 if (gguf_get_n_tensors(ctx_out) == 0 && !allow_no_tensors) {
-                     fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
-                     exit(EXIT_FAILURE);
-                 }
-                 ctx_outs.push_back(ctx_out);
-             }
-             ctx_out = gguf_init_empty();
-             // Save all metadata in first split only
-             if (i_split == 0) {
-                 gguf_set_kv(ctx_out, ctx_gguf);
-             }
-             gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
-             gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, 0); // placeholder
-             gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
-         };
-
-         // initialize ctx_out for the first split
-         new_ctx_out(false);
-
-         // skip first split if no_tensor_first_split is set
-         if (params.no_tensor_first_split) {
-             new_ctx_out(true);
-         }
-
-         // process tensors one by one
-         size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
-         for (int i = 0; i < n_tensors; ++i) {
-             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
-             // calculate the "imaginary" size = the current size + next tensor size
-             size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
-             size_t next_tensors_size = curr_tensors_size + n_bytes;
-             if (should_split(i, next_tensors_size)) {
-                 new_ctx_out(false);
-                 curr_tensors_size = n_bytes;
-             } else {
-                 curr_tensors_size = next_tensors_size;
-             }
-             gguf_add_tensor(ctx_out, t);
-         }
-
-         // push the last ctx_out
-         ctx_outs.push_back(ctx_out);
-
-         // set the correct n_split for all ctx_out
-         for (auto & ctx : ctx_outs) {
-             gguf_set_val_u16(ctx, LLM_KV_SPLIT_COUNT, ctx_outs.size());
-         }
-     }
-
-     ~split_strategy() {
-         for (auto & ctx_out : ctx_outs) {
-             gguf_free(ctx_out);
-         }
-     }
-
-     bool should_split(int i_tensor, size_t next_size) {
-         if (params.mode == MODE_SIZE) {
-             // split by max size per file
-             return next_size > params.n_bytes_split;
-         } else if (params.mode == MODE_TENSOR) {
-             // split by number of tensors per file
-             return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
-         }
-         // should never happen
-         GGML_ABORT("invalid mode");
-     }
-
-     void print_info() {
-         printf("n_split: %zu\n", ctx_outs.size());
-         int i_split = 0;
-         for (auto & ctx_out : ctx_outs) {
-             // re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
-             size_t total_size = gguf_get_meta_size(ctx_out);
-             for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
-                 struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_out, i));
-                 total_size += ggml_nbytes(t);
-             }
-             total_size = total_size / 1000 / 1000; // convert to megabytes
-             printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
-             i_split++;
-         }
-     }
-
-     void write() {
-         int i_split = 0;
-         int n_split = ctx_outs.size();
-         for (auto & ctx_out : ctx_outs) {
-             // construct file path
-             char split_path[PATH_MAX] = {0};
-             llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
-
-             // open the output file
-             printf("Writing file %s ... ", split_path);
-             fflush(stdout);
-             std::ofstream fout = std::ofstream(split_path, std::ios::binary);
-             fout.exceptions(std::ofstream::failbit); // fail fast on write errors
-
-             // write metadata
-             std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
-             gguf_get_meta_data(ctx_out, data.data());
-             fout.write((const char *)data.data(), data.size());
-
-             // write tensors
-             for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
-                 // read tensor meta and prepare buffer
-                 const char * t_name = gguf_get_tensor_name(ctx_out, i);
-                 struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
-                 auto n_bytes = ggml_nbytes(t);
-                 read_buf.resize(n_bytes);
-
-                 // calculate offset
-                 auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
-                 auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
-
-                 // copy tensor from input to output file
-                 copy_file_to_file(f_input, fout, offset, n_bytes);
-                 zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
-             }
-
-             printf("done\n");
-             // close the file
-             fout.close();
-             i_split++;
-         }
-     }
-
-     void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
-         // TODO: detect OS and use copy_file_range() here for better performance
-         if (read_buf.size() < len) {
-             read_buf.resize(len);
-         }
-         f_in.seekg(in_offset);
-         f_in.read((char *)read_buf.data(), len);
-         f_out.write((const char *)read_buf.data(), len);
-     }
- };
-
- static void gguf_split(const split_params & split_params) {
-     struct ggml_context * ctx_meta = NULL;
-
-     struct gguf_init_params params = {
-         /*.no_alloc = */ true,
-         /*.ctx = */ &ctx_meta,
-     };
-
-     std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
-     if (!f_input.is_open()) {
-         fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_params.input.c_str());
-         exit(EXIT_FAILURE);
-     }
-
-     auto * ctx_gguf = gguf_init_from_file(split_params.input.c_str(), params);
-     if (!ctx_gguf) {
-         fprintf(stderr, "%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
-         exit(EXIT_FAILURE);
-     }
-
-     // prepare the strategy
-     split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
-     int n_split = strategy.ctx_outs.size();
-     strategy.print_info();
-
-     if (!split_params.dry_run) {
-         // write all output splits
-         strategy.write();
-     }
-
-     // done, clean up
-     gguf_free(ctx_gguf);
-     f_input.close();
-
-     fprintf(stderr, "%s: %d gguf split written with a total of %d tensors.\n",
-             __func__, n_split, strategy.n_tensors);
- }
-
- static void gguf_merge(const split_params & split_params) {
-     fprintf(stderr, "%s: %s -> %s\n",
-             __func__, split_params.input.c_str(),
-             split_params.output.c_str());
-     int n_split = 1;
-     int total_tensors = 0;
-
-     // avoid overwriting existing output file
-     if (std::ifstream(split_params.output.c_str())) {
-         fprintf(stderr, "%s: output file %s already exists\n", __func__, split_params.output.c_str());
-         exit(EXIT_FAILURE);
-     }
-
-
-     auto * ctx_out = gguf_init_empty();
-
-     std::vector<uint8_t> read_data;
-     std::vector<ggml_context *> ctx_metas;
-     std::vector<gguf_context *> ctx_ggufs;
-
-     char split_path[PATH_MAX] = {0};
-     strncpy(split_path, split_params.input.c_str(), sizeof(split_path) - 1);
-     char split_prefix[PATH_MAX] = {0};
-
-     // First pass to find KV and tensors metadata
-     for (int i_split = 0; i_split < n_split; i_split++) {
-         struct ggml_context * ctx_meta = NULL;
-
-         struct gguf_init_params params = {
-             /*.no_alloc = */ true,
-             /*.ctx = */ &ctx_meta,
-         };
-
-         if (i_split > 0) {
-             llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
-         }
-         fprintf(stderr, "%s: reading metadata %s ...", __func__, split_path);
-
-         auto * ctx_gguf = gguf_init_from_file(split_path, params);
-         if (!ctx_gguf) {
-             fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
-             exit(EXIT_FAILURE);
-         }
-         ctx_ggufs.push_back(ctx_gguf);
-         ctx_metas.push_back(ctx_meta);
-
-         if (i_split == 0) {
-             auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
-             if (key_n_split < 0) {
-                 fprintf(stderr,
-                         "\n%s: input file does not contain %s metadata\n",
-                         __func__,
-                         LLM_KV_SPLIT_COUNT);
-                 gguf_free(ctx_gguf);
-                 ggml_free(ctx_meta);
-                 gguf_free(ctx_out);
-                 exit(EXIT_FAILURE);
-             }
-
-             n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
-             if (n_split < 1) {
-                 fprintf(stderr,
-                         "\n%s: input file does not contain a valid split count %d\n",
-                         __func__,
-                         n_split);
-                 gguf_free(ctx_gguf);
-                 ggml_free(ctx_meta);
-                 gguf_free(ctx_out);
-                 exit(EXIT_FAILURE);
-             }
-
-             // Verify the file naming and extract split_prefix
-             if (!llama_split_prefix(split_prefix, sizeof (split_prefix), split_path, i_split, n_split)) {
-                 fprintf(stderr, "\n%s: unexpected input file name: %s"
-                         " i_split=%d"
-                         " n_split=%d\n", __func__,
-                         split_path, i_split, n_split);
-                 gguf_free(ctx_gguf);
-                 ggml_free(ctx_meta);
-                 gguf_free(ctx_out);
-                 exit(EXIT_FAILURE);
-             }
-
-             // Do not trigger merge if we try to merge again the output
-             gguf_set_val_u16(ctx_gguf, LLM_KV_SPLIT_COUNT, 0);
-
-             // Set metadata from the first split
-             gguf_set_kv(ctx_out, ctx_gguf);
-         }
-
-         auto n_tensors = gguf_get_n_tensors(ctx_gguf);
-         for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
-             const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
-             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
-             gguf_add_tensor(ctx_out, t);
-         }
-         total_tensors += n_tensors;
-
-         fprintf(stderr, "\033[3Ddone\n");
-     }
-     std::ofstream fout;
-     if (!split_params.dry_run) {
-         fout.open(split_params.output.c_str(), std::ios::binary);
-         fout.exceptions(std::ofstream::failbit); // fail fast on write errors
-         // placeholder for the meta data
-         auto meta_size = gguf_get_meta_size(ctx_out);
-         ::zeros(fout, meta_size);
-     }
-
-     // Write tensors data
-     for (int i_split = 0; i_split < n_split; i_split++) {
-         llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
-         std::ifstream f_input(split_path, std::ios::binary);
-         if (!f_input.is_open()) {
-             fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_path);
-             for (uint32_t i = 0; i < ctx_ggufs.size(); i++) {
-                 gguf_free(ctx_ggufs[i]);
-                 ggml_free(ctx_metas[i]);
-             }
-             gguf_free(ctx_out);
-             if (!split_params.dry_run) {
-                 fout.close();
-             }
-             exit(EXIT_FAILURE);
-         }
-         fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path);
-
-         auto * ctx_gguf = ctx_ggufs[i_split];
-         auto * ctx_meta = ctx_metas[i_split];
-
-         auto n_tensors = gguf_get_n_tensors(ctx_gguf);
-         for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
-             const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
-             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
-
-             auto n_bytes = ggml_nbytes(t);
-
-             if (read_data.size() < n_bytes) {
-                 read_data.resize(n_bytes);
-             }
-
-             auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
-             f_input.seekg(offset);
-             f_input.read((char *)read_data.data(), n_bytes);
-             if (!split_params.dry_run) {
-                 // write tensor data + padding
-                 fout.write((const char *)read_data.data(), n_bytes);
-                 zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
-             }
-         }
-
-         gguf_free(ctx_gguf);
-         ggml_free(ctx_meta);
-         f_input.close();
-         fprintf(stderr, "\033[3Ddone\n");
-     }
-
-     if (!split_params.dry_run) {
-         // go back to beginning of file and write the updated metadata
-         fout.seekp(0);
-         std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
-         gguf_get_meta_data(ctx_out, data.data());
-         fout.write((const char *)data.data(), data.size());
-         fout.close();
-     }
-     gguf_free(ctx_out);
-
-     fprintf(stderr, "%s: %s merged from %d split with %d tensors.\n",
-             __func__, split_params.output.c_str(), n_split, total_tensors);
- }
-
- int main(int argc, const char ** argv) {
-     split_params params;
-     split_params_parse(argc, argv, params);
-
-     switch (params.operation) {
-         case OP_SPLIT: gguf_split(params);
-             break;
-         case OP_MERGE: gguf_merge(params);
-             break;
-         default: split_print_usage(argv[0]);
-             exit(EXIT_FAILURE);
-     }
-
-     return 0;
- }
package/src/llama.cpp/tools/imatrix/CMakeLists.txt
@@ -1,5 +0,0 @@
- set(TARGET llama-imatrix)
- add_executable(${TARGET} imatrix.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)