@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -1,941 +0,0 @@
- #include "ggml.h"
- #include "gguf.h"
-
- #include "llama.h"
- #include "common.h"
- #include "log.h"
-
- #include <unordered_map>
- #include <vector>
- #include <cassert>
- #include <climits>
- #include <cstring>
- #include <cstdarg>
- #include <cinttypes>
- #include <ctime>
- #include <random>
- #include <stdexcept>
- #include <sstream>
- #include <algorithm>
- #include <string>
-
- // GGUF keys & tensor names.
-
- #define KV_GENERAL_ARCHITECTURE "general.architecture"
- #define KV_GENERAL_NAME "general.name"
-
- #define KV_TOKENIZER_MODEL "tokenizer.ggml.model"
- #define KV_TOKENIZER_LIST "tokenizer.ggml.tokens"
- #define KV_TOKENIZER_TOKEN_TYPE "tokenizer.ggml.token_type"
- #define KV_TOKENIZER_SCORES "tokenizer.ggml.scores"
- #define KV_TOKENIZER_BOS_ID "tokenizer.ggml.bos_token_id"
- #define KV_TOKENIZER_EOS_ID "tokenizer.ggml.eos_token_id"
- #define KV_TOKENIZER_UNK_ID "tokenizer.ggml.unknown_token_id"
- #define KV_TOKENIZER_SEP_ID "tokenizer.ggml.seperator_token_id"
- #define KV_TOKENIZER_PAD_ID "tokenizer.ggml.padding_token_id"
- #define KV_TOKENIZER_HF_JSON "tokenizer.huggingface.json"
-
- #define KV_CONTEXT_LENGTH "llama.context_length"
- #define KV_EMBEDDING_LENGTH "llama.embedding_length"
- #define KV_BLOCK_COUNT "llama.block_count"
- #define KV_FEED_FORWARD_LENGTH "llama.feed_forward_length"
- #define KV_ATTENTION_HEAD_COUNT "llama.attention.head_count"
- #define KV_ATTENTION_HEAD_COUNT_KV "llama.attention.head_count_kv"
- #define KV_ATTENTION_LAYERNORM_RMS_EPS "llama.attention.layer_norm_rms_epsilon"
- #define KV_ROPE_DIMENSION_COUNT "llama.rope.dimension_count"
-
- #define TN_TOKEN_EMBD "token_embd.weight"
- #define TN_OUTPUT_NORM "output_norm.weight"
- #define TN_OUTPUT "output.weight"
- #define TN_ATTN_NORM "blk.%d.attn_norm.weight"
- #define TN_ATTN_Q "blk.%d.attn_q.weight"
- #define TN_ATTN_K "blk.%d.attn_k.weight"
- #define TN_ATTN_V "blk.%d.attn_v.weight"
- #define TN_ATTN_OUTPUT "blk.%d.attn_output.weight"
- #define TN_FFN_NORM "blk.%d.ffn_norm.weight"
- #define TN_FFN_GATE "blk.%d.ffn_gate.weight"
- #define TN_FFN_DOWN "blk.%d.ffn_down.weight"
- #define TN_FFN_UP "blk.%d.ffn_up.weight"
-
- #if defined(_MSC_VER)
- #pragma warning(disable: 4244 4267) // possible loss of data
- #endif
-
- #define LLAMA_FILE_MAGIC_GGJT 0x67676a74u // 'ggjt'
- #define LLAMA_FILE_VERSION_GGJT_V3 3
-
- #define TOKENIZER_NAME "llama"
- #define UNKNOWN_TOKEN_ID 0
- #define BOS_TOKEN_ID 1
- #define EOS_TOKEN_ID 2
-
- //////////////////////////////////////// llama2.c model structs and functions to load models, alloc memory etc.
- typedef struct {
-     int dim; // transformer dimension
-     int hidden_dim; // for ffn layers
-     int n_layers; // number of layers
-     int n_heads; // number of query heads
-     int n_kv_heads; // number of key/value heads (can be < query heads because of multiquery)
-     int vocab_size; // vocabulary size, usually 256 (byte-level)
-     int seq_len; // max sequence length
- } Config;
-
- struct TransformerWeights {
-     // token embedding table
-     std::vector<float> token_embedding_table; // (vocab_size, dim)
-     // weights for rmsnorms
-     std::vector<float> rms_att_weight; // (layer, dim) rmsnorm weights
-     std::vector<float> rms_ffn_weight; // (layer, dim)
-     // weights for matmuls
-     std::vector<float> wq; // (layer, dim, dim)
-     std::vector<float> wk; // (layer, dim, dim)
-     std::vector<float> wv; // (layer, dim, dim)
-     std::vector<float> wo; // (layer, dim, dim)
-     // weights for ffn
-     std::vector<float> w1; // (layer, hidden_dim, dim)
-     std::vector<float> w2; // (layer, dim, hidden_dim)
-     std::vector<float> w3; // (layer, hidden_dim, dim)
-     // final rmsnorm
-     std::vector<float> rms_final_weight; // (dim,)
-     // freq_cis for RoPE relatively positional embeddings
-     // std::vector<float> freq_cis_real; // (seq_len, dim/2)
-     // std::vector<float> freq_cis_imag; // (seq_len, dim/2)
-     // (optional) classifier weights for the logits, on the last layer
-     std::vector<float> wcls;
- };
-
- static void alloc_weights(TransformerWeights * w, const Config * p, bool shared_weights) {
-     const int n_multiqueries = p->n_kv_heads <= 0 || p->n_kv_heads >= p->n_heads ? 1 : p->n_heads / p->n_kv_heads;
-     try {
-         w->token_embedding_table.resize(p->vocab_size * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] = [%d] float space for w->token_embedding_table\n",__func__,p->vocab_size , p->dim, p->vocab_size * p->dim);
-
-         w->rms_att_weight.resize(p->n_layers * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] = [%d] float space for w->rms_att_weight\n",__func__,p->n_layers, p->dim, p->n_layers * p->dim);
-
-         w->rms_ffn_weight.resize(p->n_layers * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] = [%d] float space for w->rms_ffn_weight\n",__func__,p->n_layers , p->dim, p->n_layers * p->dim);
-
-         w->wq.resize(p->n_layers * p->dim * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] x [%d] = [%d] float space for w->wq\n",__func__,p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);
-
-         w->wk.resize(p->n_layers * p->dim * p->dim / n_multiqueries);
-         LOG_INF("%s: Allocating [%d] x [%d] x [%d] = [%d] float space for w->wk\n",__func__,p->n_layers, p->dim, p->dim / n_multiqueries, p->n_layers * p->dim * p->dim / n_multiqueries);
-
-         w->wv.resize(p->n_layers * p->dim * p->dim / n_multiqueries);
-         LOG_INF("%s: Allocating [%d] x [%d] x [%d] = [%d] float space for w->wv\n",__func__, p->n_layers, p->dim, p->dim / n_multiqueries, p->n_layers * p->dim * p->dim / n_multiqueries);
-
-         w->wo.resize(p->n_layers * p->dim * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] x [%d] = [%d] float space for w->wo\n",__func__,p->n_layers, p->dim, p->dim, p->n_layers * p->dim * p->dim);
-
-         w->w1.resize(p->n_layers * p->hidden_dim * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] x [%d] = [%d] float space for w->w1\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
-
-         w->w2.resize(p->n_layers * p->hidden_dim * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] x [%d] = [%d] float space for w->w2\n",__func__,p->n_layers, p->dim, p->hidden_dim, p->n_layers * p->hidden_dim * p->dim);
-
-         w->w3.resize(p->n_layers * p->hidden_dim * p->dim);
-         LOG_INF("%s: Allocating [%d] x [%d] x [%d] = [%d] float space for w->w3\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
-
-         w->rms_final_weight.resize(p->dim);
-         LOG_INF("%s: Allocating [%d] float space for w->rms_final_weight\n",__func__,p->dim);
-
-         if (shared_weights) {
-             w->wcls = {};
-         } else {
-             w->wcls.resize(p->vocab_size * p->dim);
-             LOG_INF("%s: Allocating [%d] x [%d] = [%d] float space for w->wcls\n",__func__,p->vocab_size , p->dim, p->vocab_size * p->dim);
-         }
-     }
-     catch (std::length_error &) {
-         die("Invalid configuration. Failed to allocate memory for weights");
-     }
- }
-
- static int checkpoint_init_weights(TransformerWeights * w, const Config * p, FILE * f, bool shared_weights) {
-     if (fread(w->token_embedding_table.data(), sizeof(float), w->token_embedding_table.size(), f) != w->token_embedding_table.size()) return 1;
-     if (fread(w->rms_att_weight.data(), sizeof(float), w->rms_att_weight.size(), f) != w->rms_att_weight.size()) return 1;
-     if (fread(w->wq.data(), sizeof(float), w->wq.size(), f) != w->wq.size()) return 1;
-     if (fread(w->wk.data(), sizeof(float), w->wk.size(), f) != w->wk.size()) return 1;
-     if (fread(w->wv.data(), sizeof(float), w->wv.size(), f) != w->wv.size()) return 1;
-     if (fread(w->wo.data(), sizeof(float), w->wo.size(), f) != w->wo.size()) return 1;
-     if (fread(w->rms_ffn_weight.data(), sizeof(float), w->rms_ffn_weight.size(), f) != w->rms_ffn_weight.size()) return 1;
-     if (fread(w->w1.data(), sizeof(float), w->w1.size(), f) != w->w1.size()) return 1;
-     if (fread(w->w2.data(), sizeof(float), w->w2.size(), f) != w->w2.size()) return 1;
-     if (fread(w->w3.data(), sizeof(float), w->w3.size(), f) != w->w3.size()) return 1;
-     if (fread(w->rms_final_weight.data(), sizeof(float), w->rms_final_weight.size(), f) != w->rms_final_weight.size()) return 1;
-
-     // Skip freq_cis_real & freq_cis_imag
-     int head_size = p->dim / p->n_heads;
-     fseek(f, p->seq_len * head_size * sizeof(float), SEEK_CUR);
-
-     if (!shared_weights && fread(w->wcls.data(), sizeof(float), w->wcls.size(), f) != w->wcls.size()) return 1;
-
-     // Check we didn't forget to read anything
-     auto curr = ftell(f);
-     fseek(f, 0, SEEK_END);
-     auto end = ftell(f);
-     if (curr != end) {
-         LOG_ERR("%s: Error: failed to read the checkpoint file to the end (curr = %ld, end = %ld)\n", __func__, curr, end);
-         return 1;
-     }
-
-     return 0;
- }
-
- static void print_sample_weights(TransformerWeights *w){
-     LOG_INF("----- Quick print of first of the weight vales of all the variables\n");
-     LOG_INF("%f\n", w->token_embedding_table[0]);
-     LOG_INF("%f\n", w->rms_att_weight[0]);
-     LOG_INF("%f\n", w->rms_ffn_weight[0]);
-
-     LOG_INF("%f\n", w->wq[0]);
-     LOG_INF("%f\n", w->wk[0]);
-     LOG_INF("%f\n", w->wv[0]);
-     LOG_INF("%f\n", w->wo[0]);
-     LOG_INF("%f\n", w->w1[0]);
-     LOG_INF("%f\n", w->w2[0]);
-     LOG_INF("%f\n", w->w3[0]);
-     LOG_INF("%f\n", w->rms_att_weight[0]);
-     if (!w->wcls.empty()) LOG_INF("%f\n", w->wcls[0]);
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
- //////////////////////////////////////// ggml structs and functions required to load models, configs and save the model.
-
- struct my_llama_vocab {
-     using id = int32_t;
-     using token = std::string;
-     using ttype = llama_token_type;
-
-     struct token_data {
-         token text;
-         float score;
-         ttype type;
-     };
-
-     std::unordered_map<token, id> token_to_id;
-     std::vector<token_data> id_to_token;
- };
-
- struct my_llama_hparams {
-     uint32_t n_vocab = 32000;
-     uint32_t n_ctx = 512; // this is provided as user input?
-     uint32_t n_embd = 4096;
-     uint32_t n_ff = 11008;
-     uint32_t n_mult = 4;
-     uint32_t n_head = 32;
-     uint32_t n_head_kv = 32;
-     uint32_t n_layer = 32;
-     uint32_t n_rot = 64;
-
-     bool operator!=(const my_llama_hparams& other) const {
-         return memcmp(this, &other, sizeof(my_llama_hparams));
-     }
- };
-
- struct my_llama_layer {
-     // normalization
-     struct ggml_tensor * attention_norm;
-
-     // attention
-     struct ggml_tensor * wq;
-     struct ggml_tensor * wk;
-     struct ggml_tensor * wv;
-     struct ggml_tensor * wo;
-
-     // normalization
-     struct ggml_tensor * ffn_norm;
-
-     // ff
-     struct ggml_tensor * w1;
-     struct ggml_tensor * w2;
-     struct ggml_tensor * w3;
- };
-
- struct my_llama_model {
-     struct ggml_context * ctx = NULL;
-
-     std::string name;
-
-     my_llama_hparams hparams;
-
-     struct ggml_tensor * tok_embeddings;
-
-     struct ggml_tensor * norm;
-     struct ggml_tensor * output;
-
-     std::vector<my_llama_layer> layers;
-
-     uint32_t train_its = 0;
-     uint32_t train_samples = 0;
-     uint32_t train_tokens = 0;
- };
-
- struct train_params {
-     const char * fn_vocab_model;
-     const char * fn_llama2c_model;
-     const char * fn_llama2c_output_model;
-     const char * fn_train_data;
-     const char * fn_checkpoint_in;
-     const char * fn_checkpoint_out;
-     const char * fn_model_out;
-
-     uint32_t seed;
-
-     int n_ctx;
-     int n_embd;
-     int n_mult;
-     int n_head;
-     int n_layer;
-     int n_rotmax;
-
-     int n_threads;
-     int n_batch;
-     int n_examples;
-     int n_predict;
-
-     int print_info_interval;
-     int print_details_interval;
-
-     bool samples_start_after_nl;
-     bool use_adam;
-     bool use_flash;
-     bool use_scratch;
-
-     // only adam
-     int warmup;
-     int cos_decay_steps;
-     float cos_decay_restart;
-     float cos_decay_alpha;
-
-     int lbfgs_n_iter;
-     int adam_n_iter;
-     float adam_alpha;
-     float adam_decay;
-
-     int mem_model_gb;
-     int mem_compute_gb;
-     int mem_compute0_gb;
-     int mem_compute1_gb;
- };
-
- static void print_params(struct my_llama_hparams * params) {
-     LOG_INF("%s: n_vocab: %u\n", __func__, params->n_vocab);
-     LOG_INF("%s: n_ctx: %u\n", __func__, params->n_ctx);
-     LOG_INF("%s: n_embd: %u\n", __func__, params->n_embd);
-     LOG_INF("%s: n_mult: %u\n", __func__, params->n_mult);
-     LOG_INF("%s: n_head: %u\n", __func__, params->n_head);
-     LOG_INF("%s: n_head_kv: %u\n", __func__, params->n_head_kv);
-     LOG_INF("%s: n_ff: %u\n", __func__, params->n_ff);
-     LOG_INF("%s: n_layer: %u\n", __func__, params->n_layer);
-     LOG_INF("%s: n_rot: %u\n", __func__, params->n_rot);
- }
-
- static void print_tensor_info(const struct ggml_context * ctx) {
-     for (auto t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-         LOG_INF("%s: Allocating ", __func__);
-         int64_t total = 1;
-         int i = 0;
-         for (; i < ggml_n_dims(t); ++i) {
-             if (i > 0) LOG("x ");
-             LOG("[%" PRId64 "] ", t->ne[i]);
-             total *= t->ne[i];
-         }
-         if (i > 1) LOG("= [%" PRId64 "] ", total);
-         LOG("float space for %s\n", ggml_get_name(t));
-     }
- }
-
- static void init_model(struct my_llama_model * model) {
-     const auto & hparams = model->hparams;
-
-     const uint32_t n_embd = hparams.n_embd;
-     const uint32_t n_layer = hparams.n_layer;
-     const uint32_t n_vocab = hparams.n_vocab;
-
-     const uint32_t n_multiqueries = hparams.n_head_kv <= 0 || hparams.n_head_kv >= hparams.n_head ? 1 : hparams.n_head / hparams.n_head_kv;
-
-     const uint32_t n_ff = hparams.n_ff;
-     struct ggml_context * ctx = model->ctx;
-
-     model->train_its = 0;
-     model->train_samples = 0;
-     model->train_tokens = 0;
-
-     model->tok_embeddings = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
-     model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
-     model->output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
-
-     ggml_set_name(model->tok_embeddings, "tok_embeddings.weight");
-     ggml_set_name(model->norm, "norm.weight");
-     ggml_set_name(model->output, "output.weight");
-
-     model->layers.resize(n_layer);
-     for (uint32_t i = 0; i < n_layer; ++i) {
-         auto & layer = model->layers[i];
-
-         std::string layers_i = "layers." + std::to_string(i);
-
-         layer.attention_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
-
-         layer.wq = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
-         layer.wk = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd / n_multiqueries);
-         layer.wv = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd / n_multiqueries);
-         layer.wo = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd);
-
-         layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
-
-         layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
-         layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
-         layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
-
-         ggml_set_name(layer.attention_norm, (layers_i + ".attention_norm.weight").c_str());
-
-         ggml_set_name(layer.wq, (layers_i + ".attention.wq.weight").c_str());
-         ggml_set_name(layer.wk, (layers_i + ".attention.wk.weight").c_str());
-         ggml_set_name(layer.wv, (layers_i + ".attention.wv.weight").c_str());
-         ggml_set_name(layer.wo, (layers_i + ".attention.wo.weight").c_str());
-
-         ggml_set_name(layer.ffn_norm, (layers_i + ".ffn_norm.weight").c_str());
-
-         ggml_format_name(layer.w1, "%s.feed_forward.w1.weight", layers_i.c_str());
-         ggml_format_name(layer.w2, "%s.feed_forward.w2.weight", layers_i.c_str());
-         ggml_format_name(layer.w3, "%s.feed_forward.w3.weight", layers_i.c_str());
-     }
-
-     print_tensor_info(ctx);
- }
-
- static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
-     float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
-     return *ptr;
- }
-
- static int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
-     int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
-     return *ptr;
- }
-
- static void print_row(struct ggml_tensor * probs, int i) {
-     for (int k = 0; k < probs->ne[0]; ++k) {
-         float p = get_f32_2d(probs, k, i);
-         LOG(" %f", p);
-     }
-     LOG("\n");
- }
-
- static void print_matrix(struct ggml_tensor * probs) {
-     assert(ggml_is_matrix(probs));
-     for (int i = 0; i < probs->ne[1]; ++i) {
-         for (int k = 0; k < probs->ne[0]; ++k) {
-             float p = get_f32_2d(probs, k, i);
-             LOG(" %.2f", p);
-         }
-         LOG("\n");
-     }
- }
-
- struct my_llama_file {
-     // use FILE * so we don't have to re-open the file to mmap
-     FILE * fp;
-     size_t size;
-
-     my_llama_file(const char * fname, const char * mode) {
-         fp = std::fopen(fname, mode);
-         if (fp == NULL) {
-             size = 0;
-         } else {
-             seek(0, SEEK_END);
-             size = tell();
-             seek(0, SEEK_SET);
-         }
-     }
-
-     size_t tell() const {
- #ifdef _WIN32
-         __int64 ret = _ftelli64(fp);
- #else
-         long ret = std::ftell(fp);
- #endif
-         GGML_ASSERT(ret != -1); // this really shouldn't fail
-         return (size_t) ret;
-     }
-
-     void seek(size_t offset, int whence) {
- #ifdef _WIN32
-         int ret = _fseeki64(fp, (__int64) offset, whence);
- #else
-         int ret = std::fseek(fp, (long) offset, whence);
- #endif
-         GGML_ASSERT(ret == 0); // same
-     }
-
-     void read_raw(void * ptr, size_t size) {
-         if (size == 0) {
-             return;
-         }
-         errno = 0;
-         std::size_t ret = std::fread(ptr, size, 1, fp);
-         if (ferror(fp)) {
-             die_fmt("fread failed: %s", strerror(errno));
-         }
-         if (ret != 1) {
-             die("unexpectedly reached end of file");
-         }
-     }
-
-     std::uint32_t read_u32() {
-         std::uint32_t ret;
-         read_raw(&ret, sizeof(ret));
-         return ret;
-     }
-     std::float_t read_f32() {
-         std::float_t ret;
-         read_raw(&ret, sizeof(ret));
-         return ret;
-     }
-
-     std::string read_string(std::uint32_t len) {
-         std::vector<char> chars(len);
-         read_raw(chars.data(), len);
-         return std::string(chars.data(), len);
-     }
-
-     ~my_llama_file() {
-         if (fp) {
-             std::fclose(fp);
-         }
-     }
- };
-
- static bool is_ggml_file(const char * filename) {
-     my_llama_file file(filename, "rb");
-     if (file.size < 4) {
-         return false;
-     }
-     std::string magic = file.read_string(4);
-     return magic == GGUF_MAGIC;
- }
-
- static std::string llama_escape_whitespaces(const std::string & text) {
-     std::ostringstream out;
-     for (char c : text) {
-         if (c == ' ') out << "\xe2\x96\x81";
-         else out << c;
-     }
-     return out.str();
- }
-
- static void load_vocab(const char * filename, const Config * config, struct my_llama_vocab * vocab) {
-     if (is_ggml_file(filename)) {
-         LOG_INF("%s: Loading vocabulary from gguf file %s\n", __func__, filename);
-         struct ggml_context * ctx_data = NULL;
-
-         struct gguf_init_params params = {
-             /*.no_alloc = */ false,
-             /*.ctx = */ &ctx_data,
-         };
-
-         struct gguf_context * ctx = gguf_init_from_file(filename, params);
-         GGML_ASSERT(ctx != NULL);
-
-         const int model_idx = gguf_find_key(ctx, KV_TOKENIZER_MODEL);
-         GGML_ASSERT(model_idx >= 0);
-         std::string tokenizer_name = gguf_get_val_str(ctx, model_idx);
-         GGML_ASSERT(tokenizer_name == TOKENIZER_NAME);
-
-         const int token_idx = gguf_find_key(ctx, KV_TOKENIZER_LIST);
-         GGML_ASSERT(token_idx >= 0);
-
-         const int score_idx = gguf_find_key(ctx, KV_TOKENIZER_SCORES);
-         GGML_ASSERT(score_idx >= 0);
-         const float * scores = (const float * ) gguf_get_arr_data(ctx, score_idx);
-
-         const int toktype_idx = gguf_find_key(ctx, KV_TOKENIZER_TOKEN_TYPE);
-         GGML_ASSERT(toktype_idx >= 0);
-         const int * toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx);
-
-         const uint32_t n_vocab = gguf_get_arr_n(ctx, token_idx);
-         if (n_vocab != static_cast<uint32_t>(config->vocab_size)) {
-             die_fmt("vocab size mismatch: (gguf) %u != (llama2c) %d", n_vocab, config->vocab_size);
-         }
-
-         vocab->id_to_token.resize(n_vocab);
-
-         for (uint32_t i = 0; i < n_vocab; i++) {
-             std::string word = gguf_get_arr_str(ctx, token_idx, i);
-
-             vocab->token_to_id[word] = i;
-
-             auto & token_data = vocab->id_to_token[i];
-             token_data.text = std::move(word);
-             token_data.score = scores[i];
-             token_data.type = (llama_token_type) toktypes[i];
-         }
-         ggml_free(ctx_data);
-         gguf_free(ctx);
-     } else {
-         // assume llama2.c vocabulary
-         LOG_INF("%s: Assuming llama2.c vocabulary since %s is not a gguf file\n", __func__, filename);
-         my_llama_file file(filename, "rb");
-         if (!file.fp) {
-             die_fmt("%s: %s", strerror(errno), filename);
-         }
-         const int n_vocab = config->vocab_size;
-         /* uint32_t max_token_length = */ file.read_u32(); // unused
-         vocab->id_to_token.resize(n_vocab);
-         for (my_llama_vocab::id id=0; id<n_vocab; ++id) {
-             float_t score = file.read_f32();
-             uint32_t len = file.read_u32();
-             std::string text = file.read_string(len);
-
-             unsigned char byte_val;
-             my_llama_vocab::ttype type = LLAMA_TOKEN_TYPE_NORMAL;
-             if (id == UNKNOWN_TOKEN_ID) {
-                 text = "<unk>";
-                 type = LLAMA_TOKEN_TYPE_UNKNOWN;
-             } else if (id == BOS_TOKEN_ID) {
-                 text = "<s>";
-                 type = LLAMA_TOKEN_TYPE_CONTROL;
-             } else if (id == EOS_TOKEN_ID) {
-                 text = "</s>";
-                 type = LLAMA_TOKEN_TYPE_CONTROL;
-             } else if (text.empty()) {
-                 type = LLAMA_TOKEN_TYPE_CONTROL;
-             } else if (sscanf(text.c_str(), "<0x%02hhX>", &byte_val) == 1) {
-                 // Text of byte tokens is already in the expected format.
-                 type = LLAMA_TOKEN_TYPE_BYTE;
-             } else {
-                 type = LLAMA_TOKEN_TYPE_NORMAL;
-             }
-             text = llama_escape_whitespaces(text);
-
-             vocab->id_to_token[id].text = text;
-             vocab->id_to_token[id].score = score;
-             vocab->id_to_token[id].type = type;
-             vocab->token_to_id.emplace(text, id);
-         }
-     }
- }
-
- static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
-     int size = 1;
-     for (int dim = 0; dim < ggml_n_dims(gg_weights); ++dim) {
-         size *= gg_weights->ne[dim];
-     }
-     for (int ct = 0; ct < size; ++ct) {
-         int64_t i0 = 0; int64_t i1 = 0;
-         int64_t i2 = 0; int64_t i3 = 0;
-         ggml_unravel_index(gg_weights, ct, &i0, &i1, &i2, &i3);
-         ggml_set_f32_nd(gg_weights, i0, i1, i2, i3, karpathy_weights[ct]);
-     }
- }
-
- static void save_as_llama_model(
-     struct my_llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename
- ) {
-     // convert AK weights into GG weights one by one.
-     // w->token_embedding_table -> model->tok_embeddings
-     // float* -> struct ggml_tensor
-     convert_weights_ak_to_gg(model->tok_embeddings, w->token_embedding_table.data());
-     convert_weights_ak_to_gg(model->output, !w->wcls.empty() ? w->wcls.data() : w->token_embedding_table.data());
-
-     convert_weights_ak_to_gg(model->norm, w->rms_final_weight.data());
-     //print_row(model->norm, 0);
-
-     // for rms-att-weight
-     int row_length = model->hparams.n_embd;
-     int n_ff = model->hparams.n_ff;
-
-     const uint32_t n_multiqueries = model->hparams.n_head_kv <= 0 || model->hparams.n_head_kv >= model->hparams.n_head ? 1 : model->hparams.n_head / model->hparams.n_head_kv;
-
-     for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
-         auto & layer = model->layers[i];
-         // 1d
-         convert_weights_ak_to_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
-         convert_weights_ak_to_gg(layer.ffn_norm , &w->rms_ffn_weight[i*row_length]);
-
-         // from 3d matrix layer x dim x dim to 2d matrix dim x dim
-         convert_weights_ak_to_gg(layer.wq , &w->wq[i*row_length*row_length]);
-         convert_weights_ak_to_gg(layer.wo , &w->wo[i*row_length*row_length]);
-         // from 3d matrix layer x dim x dim to 2d matrix dim x dim / n_multiqueries
-         convert_weights_ak_to_gg(layer.wk , &w->wk[i*row_length*row_length/n_multiqueries]);
-         convert_weights_ak_to_gg(layer.wv , &w->wv[i*row_length*row_length/n_multiqueries]);
-
-         convert_weights_ak_to_gg(layer.w1 , &w->w1[i*row_length*n_ff]);
-         convert_weights_ak_to_gg(layer.w2 , &w->w2[i*n_ff*row_length]);
-         convert_weights_ak_to_gg(layer.w3 , &w->w3[i*row_length*n_ff]);
-     }
-
-     struct gguf_context * ctx = gguf_init_empty();
-
-     std::vector<const char*> tokens;
-     std::vector<float> scores;
-     std::vector<llama_token_type> token_types;
-     for (const my_llama_vocab::token_data & token_data : vocab->id_to_token) {
-         tokens.push_back(token_data.text.c_str());
-         scores.push_back(token_data.score);
-         token_types.push_back(token_data.type);
-     }
-     gguf_set_arr_str(ctx, KV_TOKENIZER_LIST, tokens.data(), tokens.size());
-     gguf_set_arr_data(ctx, KV_TOKENIZER_SCORES, GGUF_TYPE_FLOAT32, scores.data(), scores.size());
-     gguf_set_arr_data(ctx, KV_TOKENIZER_TOKEN_TYPE, GGUF_TYPE_INT32, token_types.data(), token_types.size());
-
-     gguf_set_val_str(ctx, KV_TOKENIZER_MODEL, TOKENIZER_NAME);
-
-     gguf_set_val_str(ctx, KV_GENERAL_ARCHITECTURE, "llama");
-     gguf_set_val_str(ctx, KV_GENERAL_NAME, "llama");
-
-     // special tokens
-     gguf_set_val_u32(ctx, KV_TOKENIZER_UNK_ID, UNKNOWN_TOKEN_ID);
-     gguf_set_val_u32(ctx, KV_TOKENIZER_BOS_ID, BOS_TOKEN_ID);
-     gguf_set_val_u32(ctx, KV_TOKENIZER_EOS_ID, EOS_TOKEN_ID);
-     gguf_set_val_u32(ctx, KV_TOKENIZER_SEP_ID, LLAMA_TOKEN_NULL);
-     gguf_set_val_u32(ctx, KV_TOKENIZER_PAD_ID, LLAMA_TOKEN_NULL);
-
-     gguf_set_val_u32(ctx, KV_CONTEXT_LENGTH, model->hparams.n_ctx);
-     gguf_set_val_u32(ctx, KV_EMBEDDING_LENGTH, model->hparams.n_embd);
-     gguf_set_val_u32(ctx, KV_FEED_FORWARD_LENGTH, model->hparams.n_ff);
-     gguf_set_val_u32(ctx, KV_ATTENTION_HEAD_COUNT, model->hparams.n_head);
-     gguf_set_val_u32(ctx, KV_ATTENTION_HEAD_COUNT, model->hparams.n_head);
-     gguf_set_val_u32(ctx, KV_ATTENTION_HEAD_COUNT_KV, model->hparams.n_head_kv);
-     gguf_set_val_u32(ctx, KV_BLOCK_COUNT, model->hparams.n_layer);
-     gguf_set_val_u32(ctx, KV_ROPE_DIMENSION_COUNT, model->hparams.n_rot);
-     gguf_set_val_f32(ctx, KV_ATTENTION_LAYERNORM_RMS_EPS, 1e-5f);
-
-     // write tensors
-     ggml_set_name(model->tok_embeddings, TN_TOKEN_EMBD);
-     gguf_add_tensor(ctx, model->tok_embeddings);
-
-     ggml_set_name(model->norm, TN_OUTPUT_NORM);
-     gguf_add_tensor(ctx, model->norm);
-
-     ggml_set_name(model->output, TN_OUTPUT);
-     gguf_add_tensor(ctx, model->output);
-
-     for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
-         auto & layer = model->layers[i];
-
-         ggml_format_name(layer.wq, TN_ATTN_Q, i);
-         gguf_add_tensor(ctx, layer.wq);
-
-         ggml_format_name(layer.wk, TN_ATTN_K, i);
-         gguf_add_tensor(ctx, layer.wk);
-
-         ggml_format_name(layer.wv, TN_ATTN_V, i);
-         gguf_add_tensor(ctx, layer.wv);
-
-         ggml_format_name(layer.wo, TN_ATTN_OUTPUT, i);
-         gguf_add_tensor(ctx, layer.wo);
-
-         ggml_format_name(layer.attention_norm, TN_ATTN_NORM, i);
-         gguf_add_tensor(ctx, layer.attention_norm);
-
-         ggml_format_name(layer.w1, TN_FFN_GATE, i);
-         gguf_add_tensor(ctx, layer.w1);
-
-         ggml_format_name(layer.w2, TN_FFN_DOWN, i);
-         gguf_add_tensor(ctx, layer.w2);
-
-         ggml_format_name(layer.w3, TN_FFN_UP, i);
-         gguf_add_tensor(ctx, layer.w3);
-
-         ggml_format_name(layer.ffn_norm, TN_FFN_NORM, i);
-         gguf_add_tensor(ctx, layer.ffn_norm);
-     }
-
-     gguf_write_to_file(ctx, filename, false);
-     gguf_free(ctx);
- }
-
- static struct train_params get_default_train_params() {
-     struct train_params params;
-     params.fn_vocab_model = "models/7B/ggml-model-f16.gguf";
-     params.fn_llama2c_output_model = "ak_llama_model.bin";
-     params.fn_train_data = "shakespeare.txt";
-     params.fn_checkpoint_in = "checkpoint.bin";
-     params.fn_checkpoint_out = "checkpoint.bin";
-     params.fn_model_out = "ggml-checkpoint-f32.bin";
-
-     params.seed = -1;
-
-     params.n_ctx = 128;
-     params.n_embd = 256;
-     params.n_mult = 256;
-     params.n_head = 8;
-     params.n_layer = 16;
-     params.n_rotmax = 64;
-
-     params.n_threads = 6;
-     params.n_batch = 8;
-     params.n_examples = 8;
-     params.n_predict = 1024;
-
-     params.print_info_interval = 1;
-     params.print_details_interval = 2;
-
-     params.samples_start_after_nl = false;
-     params.use_adam = true;
-     params.use_flash = false;
-     params.use_scratch = true;
-
-     // only adam
-     params.warmup = 100;
-     params.cos_decay_steps = 1000;
-     params.cos_decay_restart = 1.1f;
-     params.cos_decay_alpha = 0.0f;
-
-     params.lbfgs_n_iter = 16;
-     params.adam_n_iter = 16;
-     params.adam_alpha = 1e-3f;
-     params.adam_decay = 1e-3f;
-
-     params.mem_model_gb = 2;
-     params.mem_compute_gb = 24;
-     params.mem_compute0_gb = 8;
-     params.mem_compute1_gb = 2;
-
-     return params;
- }
-
- static void print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
-     fprintf(stderr, "usage: %s [options]\n", argv[0]);
-     fprintf(stderr, "\n");
-     fprintf(stderr, "options:\n");
-     fprintf(stderr, " -h, --help show this help message and exit\n");
-     fprintf(stderr, " --copy-vocab-from-model FNAME path of gguf llama model or llama2.c vocabulary from which to copy vocab (default '%s')\n", params->fn_vocab_model);
-     fprintf(stderr, " --llama2c-model FNAME [REQUIRED] model path from which to load Karpathy's llama2.c model\n");
-     fprintf(stderr, " --llama2c-output-model FNAME model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model);
-     fprintf(stderr, "\n");
- }
-
- static bool params_parse(int argc, char ** argv, struct train_params * params) {
-     bool invalid_param = false;
-     bool reqd_param_found = false;
-     std::string arg;
-     struct train_params default_params = get_default_train_params();
-     const std::string arg_prefix = "--";
-
-     for (int i = 1; i < argc; i++) {
-         arg = argv[i];
-         if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
-             std::replace(arg.begin(), arg.end(), '_', '-');
-         }
-
-         if (arg == "--copy-vocab-from-model") {
-             if (++i >= argc) {
-                 invalid_param = true;
-                 break;
-             }
-             params->fn_vocab_model = argv[i];
-         } else if (arg == "--llama2c-model") {
-             if (++i >= argc) {
-                 invalid_param = true;
-                 break;
-             }
-             reqd_param_found = true;
-             params->fn_llama2c_model = argv[i];
-         } else if (arg == "--llama2c-output-model") {
-             if (++i >= argc) {
-                 invalid_param = true;
-                 break;
-             }
-             params->fn_llama2c_output_model = argv[i];
-         } else if (arg == "-h" || arg == "--help") {
-             print_usage(argc, argv, &default_params);
-             exit(0);
-         } else {
-             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-             print_usage(argc, argv, &default_params);
-             exit(1);
-         }
-     }
-     if (invalid_param) {
-         fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
-         print_usage(argc, argv, &default_params);
-         exit(1);
-     }
-     if (!reqd_param_found){
-         fprintf(stderr, "error: please specify a llama2.c .bin file to be converted with argument --llama2c-model\n");
-         print_usage(argc, argv, &default_params);
-         exit(1);
-     }
-
-     return true;
- }
-
- static std::string basename(const std::string &path) {
-     size_t pos = path.find_last_of("/\\");
-     if (pos == std::string::npos) {
-         return path;
-     }
-     return path.substr(pos + 1);
- }
-
- int main(int argc, char ** argv) {
-     common_init();
-
-     struct train_params params = get_default_train_params();
-     if (!params_parse(argc, argv, &params)) {
-         return 1;
-     }
-
-     Config config;
-     TransformerWeights weights = {};
-     {
-         LOG_INF("%s: Loading llama2c model from %s\n", __func__, params.fn_llama2c_model);
-         FILE * file = fopen(params.fn_llama2c_model, "rb");
-         if (!file) {
-             LOG_ERR("%s: Unable to open the checkpoint file %s!\n", __func__, params.fn_llama2c_model);
-             return 1;
-         }
-         // read in the config header
-         if (fread(&config, sizeof(Config), 1, file) != 1) {
-             LOG_ERR("%s: Unable to read llama2c config from %s!\n",__func__,params.fn_llama2c_model);
-             return 1;
-         }
-         auto shared_weights = config.vocab_size > 0;
-         config.vocab_size = abs(config.vocab_size);
-
-         // read in the Transformer weights
-         alloc_weights(&weights, &config, shared_weights);
-         if (checkpoint_init_weights(&weights, &config, file, shared_weights)) {
-             LOG_ERR("%s: Unable to initialize transformer weights from %s!",__func__,params.fn_llama2c_model);
-             return 1;
-         }
-         fclose(file);
-     }
-
-     struct my_llama_vocab vocab;
-     load_vocab(params.fn_vocab_model, &config, &vocab);
-
-     struct my_llama_model model;
-     model.hparams.n_vocab = config.vocab_size; //llama_vocab_n_vocab(lctx);
-     model.hparams.n_ctx = params.n_ctx;
-     model.hparams.n_embd = config.dim; //params.n_embd;
-     model.hparams.n_ff = config.hidden_dim;
-     model.hparams.n_mult = 32;//params.n_mult;
-     model.hparams.n_head = config.n_heads; //params.n_head;
-     model.hparams.n_head_kv = config.n_kv_heads;
-     model.hparams.n_layer = config.n_layers; //params.n_layer;
-     model.hparams.n_rot = std::min((uint32_t)params.n_rotmax, model.hparams.n_embd / model.hparams.n_head);
-
-     print_params(&model.hparams);
-
-     struct ggml_init_params lcparams;
-     lcparams.mem_size = 1024ll*1024ll*1024ll*((size_t) params.mem_model_gb);
-     lcparams.mem_buffer = NULL;
-     lcparams.no_alloc = false;
-
-     model.ctx = ggml_init(lcparams);
-
-     init_model(&model);
-     model.name = basename(params.fn_llama2c_model);
-     save_as_llama_model(&vocab, &model, &weights, params.fn_llama2c_output_model);
-
-     LOG_INF("%s: Saving llama.c model file %s in ggml format at %s\n", __func__, params.fn_llama2c_model, params.fn_llama2c_output_model);
-
-     ggml_free(model.ctx);
-     return 0;
- }