@fugood/llama.node 0.6.3 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +45 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
@@ -1,601 +0,0 @@
1
- #pragma once
2
-
3
- // GGML internal header
4
-
5
- #include "ggml.h"
6
- #include "gguf.h"
7
-
8
- #include <assert.h>
9
- #include <math.h>
10
- #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
11
- #include <stdbool.h>
12
- #include <stdint.h>
13
- #include <string.h>
14
-
15
- #ifdef __ARM_FEATURE_SVE
16
- #include <arm_sve.h>
17
- #endif // __ARM_FEATURE_SVE
18
-
19
- #if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__)
20
- // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
21
- //
22
- // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
23
- //
24
- #include <arm_neon.h>
25
- #endif
26
-
27
- #if defined(__F16C__)
28
- #include <immintrin.h>
29
- #endif
30
-
31
- #ifdef __cplusplus
32
- extern "C" {
33
- #endif
34
-
35
- #ifndef MIN
36
- # define MIN(a, b) ((a) < (b) ? (a) : (b))
37
- #endif
38
-
39
- #ifndef MAX
40
- # define MAX(a, b) ((a) > (b) ? (a) : (b))
41
- #endif
42
-
43
- // required for mmap as gguf only guarantees 32-byte alignment
44
- #define TENSOR_ALIGNMENT 32
45
-
46
- // static_assert should be a #define, but if it's not,
47
- // fall back to the _Static_assert C11 keyword.
48
- // if C99 - static_assert is noop
49
- // ref: https://stackoverflow.com/a/53923785/4039976
50
- #ifndef __cplusplus
51
- #ifndef static_assert
52
- #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
53
- #define static_assert(cond, msg) _Static_assert(cond, msg)
54
- #else
55
- #define static_assert(cond, msg) struct global_scope_noop_trick
56
- #endif
57
- #endif
58
- #endif
59
-
60
- static inline int ggml_up32(int n) {
61
- return (n + 31) & ~31;
62
- }
63
-
64
- //static inline int ggml_up64(int n) {
65
- // return (n + 63) & ~63;
66
- //}
67
-
68
- static inline int ggml_up(int n, int m) {
69
- // assert m is a power of 2
70
- GGML_ASSERT((m & (m - 1)) == 0);
71
- return (n + m - 1) & ~(m - 1);
72
- }
73
-
74
- //
75
- // logging
76
- //
77
-
78
- GGML_ATTRIBUTE_FORMAT(2, 3)
79
- GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
80
- GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
81
-
82
- #define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
83
- #define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
84
- #define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
85
- #define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
86
- #define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
87
- #define GGML_LOG_CONT(...) ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
88
-
89
- #define GGML_DEBUG 0
90
-
91
- #if (GGML_DEBUG >= 1)
92
- #define GGML_PRINT_DEBUG(...) GGML_LOG_DEBUG(__VA_ARGS__)
93
- #else
94
- #define GGML_PRINT_DEBUG(...)
95
- #endif
96
-
97
- #if (GGML_DEBUG >= 5)
98
- #define GGML_PRINT_DEBUG_5(...) GGML_LOG_DEBUG(__VA_ARGS__)
99
- #else
100
- #define GGML_PRINT_DEBUG_5(...)
101
- #endif
102
-
103
- #if (GGML_DEBUG >= 10)
104
- #define GGML_PRINT_DEBUG_10(...) GGML_LOG_DEBUG(__VA_ARGS__)
105
- #else
106
- #define GGML_PRINT_DEBUG_10(...)
107
- #endif
108
-
109
- // tensor params
110
-
111
- static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
112
- GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
113
- assert(params_size <= GGML_MAX_OP_PARAMS);
114
- memcpy(tensor->op_params, params, params_size);
115
- }
116
-
117
- static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
118
- assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
119
- return ((const int32_t *)(tensor->op_params))[i];
120
- }
121
-
122
- static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
123
- assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
124
- return ((const float *)(tensor->op_params))[i];
125
- }
126
-
127
- static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
128
- assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
129
- ((int32_t *)(tensor->op_params))[i] = value;
130
- }
131
-
132
- static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
133
- assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
134
- ((float *)(tensor->op_params))[i] = value;
135
- }
136
-
137
- struct ggml_map_custom1_op_params {
138
- ggml_custom1_op_t fun;
139
- int n_tasks;
140
- void * userdata;
141
- };
142
-
143
- struct ggml_map_custom2_op_params {
144
- ggml_custom2_op_t fun;
145
- int n_tasks;
146
- void * userdata;
147
- };
148
-
149
- struct ggml_map_custom3_op_params {
150
- ggml_custom3_op_t fun;
151
- int n_tasks;
152
- void * userdata;
153
- };
154
-
155
- struct ggml_custom_op_params {
156
- ggml_custom_op_t fun;
157
- int n_tasks;
158
- void * userdata;
159
- };
160
-
161
- // bitset
162
-
163
- typedef uint32_t ggml_bitset_t;
164
-
165
- static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
166
- #define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
167
- #define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
168
-
169
- static size_t ggml_bitset_size(size_t n) {
170
- return (n + BITSET_MASK) >> BITSET_SHR;
171
- }
172
-
173
- static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
174
- return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK)));
175
- }
176
-
177
- static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
178
- bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK));
179
- }
180
-
181
- static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
182
- bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK));
183
- }
184
-
185
- // hash set
186
-
187
- #define GGML_HASHSET_FULL ((size_t)-1)
188
- #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
189
-
190
- struct ggml_hash_set {
191
- size_t size;
192
- ggml_bitset_t * used; // whether or not the keys are in use i.e. set
193
- struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
194
- };
195
-
196
- struct ggml_hash_set ggml_hash_set_new(size_t size);
197
- void ggml_hash_set_free(struct ggml_hash_set * hash_set);
198
-
199
- // returns the minimum size for a hash set that can hold min_sz elements
200
- size_t ggml_hash_size(size_t min_sz);
201
-
202
- // remove all elements from the hash set
203
- void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
204
-
205
- // returns true if key is in the hash set
206
- static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
207
-
208
- // returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted
209
- static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key);
210
-
211
- // returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
212
- static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
213
-
214
- // return index, asserts if table is full
215
- static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
216
-
217
- // hash function for ggml_tensor
218
- static inline size_t ggml_hash(const struct ggml_tensor * p) {
219
- // the last 4 bits are always zero due to alignment
220
- return (size_t)(uintptr_t)p >> 4;
221
- }
222
-
223
- static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key) {
224
- size_t h = ggml_hash(key) % hash_set->size;
225
-
226
- // linear probing
227
- size_t i = h;
228
- while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) {
229
- i = (i + 1) % hash_set->size;
230
- if (i == h) {
231
- // visited all hash table entries -> not found
232
- return GGML_HASHSET_FULL;
233
- }
234
- }
235
- return i;
236
- }
237
-
238
- static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
239
- size_t i = ggml_hash_find(hash_set, key);
240
- return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i);
241
- }
242
-
243
- static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
244
- size_t h = ggml_hash(key) % hash_set->size;
245
-
246
- // linear probing
247
- size_t i = h;
248
- do {
249
- if (!ggml_bitset_get(hash_set->used, i)) {
250
- ggml_bitset_set(hash_set->used, i);
251
- hash_set->keys[i] = key;
252
- return i;
253
- }
254
- if (hash_set->keys[i] == key) {
255
- return GGML_HASHSET_ALREADY_EXISTS;
256
- }
257
- i = (i + 1) % hash_set->size;
258
- } while (i != h);
259
-
260
- // visited all hash table entries -> not found
261
- GGML_ABORT("fatal error");
262
- }
263
-
264
- static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
265
- size_t h = ggml_hash(key) % hash_set->size;
266
-
267
- // linear probing
268
- size_t i = h;
269
- do {
270
- if (!ggml_bitset_get(hash_set->used, i)) {
271
- ggml_bitset_set(hash_set->used, i);
272
- hash_set->keys[i] = key;
273
- return i;
274
- }
275
- if (hash_set->keys[i] == key) {
276
- return i;
277
- }
278
- i = (i + 1) % hash_set->size;
279
- } while (i != h);
280
-
281
- // visited all hash table entries -> not found
282
- GGML_ABORT("fatal error");
283
- }
284
-
285
- // computation graph
286
-
287
- enum ggml_cgraph_eval_order {
288
- GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
289
- GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
290
- GGML_CGRAPH_EVAL_ORDER_COUNT
291
- };
292
-
293
- struct ggml_cgraph {
294
- int size; // maximum number of nodes/leafs/grads/grad_accs
295
- int n_nodes; // number of nodes currently in use
296
- int n_leafs; // number of leafs currently in use
297
-
298
- struct ggml_tensor ** nodes; // tensors with data that can change if the graph is evaluated
299
- struct ggml_tensor ** grads; // the outputs of these tensors are the gradients of the nodes
300
- struct ggml_tensor ** grad_accs; // accumulators for node gradients
301
- struct ggml_tensor ** leafs; // tensors with constant data
302
-
303
- struct ggml_hash_set visited_hash_set;
304
-
305
- enum ggml_cgraph_eval_order order;
306
- };
307
-
308
- // returns a slice of cgraph with nodes [i0, i1)
309
- // the slice does not have leafs or gradients
310
- // if you need the gradients, get them from the original graph
311
- struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
312
-
313
- // Memory allocation
314
-
315
- GGML_API void * ggml_aligned_malloc(size_t size);
316
- GGML_API void ggml_aligned_free(void * ptr, size_t size);
317
-
318
- // FP16 to FP32 conversion
319
-
320
- // 16-bit float
321
- // on Arm, we use __fp16
322
- // on x86, we use uint16_t
323
- //
324
- // for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616
325
- // for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843
326
- //
327
- #if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
328
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
329
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
330
-
331
- #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
332
-
333
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
334
- __fp16 tmp;
335
- memcpy(&tmp, &h, sizeof(ggml_fp16_t));
336
- return (float)tmp;
337
- }
338
-
339
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
340
- ggml_fp16_t res;
341
- __fp16 tmp = f;
342
- memcpy(&res, &tmp, sizeof(ggml_fp16_t));
343
- return res;
344
- }
345
-
346
- #elif defined(__F16C__)
347
-
348
- #ifdef _MSC_VER
349
- #define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
350
- #define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
351
- #else
352
- #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
353
- #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
354
- #endif
355
-
356
- #elif defined(__POWER9_VECTOR__)
357
-
358
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
359
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
360
- /* the inline asm below is about 12% faster than the lookup method */
361
- #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
362
- #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
363
-
364
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
365
- float f;
366
- double d;
367
- __asm__(
368
- "mtfprd %0,%2\n"
369
- "xscvhpdp %0,%0\n"
370
- "frsp %1,%0\n" :
371
- /* temp */ "=d"(d),
372
- /* out */ "=f"(f):
373
- /* in */ "r"(h));
374
- return f;
375
- }
376
-
377
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
378
- double d;
379
- ggml_fp16_t r;
380
- __asm__( /* xscvdphp can work on double or single precision */
381
- "xscvdphp %0,%2\n"
382
- "mffprd %1,%0\n" :
383
- /* temp */ "=d"(d),
384
- /* out */ "=r"(r):
385
- /* in */ "f"(f));
386
- return r;
387
- }
388
-
389
- #elif defined(__riscv) && defined(GGML_RV_ZFH)
390
-
391
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
392
- float f;
393
- __asm__(
394
- "fmv.h.x %[f], %[h]\n\t"
395
- "fcvt.s.h %[f], %[f]"
396
- : [f] "=&f" (f)
397
- : [h] "r" (h)
398
- );
399
- return f;
400
- }
401
-
402
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
403
- ggml_fp16_t res;
404
- __asm__(
405
- "fcvt.h.s %[f], %[f]\n\t"
406
- "fmv.x.h %[h], %[f]"
407
- : [h] "=&r" (res)
408
- : [f] "f" (f)
409
- );
410
- return res;
411
- }
412
-
413
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
414
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
415
- #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
416
- #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
417
-
418
- #else
419
-
420
- // FP16 <-> FP32
421
- // ref: https://github.com/Maratyszcza/FP16
422
-
423
- static inline float fp32_from_bits(uint32_t w) {
424
- union {
425
- uint32_t as_bits;
426
- float as_value;
427
- } fp32;
428
- fp32.as_bits = w;
429
- return fp32.as_value;
430
- }
431
-
432
- static inline uint32_t fp32_to_bits(float f) {
433
- union {
434
- float as_value;
435
- uint32_t as_bits;
436
- } fp32;
437
- fp32.as_value = f;
438
- return fp32.as_bits;
439
- }
440
-
441
- static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
442
- const uint32_t w = (uint32_t) h << 16;
443
- const uint32_t sign = w & UINT32_C(0x80000000);
444
- const uint32_t two_w = w + w;
445
-
446
- const uint32_t exp_offset = UINT32_C(0xE0) << 23;
447
- #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
448
- const float exp_scale = 0x1.0p-112f;
449
- #else
450
- const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
451
- #endif
452
- const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
453
-
454
- const uint32_t magic_mask = UINT32_C(126) << 23;
455
- const float magic_bias = 0.5f;
456
- const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
457
-
458
- const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
459
- const uint32_t result = sign |
460
- (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
461
- return fp32_from_bits(result);
462
- }
463
-
464
- static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
465
- #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
466
- const float scale_to_inf = 0x1.0p+112f;
467
- const float scale_to_zero = 0x1.0p-110f;
468
- #else
469
- const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
470
- const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
471
- #endif
472
- float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
473
-
474
- const uint32_t w = fp32_to_bits(f);
475
- const uint32_t shl1_w = w + w;
476
- const uint32_t sign = w & UINT32_C(0x80000000);
477
- uint32_t bias = shl1_w & UINT32_C(0xFF000000);
478
- if (bias < UINT32_C(0x71000000)) {
479
- bias = UINT32_C(0x71000000);
480
- }
481
-
482
- base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
483
- const uint32_t bits = fp32_to_bits(base);
484
- const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
485
- const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
486
- const uint32_t nonsign = exp_bits + mantissa_bits;
487
- return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
488
- }
489
-
490
- #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
491
- #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
492
-
493
- #endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
494
-
495
- // precomputed f32 table for f16 (256 KB)
496
- // defined in ggml.c, initialized in ggml_init()
497
- GGML_API float ggml_table_f32_f16[1 << 16];
498
-
499
- // On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
500
- // so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
501
- // This is also true for POWER9.
502
- #if !defined(GGML_FP16_TO_FP32)
503
- inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
504
- uint16_t s;
505
- memcpy(&s, &f, sizeof(uint16_t));
506
- return ggml_table_f32_f16[s];
507
- }
508
-
509
- #define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
510
- #endif
511
-
512
- #if !defined(GGML_FP32_TO_FP16)
513
- #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
514
- #endif
515
-
516
- /**
517
- * Converts brain16 to float32.
518
- *
519
- * The bfloat16 floating point format has the following structure:
520
- *
521
- * ┌sign
522
- * │
523
- * │ ┌exponent
524
- * │ │
525
- * │ │ ┌mantissa
526
- * │ │ │
527
- * │┌──┴───┐┌─┴───┐
528
- * 0b0000000000000000 brain16
529
- *
530
- * Since bf16 has the same number of exponent bits as a 32bit float,
531
- * encoding and decoding numbers becomes relatively straightforward.
532
- *
533
- * ┌sign
534
- * │
535
- * │ ┌exponent
536
- * │ │
537
- * │ │ ┌mantissa
538
- * │ │ │
539
- * │┌──┴───┐┌─┴───────────────────┐
540
- * 0b00000000000000000000000000000000 IEEE binary32
541
- *
542
- * For comparison, the standard fp16 format has fewer exponent bits.
543
- *
544
- * ┌sign
545
- * │
546
- * │ ┌exponent
547
- * │ │
548
- * │ │ ┌mantissa
549
- * │ │ │
550
- * │┌─┴─┐┌─┴──────┐
551
- * 0b0000000000000000 IEEE binary16
552
- *
553
- * @see IEEE 754-2008
554
- */
555
- static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
556
- union {
557
- float f;
558
- uint32_t i;
559
- } u;
560
- u.i = (uint32_t)h.bits << 16;
561
- return u.f;
562
- }
563
-
564
- /**
565
- * Converts float32 to brain16.
566
- *
567
- * This is binary identical with Google Brain float conversion.
568
- * Floats shall round to nearest even, and NANs shall be quiet.
569
- * Subnormals aren't flushed to zero, except perhaps when used.
570
- * This code should vectorize nicely if using modern compilers.
571
- */
572
- static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
573
- ggml_bf16_t h;
574
- union {
575
- float f;
576
- uint32_t i;
577
- } u;
578
- u.f = s;
579
- if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
580
- h.bits = (u.i >> 16) | 64; /* force to quiet */
581
- return h;
582
- }
583
- h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
584
- return h;
585
- }
586
-
587
- #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
588
- #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
589
-
590
- #ifdef __cplusplus
591
- }
592
- #endif
593
-
594
- #ifdef __cplusplus
595
- #include <vector>
596
-
597
- // expose GGUF internals for test code
598
- GGML_API size_t gguf_type_size(enum gguf_type type);
599
- GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
600
- GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
601
- #endif // __cplusplus