@novastera-oss/llamarn 0.0.1-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (989)
  1. package/INTERFACE.md +389 -0
  2. package/LICENSE +201 -0
  3. package/README.md +235 -0
  4. package/RNLlamaCpp.podspec +69 -0
  5. package/android/CMakeLists.txt +107 -0
  6. package/android/build.gradle +111 -0
  7. package/android/generated/java/com/novastera/llamarn/NativeRNLlamaCppSpec.java +47 -0
  8. package/android/generated/jni/CMakeLists.txt +36 -0
  9. package/android/generated/jni/RNLlamaCppSpec-generated.cpp +44 -0
  10. package/android/generated/jni/RNLlamaCppSpec.h +31 -0
  11. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI-generated.cpp +42 -0
  12. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI.h +336 -0
  13. package/android/gradle.properties +5 -0
  14. package/android/src/main/AndroidManifest.xml +3 -0
  15. package/android/src/main/AndroidManifestNew.xml +2 -0
  16. package/android/src/main/cpp/include/llama-cpp.h +30 -0
  17. package/android/src/main/cpp/include/llama.h +1440 -0
  18. package/android/src/main/java/com/novastera/llamarn/RNLlamaCppPackage.kt +21 -0
  19. package/android/src/main/jniLibs/arm64-v8a/libOpenCL.so +0 -0
  20. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  21. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  22. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  23. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  24. package/android/src/main/jniLibs/x86_64/libOpenCL.so +0 -0
  25. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  26. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  27. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  28. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  29. package/cpp/LlamaCppModel.cpp +984 -0
  30. package/cpp/LlamaCppModel.h +162 -0
  31. package/cpp/PureCppImpl.cpp +308 -0
  32. package/cpp/PureCppImpl.h +59 -0
  33. package/cpp/SystemUtils.cpp +180 -0
  34. package/cpp/SystemUtils.h +74 -0
  35. package/cpp/build-info.cpp +4 -0
  36. package/cpp/llama.cpp/AUTHORS +1106 -0
  37. package/cpp/llama.cpp/CMakeLists.txt +254 -0
  38. package/cpp/llama.cpp/CMakePresets.json +84 -0
  39. package/cpp/llama.cpp/CODEOWNERS +11 -0
  40. package/cpp/llama.cpp/CONTRIBUTING.md +127 -0
  41. package/cpp/llama.cpp/LICENSE +21 -0
  42. package/cpp/llama.cpp/Makefile +1608 -0
  43. package/cpp/llama.cpp/README.md +575 -0
  44. package/cpp/llama.cpp/SECURITY.md +68 -0
  45. package/cpp/llama.cpp/build-xcframework.sh +540 -0
  46. package/cpp/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  47. package/cpp/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
  48. package/cpp/llama.cpp/cmake/build-info.cmake +64 -0
  49. package/cpp/llama.cpp/cmake/common.cmake +35 -0
  50. package/cpp/llama.cpp/cmake/git-vars.cmake +22 -0
  51. package/cpp/llama.cpp/cmake/llama-config.cmake.in +30 -0
  52. package/cpp/llama.cpp/cmake/llama.pc.in +10 -0
  53. package/cpp/llama.cpp/cmake/x64-windows-llvm.cmake +5 -0
  54. package/cpp/llama.cpp/common/CMakeLists.txt +170 -0
  55. package/cpp/llama.cpp/common/arg.cpp +3337 -0
  56. package/cpp/llama.cpp/common/arg.h +89 -0
  57. package/cpp/llama.cpp/common/base64.hpp +392 -0
  58. package/cpp/llama.cpp/common/build-info.cpp.in +4 -0
  59. package/cpp/llama.cpp/common/chat.cpp +1781 -0
  60. package/cpp/llama.cpp/common/chat.h +135 -0
  61. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +24 -0
  62. package/cpp/llama.cpp/common/common.cpp +1567 -0
  63. package/cpp/llama.cpp/common/common.h +668 -0
  64. package/cpp/llama.cpp/common/console.cpp +504 -0
  65. package/cpp/llama.cpp/common/console.h +19 -0
  66. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +1027 -0
  67. package/cpp/llama.cpp/common/json-schema-to-grammar.h +21 -0
  68. package/cpp/llama.cpp/common/json.hpp +24766 -0
  69. package/cpp/llama.cpp/common/llguidance.cpp +254 -0
  70. package/cpp/llama.cpp/common/log.cpp +393 -0
  71. package/cpp/llama.cpp/common/log.h +103 -0
  72. package/cpp/llama.cpp/common/minja/chat-template.hpp +537 -0
  73. package/cpp/llama.cpp/common/minja/minja.hpp +2941 -0
  74. package/cpp/llama.cpp/common/ngram-cache.cpp +286 -0
  75. package/cpp/llama.cpp/common/ngram-cache.h +101 -0
  76. package/cpp/llama.cpp/common/sampling.cpp +580 -0
  77. package/cpp/llama.cpp/common/sampling.h +107 -0
  78. package/cpp/llama.cpp/common/speculative.cpp +278 -0
  79. package/cpp/llama.cpp/common/speculative.h +28 -0
  80. package/cpp/llama.cpp/common/stb_image.h +7988 -0
  81. package/cpp/llama.cpp/convert_hf_to_gguf.py +6195 -0
  82. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +393 -0
  83. package/cpp/llama.cpp/convert_llama_ggml_to_gguf.py +450 -0
  84. package/cpp/llama.cpp/convert_lora_to_gguf.py +461 -0
  85. package/cpp/llama.cpp/flake.lock +58 -0
  86. package/cpp/llama.cpp/flake.nix +185 -0
  87. package/cpp/llama.cpp/ggml/CMakeLists.txt +388 -0
  88. package/cpp/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  89. package/cpp/llama.cpp/ggml/cmake/common.cmake +26 -0
  90. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  91. package/cpp/llama.cpp/ggml/include/ggml-alloc.h +76 -0
  92. package/cpp/llama.cpp/ggml/include/ggml-backend.h +354 -0
  93. package/cpp/llama.cpp/ggml/include/ggml-blas.h +25 -0
  94. package/cpp/llama.cpp/ggml/include/ggml-cann.h +123 -0
  95. package/cpp/llama.cpp/ggml/include/ggml-cpp.h +39 -0
  96. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +143 -0
  97. package/cpp/llama.cpp/ggml/include/ggml-cuda.h +47 -0
  98. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +50 -0
  99. package/cpp/llama.cpp/ggml/include/ggml-metal.h +66 -0
  100. package/cpp/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  101. package/cpp/llama.cpp/ggml/include/ggml-opt.h +216 -0
  102. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +33 -0
  103. package/cpp/llama.cpp/ggml/include/ggml-sycl.h +49 -0
  104. package/cpp/llama.cpp/ggml/include/ggml-vulkan.h +29 -0
  105. package/cpp/llama.cpp/ggml/include/ggml.h +2192 -0
  106. package/cpp/llama.cpp/ggml/include/gguf.h +202 -0
  107. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +345 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +1042 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +255 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +586 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +2008 -0
  112. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +74 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +179 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2589 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +1083 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +420 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2554 -0
  122. package/cpp/llama.cpp/ggml/src/ggml-common.h +1857 -0
  123. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +495 -0
  124. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  125. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  126. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  127. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  128. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  131. package/cpp/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  132. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +327 -0
  134. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
  135. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  136. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  138. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +512 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +13131 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  141. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  142. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3492 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +254 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +60 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +287 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +47 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +828 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +695 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +873 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1269 -0
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +437 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +428 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +345 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3501 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +322 -0
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  211. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  212. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  213. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +189 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  239. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  240. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  241. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  242. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  243. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  249. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  251. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  253. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  254. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  255. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  256. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  258. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  259. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  260. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  261. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  262. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  263. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  265. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  266. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  267. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  268. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  274. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  275. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  276. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  277. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  278. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  279. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  280. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  282. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  283. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  284. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  285. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  286. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  287. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  288. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  289. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  290. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  291. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  292. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  293. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  294. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  295. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  296. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  297. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  298. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  299. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  300. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  301. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  302. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  311. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  313. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  314. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  315. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  316. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  317. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  319. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  320. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  321. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  322. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  323. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  324. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  325. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  326. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  327. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  329. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  330. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  331. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  332. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  333. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  334. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  335. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  336. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  337. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  338. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  339. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  340. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  341. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  342. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  343. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  344. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  345. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  346. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  347. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  348. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  349. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  350. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  351. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  352. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  353. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  354. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  355. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  356. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  357. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  358. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  359. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  360. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  361. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  362. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  363. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  364. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  365. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  366. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +279 -0
  367. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +57 -0
  368. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  369. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  370. package/cpp/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  371. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  372. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  373. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  374. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  375. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  376. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +131 -0
  377. package/cpp/llama.cpp/ggml/src/ggml-impl.h +601 -0
  378. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  379. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  380. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  381. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  382. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  383. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  384. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  385. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  386. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  387. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  388. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  389. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  390. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  391. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  392. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  394. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  395. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  396. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  397. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  398. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  399. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  400. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  401. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  402. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  403. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  404. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  405. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  406. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  407. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  408. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  409. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  410. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  411. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  412. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  413. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  414. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  415. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  416. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  417. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +120 -0
  418. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +618 -0
  419. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +5916 -0
  420. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +6891 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  422. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
  423. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4966 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  425. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  426. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  427. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  428. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  429. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  430. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  431. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  432. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  433. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  434. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  435. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  436. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  437. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  438. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  439. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  440. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  441. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  442. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  443. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  444. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  445. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  447. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  448. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  449. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  450. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  451. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  452. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  453. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  454. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  455. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  456. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  457. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  458. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  460. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  461. package/cpp/llama.cpp/ggml/src/ggml-quants.c +5232 -0
  462. package/cpp/llama.cpp/ggml/src/ggml-quants.h +100 -0
  463. package/cpp/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  464. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1813 -0
  465. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +183 -0
  466. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  467. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
  468. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  469. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  470. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +493 -0
  471. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +197 -0
  472. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +100 -0
  474. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +596 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  477. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  479. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +753 -0
  480. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1154 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  482. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1559 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +75 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +70 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +311 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  488. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4302 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  490. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  491. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  493. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  495. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1081 -0
  496. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  497. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +474 -0
  498. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  499. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +46 -0
  500. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  501. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  502. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +61 -0
  503. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +362 -0
  504. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  505. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +264 -0
  506. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  507. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  508. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  509. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  510. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  511. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1189 -0
  512. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  513. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  514. package/cpp/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  515. package/cpp/llama.cpp/ggml/src/ggml-threading.h +14 -0
  516. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +202 -0
  517. package/cpp/llama.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  518. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10502 -0
  519. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +22 -0
  520. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  521. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  522. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  523. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  524. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  525. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  526. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  527. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  528. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  529. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  530. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  531. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  532. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  533. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  534. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  535. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  536. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  537. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  538. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  539. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  540. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  541. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  542. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  543. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  544. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  545. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  546. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  547. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  548. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  549. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  550. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  551. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  552. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  553. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  554. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  555. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  556. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  557. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  558. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +483 -0
  559. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +383 -0
  560. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  561. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  562. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  563. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  564. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  565. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  566. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  567. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  568. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  569. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  570. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  571. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  572. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  573. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  574. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  575. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  576. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  577. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  578. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  579. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  580. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  581. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  582. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  583. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  584. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  585. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  586. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  587. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  588. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  589. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  590. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  591. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  592. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  593. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  594. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  595. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  596. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  597. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  598. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  599. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  600. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  601. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  602. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  603. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  604. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  605. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  606. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  607. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  608. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  609. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  610. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  611. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  612. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  613. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  614. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  615. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  616. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  617. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  618. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  619. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  620. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  621. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  622. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  623. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  624. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  625. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  626. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  627. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +740 -0
  628. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  629. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  630. package/cpp/llama.cpp/ggml/src/ggml.c +6499 -0
  631. package/cpp/llama.cpp/ggml/src/gguf.cpp +1330 -0
  632. package/cpp/llama.cpp/gguf-py/LICENSE +21 -0
  633. package/cpp/llama.cpp/gguf-py/README.md +99 -0
  634. package/cpp/llama.cpp/gguf-py/examples/reader.py +49 -0
  635. package/cpp/llama.cpp/gguf-py/examples/writer.py +39 -0
  636. package/cpp/llama.cpp/gguf-py/gguf/__init__.py +9 -0
  637. package/cpp/llama.cpp/gguf-py/gguf/constants.py +2296 -0
  638. package/cpp/llama.cpp/gguf-py/gguf/gguf.py +15 -0
  639. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +367 -0
  640. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +1041 -0
  641. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +223 -0
  642. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +642 -0
  643. package/cpp/llama.cpp/gguf-py/gguf/py.typed +0 -0
  644. package/cpp/llama.cpp/gguf-py/gguf/quants.py +1269 -0
  645. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +182 -0
  646. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +454 -0
  647. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +1610 -0
  648. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_hash.py +102 -0
  649. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +207 -0
  650. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_set_metadata.py +95 -0
  651. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +1172 -0
  652. package/cpp/llama.cpp/gguf-py/gguf/utility.py +264 -0
  653. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +492 -0
  654. package/cpp/llama.cpp/gguf-py/pyproject.toml +43 -0
  655. package/cpp/llama.cpp/gguf-py/tests/__init__.py +1 -0
  656. package/cpp/llama.cpp/gguf-py/tests/test_metadata.py +238 -0
  657. package/cpp/llama.cpp/gguf-py/tests/test_quants.py +238 -0
  658. package/cpp/llama.cpp/grammars/README.md +382 -0
  659. package/cpp/llama.cpp/grammars/arithmetic.gbnf +6 -0
  660. package/cpp/llama.cpp/grammars/c.gbnf +42 -0
  661. package/cpp/llama.cpp/grammars/chess.gbnf +13 -0
  662. package/cpp/llama.cpp/grammars/english.gbnf +6 -0
  663. package/cpp/llama.cpp/grammars/japanese.gbnf +7 -0
  664. package/cpp/llama.cpp/grammars/json.gbnf +25 -0
  665. package/cpp/llama.cpp/grammars/json_arr.gbnf +34 -0
  666. package/cpp/llama.cpp/grammars/list.gbnf +4 -0
  667. package/cpp/llama.cpp/include/llama-cpp.h +30 -0
  668. package/cpp/llama.cpp/include/llama.h +1440 -0
  669. package/cpp/llama.cpp/licenses/LICENSE-curl +9 -0
  670. package/cpp/llama.cpp/licenses/LICENSE-httplib +21 -0
  671. package/cpp/llama.cpp/licenses/LICENSE-jsonhpp +21 -0
  672. package/cpp/llama.cpp/licenses/LICENSE-linenoise +26 -0
  673. package/cpp/llama.cpp/media/llama0-banner.png +0 -0
  674. package/cpp/llama.cpp/media/llama0-logo.png +0 -0
  675. package/cpp/llama.cpp/media/llama1-banner.png +0 -0
  676. package/cpp/llama.cpp/media/llama1-logo.png +0 -0
  677. package/cpp/llama.cpp/media/llama1-logo.svg +34 -0
  678. package/cpp/llama.cpp/media/matmul.png +0 -0
  679. package/cpp/llama.cpp/media/matmul.svg +1238 -0
  680. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  681. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  682. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  683. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  684. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  685. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  686. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  687. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  688. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  689. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  690. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  691. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  692. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  693. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  694. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  695. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  696. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  697. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  698. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  699. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  700. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  701. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  702. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  703. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  704. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  705. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  706. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  707. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  708. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  709. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  710. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  711. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  712. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  713. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  714. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  715. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  716. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  717. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  718. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  719. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  720. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  721. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  722. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  723. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  724. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  725. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  726. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  727. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  728. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  729. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  730. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  731. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  732. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  733. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  734. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +202 -0
  735. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +156 -0
  736. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +152 -0
  737. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +152 -0
  738. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +54 -0
  739. package/cpp/llama.cpp/models/templates/README.md +22 -0
  740. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +1 -0
  741. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +1 -0
  742. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +57 -0
  743. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +4 -0
  744. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +76 -0
  745. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +58 -0
  746. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +287 -0
  747. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +109 -0
  748. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +93 -0
  749. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +109 -0
  750. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +8 -0
  751. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +87 -0
  752. package/cpp/llama.cpp/mypy.ini +7 -0
  753. package/cpp/llama.cpp/pocs/CMakeLists.txt +14 -0
  754. package/cpp/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  755. package/cpp/llama.cpp/pocs/vdot/q8dot.cpp +173 -0
  756. package/cpp/llama.cpp/pocs/vdot/vdot.cpp +311 -0
  757. package/cpp/llama.cpp/poetry.lock +1197 -0
  758. package/cpp/llama.cpp/prompts/LLM-questions.txt +49 -0
  759. package/cpp/llama.cpp/prompts/alpaca.txt +1 -0
  760. package/cpp/llama.cpp/prompts/assistant.txt +31 -0
  761. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  762. package/cpp/llama.cpp/prompts/chat-with-bob.txt +7 -0
  763. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  764. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  765. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  766. package/cpp/llama.cpp/prompts/chat.txt +28 -0
  767. package/cpp/llama.cpp/prompts/dan-modified.txt +1 -0
  768. package/cpp/llama.cpp/prompts/dan.txt +1 -0
  769. package/cpp/llama.cpp/prompts/mnemonics.txt +93 -0
  770. package/cpp/llama.cpp/prompts/parallel-questions.txt +43 -0
  771. package/cpp/llama.cpp/prompts/reason-act.txt +18 -0
  772. package/cpp/llama.cpp/pyproject.toml +45 -0
  773. package/cpp/llama.cpp/pyrightconfig.json +22 -0
  774. package/cpp/llama.cpp/requirements/requirements-all.txt +15 -0
  775. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  776. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  777. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  778. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +5 -0
  779. package/cpp/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  780. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  781. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  782. package/cpp/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  783. package/cpp/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  784. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  785. package/cpp/llama.cpp/requirements.txt +13 -0
  786. package/cpp/llama.cpp/src/CMakeLists.txt +45 -0
  787. package/cpp/llama.cpp/src/llama-adapter.cpp +388 -0
  788. package/cpp/llama.cpp/src/llama-adapter.h +76 -0
  789. package/cpp/llama.cpp/src/llama-arch.cpp +1743 -0
  790. package/cpp/llama.cpp/src/llama-arch.h +437 -0
  791. package/cpp/llama.cpp/src/llama-batch.cpp +372 -0
  792. package/cpp/llama.cpp/src/llama-batch.h +89 -0
  793. package/cpp/llama.cpp/src/llama-chat.cpp +663 -0
  794. package/cpp/llama.cpp/src/llama-chat.h +58 -0
  795. package/cpp/llama.cpp/src/llama-context.cpp +2459 -0
  796. package/cpp/llama.cpp/src/llama-context.h +246 -0
  797. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -0
  798. package/cpp/llama.cpp/src/llama-cparams.h +39 -0
  799. package/cpp/llama.cpp/src/llama-grammar.cpp +1219 -0
  800. package/cpp/llama.cpp/src/llama-grammar.h +173 -0
  801. package/cpp/llama.cpp/src/llama-graph.cpp +1713 -0
  802. package/cpp/llama.cpp/src/llama-graph.h +595 -0
  803. package/cpp/llama.cpp/src/llama-hparams.cpp +79 -0
  804. package/cpp/llama.cpp/src/llama-hparams.h +161 -0
  805. package/cpp/llama.cpp/src/llama-impl.cpp +167 -0
  806. package/cpp/llama.cpp/src/llama-impl.h +61 -0
  807. package/cpp/llama.cpp/src/llama-io.cpp +15 -0
  808. package/cpp/llama.cpp/src/llama-io.h +35 -0
  809. package/cpp/llama.cpp/src/llama-kv-cache.cpp +2486 -0
  810. package/cpp/llama.cpp/src/llama-kv-cache.h +405 -0
  811. package/cpp/llama.cpp/src/llama-memory.cpp +1 -0
  812. package/cpp/llama.cpp/src/llama-memory.h +31 -0
  813. package/cpp/llama.cpp/src/llama-mmap.cpp +600 -0
  814. package/cpp/llama.cpp/src/llama-mmap.h +68 -0
  815. package/cpp/llama.cpp/src/llama-model-loader.cpp +1133 -0
  816. package/cpp/llama.cpp/src/llama-model-loader.h +169 -0
  817. package/cpp/llama.cpp/src/llama-model.cpp +13453 -0
  818. package/cpp/llama.cpp/src/llama-model.h +420 -0
  819. package/cpp/llama.cpp/src/llama-quant.cpp +964 -0
  820. package/cpp/llama.cpp/src/llama-quant.h +1 -0
  821. package/cpp/llama.cpp/src/llama-sampling.cpp +2575 -0
  822. package/cpp/llama.cpp/src/llama-sampling.h +32 -0
  823. package/cpp/llama.cpp/src/llama-vocab.cpp +3313 -0
  824. package/cpp/llama.cpp/src/llama-vocab.h +125 -0
  825. package/cpp/llama.cpp/src/llama.cpp +340 -0
  826. package/cpp/llama.cpp/src/unicode-data.cpp +7034 -0
  827. package/cpp/llama.cpp/src/unicode-data.h +20 -0
  828. package/cpp/llama.cpp/src/unicode.cpp +849 -0
  829. package/cpp/llama.cpp/src/unicode.h +66 -0
  830. package/cpp/rn-completion.cpp +431 -0
  831. package/cpp/rn-llama.hpp +60 -0
  832. package/cpp/rn-utils.hpp +331 -0
  833. package/ios/OnLoad.mm +22 -0
  834. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec-generated.mm +64 -0
  835. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec.h +251 -0
  836. package/ios/generated/RNLlamaCppSpecJSI-generated.cpp +42 -0
  837. package/ios/generated/RNLlamaCppSpecJSI.h +336 -0
  838. package/ios/include/chat.h +135 -0
  839. package/ios/include/common/base64.hpp +392 -0
  840. package/ios/include/common/json.hpp +24766 -0
  841. package/ios/include/common/minja/chat-template.hpp +537 -0
  842. package/ios/include/common/minja/minja.hpp +2941 -0
  843. package/ios/include/common.h +668 -0
  844. package/ios/include/json-schema-to-grammar.h +21 -0
  845. package/ios/include/llama-cpp.h +30 -0
  846. package/ios/include/llama.h +1440 -0
  847. package/ios/include/log.h +103 -0
  848. package/ios/include/ngram-cache.h +101 -0
  849. package/ios/include/sampling.h +107 -0
  850. package/ios/include/speculative.h +28 -0
  851. package/ios/libs/llama.xcframework/Info.plist +135 -0
  852. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  853. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  854. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  855. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  856. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  857. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  858. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  859. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  860. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +2192 -0
  861. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/gguf.h +202 -0
  862. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +1440 -0
  863. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Info.plist +36 -0
  864. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Modules/module.modulemap +17 -0
  865. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  866. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  867. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  868. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  869. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  870. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  871. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  872. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  873. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  874. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  875. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  876. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  877. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  878. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Info.plist +36 -0
  879. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  880. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3442 -0
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +76 -0
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +354 -0
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +25 -0
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +143 -0
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +66 -0
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +2192 -0
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +202 -0
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +1440 -0
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +17 -0
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +32 -0
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +76 -0
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +354 -0
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +25 -0
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +143 -0
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +66 -0
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +2192 -0
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +202 -0
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +1440 -0
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +17 -0
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +32 -0
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  906. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +76 -0
  907. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +354 -0
  908. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +25 -0
  909. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +143 -0
  910. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +66 -0
  911. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +2192 -0
  912. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +202 -0
  913. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +1440 -0
  914. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +17 -0
  915. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +32 -0
  916. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  917. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  918. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  919. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  920. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  921. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  922. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  923. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  924. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  925. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  926. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +2192 -0
  927. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +202 -0
  928. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +1440 -0
  929. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +35 -0
  930. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +17 -0
  931. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  937. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  938. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  939. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  940. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  941. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  942. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  943. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  944. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +35 -0
  945. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  946. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  947. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  948. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  949. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4528 -0
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  952. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  953. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  954. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  955. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +2192 -0
  956. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +202 -0
  957. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +1440 -0
  958. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +32 -0
  959. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +17 -0
  960. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4549 -0
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3470 -0
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  968. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  969. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  970. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  971. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  972. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  973. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +32 -0
  974. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  975. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  976. package/lib/module/NativeRNLlamaCpp.js +35 -0
  977. package/lib/module/NativeRNLlamaCpp.js.map +1 -0
  978. package/lib/module/index.js +20 -0
  979. package/lib/module/index.js.map +1 -0
  980. package/lib/module/package.json +1 -0
  981. package/lib/typescript/package.json +1 -0
  982. package/lib/typescript/src/NativeRNLlamaCpp.d.ts +222 -0
  983. package/lib/typescript/src/NativeRNLlamaCpp.d.ts.map +1 -0
  984. package/lib/typescript/src/index.d.ts +5 -0
  985. package/lib/typescript/src/index.d.ts.map +1 -0
  986. package/package.json +161 -0
  987. package/react-native.config.js +15 -0
  988. package/src/NativeRNLlamaCpp.ts +282 -0
  989. package/src/index.tsx +54 -0
@@ -0,0 +1,1330 @@
1
+ #include "ggml.h"
2
+ #include "ggml-backend.h"
3
+ #include "ggml-impl.h"
4
+ #include "gguf.h"
5
+
6
+ #include <cinttypes>
7
+ #include <cstddef>
8
+ #include <cstdint>
9
+ #include <cstdio>
10
+ #include <cstdlib>
11
+ #include <cstring>
12
+ #include <map>
13
+ #include <new>
14
+ #include <stdexcept>
15
+ #include <string>
16
+ #include <vector>
17
+
18
+ template <typename T>
19
+ struct type_to_gguf_type;
20
+
21
+ template <>
22
+ struct type_to_gguf_type<uint8_t> {
23
+ static constexpr enum gguf_type value = GGUF_TYPE_UINT8;
24
+ };
25
+
26
+ template <>
27
+ struct type_to_gguf_type<int8_t> {
28
+ static constexpr enum gguf_type value = GGUF_TYPE_INT8;
29
+ };
30
+
31
+ template <>
32
+ struct type_to_gguf_type<uint16_t> {
33
+ static constexpr enum gguf_type value = GGUF_TYPE_UINT16;
34
+ };
35
+
36
+ template <>
37
+ struct type_to_gguf_type<int16_t> {
38
+ static constexpr enum gguf_type value = GGUF_TYPE_INT16;
39
+ };
40
+
41
+ template <>
42
+ struct type_to_gguf_type<uint32_t> {
43
+ static constexpr enum gguf_type value = GGUF_TYPE_UINT32;
44
+ };
45
+
46
+ template <>
47
+ struct type_to_gguf_type<int32_t> {
48
+ static constexpr enum gguf_type value = GGUF_TYPE_INT32;
49
+ };
50
+
51
+ template <>
52
+ struct type_to_gguf_type<float> {
53
+ static constexpr enum gguf_type value = GGUF_TYPE_FLOAT32;
54
+ };
55
+
56
+ template <>
57
+ struct type_to_gguf_type<bool> {
58
+ static constexpr enum gguf_type value = GGUF_TYPE_BOOL;
59
+ };
60
+
61
+ template <>
62
+ struct type_to_gguf_type<std::string> {
63
+ static constexpr enum gguf_type value = GGUF_TYPE_STRING;
64
+ };
65
+
66
+ template <>
67
+ struct type_to_gguf_type<uint64_t> {
68
+ static constexpr enum gguf_type value = GGUF_TYPE_UINT64;
69
+ };
70
+
71
+ template <>
72
+ struct type_to_gguf_type<int64_t> {
73
+ static constexpr enum gguf_type value = GGUF_TYPE_INT64;
74
+ };
75
+
76
+ template <>
77
+ struct type_to_gguf_type<double> {
78
+ static constexpr enum gguf_type value = GGUF_TYPE_FLOAT64;
79
+ };
80
+
81
+ static const std::map<gguf_type, size_t> GGUF_TYPE_SIZE = {
82
+ {GGUF_TYPE_UINT8, sizeof(uint8_t)},
83
+ {GGUF_TYPE_INT8, sizeof(int8_t)},
84
+ {GGUF_TYPE_UINT16, sizeof(uint16_t)},
85
+ {GGUF_TYPE_INT16, sizeof(int16_t)},
86
+ {GGUF_TYPE_UINT32, sizeof(uint32_t)},
87
+ {GGUF_TYPE_INT32, sizeof(int32_t)},
88
+ {GGUF_TYPE_FLOAT32, sizeof(float)},
89
+ {GGUF_TYPE_BOOL, sizeof(int8_t)},
90
+ {GGUF_TYPE_STRING, 0}, // undefined
91
+ {GGUF_TYPE_ARRAY, 0}, // undefined
92
+ {GGUF_TYPE_UINT64, sizeof(uint64_t)},
93
+ {GGUF_TYPE_INT64, sizeof(int64_t)},
94
+ {GGUF_TYPE_FLOAT64, sizeof(double)},
95
+ };
96
+ static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
97
+
98
+ static const std::map<gguf_type, const char *> GGUF_TYPE_NAME = {
99
+ {GGUF_TYPE_UINT8, "u8"},
100
+ {GGUF_TYPE_INT8, "i8"},
101
+ {GGUF_TYPE_UINT16, "u16"},
102
+ {GGUF_TYPE_INT16, "i16"},
103
+ {GGUF_TYPE_UINT32, "u32"},
104
+ {GGUF_TYPE_INT32, "i32"},
105
+ {GGUF_TYPE_FLOAT32, "f32"},
106
+ {GGUF_TYPE_BOOL, "bool"},
107
+ {GGUF_TYPE_STRING, "str"},
108
+ {GGUF_TYPE_ARRAY, "arr"},
109
+ {GGUF_TYPE_UINT64, "u64"},
110
+ {GGUF_TYPE_INT64, "i64"},
111
+ {GGUF_TYPE_FLOAT64, "f64"},
112
+ };
113
+ static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
114
+
115
+ size_t gguf_type_size(enum gguf_type type) {
116
+ auto it = GGUF_TYPE_SIZE.find(type);
117
+ return it == GGUF_TYPE_SIZE.end() ? 0 : it->second;
118
+ }
119
+
120
+ struct gguf_kv {
121
+ std::string key;
122
+
123
+ bool is_array;
124
+ enum gguf_type type;
125
+
126
+ std::vector<int8_t> data;
127
+ std::vector<std::string> data_string;
128
+
129
+ template <typename T>
130
+ gguf_kv(const std::string & key, const T value)
131
+ : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
132
+ GGML_ASSERT(!key.empty());
133
+ data.resize(sizeof(T));
134
+ memcpy(data.data(), &value, sizeof(T));
135
+ }
136
+
137
+ template <typename T>
138
+ gguf_kv(const std::string & key, const std::vector<T> & value)
139
+ : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
140
+ GGML_ASSERT(!key.empty());
141
+ data.resize(value.size()*sizeof(T));
142
+ for (size_t i = 0; i < value.size(); ++i) {
143
+ const T tmp = value[i];
144
+ memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
145
+ }
146
+ }
147
+
148
+ gguf_kv(const std::string & key, const std::string & value)
149
+ : key(key), is_array(false), type(GGUF_TYPE_STRING) {
150
+ GGML_ASSERT(!key.empty());
151
+ data_string.push_back(value);
152
+ }
153
+
154
+ gguf_kv(const std::string & key, const std::vector<std::string> & value)
155
+ : key(key), is_array(true), type(GGUF_TYPE_STRING) {
156
+ GGML_ASSERT(!key.empty());
157
+ data_string = value;
158
+ }
159
+
160
+ const std::string & get_key() const {
161
+ return key;
162
+ }
163
+
164
+ const enum gguf_type & get_type() const {
165
+ return type;
166
+ }
167
+
168
+ size_t get_ne() const {
169
+ if (type == GGUF_TYPE_STRING) {
170
+ const size_t ne = data_string.size();
171
+ GGML_ASSERT(is_array || ne == 1);
172
+ return ne;
173
+ }
174
+ const size_t type_size = gguf_type_size(type);
175
+ GGML_ASSERT(data.size() % type_size == 0);
176
+ const size_t ne = data.size() / type_size;
177
+ GGML_ASSERT(is_array || ne == 1);
178
+ return ne;
179
+ }
180
+
181
+ template <typename T>
182
+ const T & get_val(const size_t i = 0) const {
183
+ GGML_ASSERT(type_to_gguf_type<T>::value == type);
184
+ if constexpr (std::is_same<T, std::string>::value) {
185
+ GGML_ASSERT(data_string.size() >= i+1);
186
+ return data_string[i];
187
+ }
188
+ const size_t type_size = gguf_type_size(type);
189
+ GGML_ASSERT(data.size() % type_size == 0);
190
+ GGML_ASSERT(data.size() >= (i+1)*type_size);
191
+ return reinterpret_cast<const T *>(data.data())[i];
192
+ }
193
+
194
+ void cast(const enum gguf_type new_type) {
195
+ const size_t new_type_size = gguf_type_size(new_type);
196
+ GGML_ASSERT(data.size() % new_type_size == 0);
197
+ type = new_type;
198
+ }
199
+ };
200
+
201
+ struct gguf_tensor_info {
202
+ struct ggml_tensor t; // for holding the equivalent info
203
+ uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
204
+ };
205
+
206
+ struct gguf_context {
207
+ uint32_t version = GGUF_VERSION;
208
+
209
+ std::vector<struct gguf_kv> kv;
210
+ std::vector<struct gguf_tensor_info> info;
211
+
212
+ size_t alignment = GGUF_DEFAULT_ALIGNMENT;
213
+ size_t offset = 0; // offset of `data` from beginning of file
214
+ size_t size = 0; // size of `data` in bytes
215
+
216
+ void * data = nullptr;
217
+ };
218
+
219
+ struct gguf_reader {
220
+ FILE * file;
221
+
222
+ gguf_reader(FILE * file) : file(file) {}
223
+
224
+ template <typename T>
225
+ bool read(T & dst) const {
226
+ return fread(&dst, 1, sizeof(dst), file) == sizeof(dst);
227
+ }
228
+
229
+ template <typename T>
230
+ bool read(std::vector<T> & dst, const size_t n) const {
231
+ dst.resize(n);
232
+ for (size_t i = 0; i < dst.size(); ++i) {
233
+ if constexpr (std::is_same<T, bool>::value) {
234
+ bool tmp;
235
+ if (!read(tmp)) {
236
+ return false;
237
+ }
238
+ dst[i] = tmp;
239
+ } else {
240
+ if (!read(dst[i])) {
241
+ return false;
242
+ }
243
+ }
244
+ }
245
+ return true;
246
+ }
247
+
248
+ bool read(bool & dst) const {
249
+ int8_t tmp = -1;
250
+ if (!read(tmp)) {
251
+ return false;
252
+ }
253
+ dst = tmp != 0;
254
+ return true;
255
+ }
256
+
257
+ bool read(enum ggml_type & dst) const {
258
+ int32_t tmp = -1;
259
+ if (!read(tmp)) {
260
+ return false;
261
+ }
262
+ dst = ggml_type(tmp);
263
+ return true;
264
+ }
265
+
266
+ bool read(enum gguf_type & dst) const {
267
+ int32_t tmp = -1;
268
+ if (!read(tmp)) {
269
+ return false;
270
+ }
271
+ dst = gguf_type(tmp);
272
+ return true;
273
+ }
274
+
275
+ bool read(std::string & dst) const {
276
+ uint64_t size = -1;
277
+ if (!read(size)) {
278
+ return false;
279
+ }
280
+ dst.resize(size);
281
+ return fread(dst.data(), 1, dst.length(), file) == dst.length();
282
+ }
283
+
284
+ bool read(void * dst, const size_t size) const {
285
+ return fread(dst, 1, size, file) == size;
286
+ }
287
+ };
288
+
289
+ struct gguf_context * gguf_init_empty(void) {
290
+ return new gguf_context;
291
+ }
292
+
293
// Reads one value of type T (or `n` values when `is_array` is set) from `gr` and
// appends it to `kv` under `key`.
//
// Returns false when the read comes up short or when materializing the value
// throws std::length_error / std::bad_alloc; nothing is appended in that case.
// Both the array and the scalar path are guarded: a scalar std::string is also
// resized to a length taken from the file, so it can throw just like an array can.
template<typename T>
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
    std::vector<T> values;  // filled only when is_array
    T              value{}; // filled only when !is_array

    try {
        const bool read_ok = is_array ? gr.read(values, n) : gr.read(value);
        if (!read_ok) {
            return false;
        }
    } catch (const std::length_error &) {
        fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
        return false;
    } catch (const std::bad_alloc &) {
        fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
        return false;
    }

    if (is_array) {
        kv.emplace_back(key, values);
    } else {
        kv.emplace_back(key, value);
    }
    return true;
}
318
+
319
+ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
320
+ const struct gguf_reader gr(file);
321
+ struct gguf_context * ctx = new gguf_context;
322
+
323
+ bool ok = true;
324
+
325
+ // file magic
326
+ {
327
+ std::vector<char> magic;
328
+ ok = ok && gr.read(magic, 4);
329
+
330
+ if (!ok) {
331
+ fprintf(stderr, "%s: failed to read magic\n", __func__);
332
+ gguf_free(ctx);
333
+ return nullptr;
334
+ }
335
+
336
+ for (uint32_t i = 0; i < magic.size(); i++) {
337
+ if (magic[i] != GGUF_MAGIC[i]) {
338
+ fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
339
+ gguf_free(ctx);
340
+ return nullptr;
341
+ }
342
+ }
343
+ }
344
+
345
+ // header
346
+ int64_t n_kv = 0;
347
+ int64_t n_tensors = 0;
348
+
349
+ if (ok && gr.read(ctx->version)) {
350
+ if (ctx->version == 1) {
351
+ fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
352
+ ok = false;
353
+ }
354
+ if (ctx->version > GGUF_VERSION) {
355
+ fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
356
+ __func__, ctx->version, GGUF_VERSION);
357
+ ok = false;
358
+ }
359
+ } else {
360
+ ok = false;
361
+ }
362
+
363
+ if (ok && gr.read(n_tensors)) {
364
+ static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
365
+ if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
366
+ fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
367
+ __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
368
+ ok = false;
369
+ }
370
+ } else {
371
+ ok = false;
372
+ }
373
+
374
+ if (ok && gr.read(n_kv)) {
375
+ static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
376
+ if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
377
+ fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
378
+ __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
379
+ ok = false;
380
+ }
381
+ } else {
382
+ ok = false;
383
+ }
384
+
385
+ if (!ok) {
386
+ fprintf(stderr, "%s: failed to read header\n", __func__);
387
+ gguf_free(ctx);
388
+ return nullptr;
389
+ }
390
+
391
+ // KV pairs
392
+ {
393
+ for (int64_t i = 0; ok && i < n_kv; ++i) {
394
+ std::string key;
395
+ gguf_type type = gguf_type(-1);
396
+ bool is_array = false;
397
+ uint64_t n = 1;
398
+
399
+ try {
400
+ ok = ok && gr.read(key);
401
+ } catch (std::length_error &) {
402
+ fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
403
+ ok = false;
404
+ } catch (std::bad_alloc &) {
405
+ fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
406
+ ok = false;
407
+ }
408
+ for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
409
+ if (key == ctx->kv[j].key) {
410
+ fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
411
+ ok = false;
412
+ }
413
+ }
414
+ if (!ok) {
415
+ break;
416
+ }
417
+
418
+ ok = ok && gr.read(type);
419
+ if (type == GGUF_TYPE_ARRAY) {
420
+ is_array = true;
421
+ ok = ok && gr.read(type);
422
+ ok = ok && gr.read(n);
423
+ }
424
+ if (!ok) {
425
+ break;
426
+ }
427
+
428
+ switch (type) {
429
+ case GGUF_TYPE_UINT8: ok = ok && gguf_read_emplace_helper<uint8_t> (gr, ctx->kv, key, is_array, n); break;
430
+ case GGUF_TYPE_INT8: ok = ok && gguf_read_emplace_helper<int8_t> (gr, ctx->kv, key, is_array, n); break;
431
+ case GGUF_TYPE_UINT16: ok = ok && gguf_read_emplace_helper<uint16_t> (gr, ctx->kv, key, is_array, n); break;
432
+ case GGUF_TYPE_INT16: ok = ok && gguf_read_emplace_helper<int16_t> (gr, ctx->kv, key, is_array, n); break;
433
+ case GGUF_TYPE_UINT32: ok = ok && gguf_read_emplace_helper<uint32_t> (gr, ctx->kv, key, is_array, n); break;
434
+ case GGUF_TYPE_INT32: ok = ok && gguf_read_emplace_helper<int32_t> (gr, ctx->kv, key, is_array, n); break;
435
+ case GGUF_TYPE_FLOAT32: ok = ok && gguf_read_emplace_helper<float> (gr, ctx->kv, key, is_array, n); break;
436
+ case GGUF_TYPE_BOOL: ok = ok && gguf_read_emplace_helper<bool> (gr, ctx->kv, key, is_array, n); break;
437
+ case GGUF_TYPE_STRING: ok = ok && gguf_read_emplace_helper<std::string>(gr, ctx->kv, key, is_array, n); break;
438
+ case GGUF_TYPE_UINT64: ok = ok && gguf_read_emplace_helper<uint64_t> (gr, ctx->kv, key, is_array, n); break;
439
+ case GGUF_TYPE_INT64: ok = ok && gguf_read_emplace_helper<int64_t> (gr, ctx->kv, key, is_array, n); break;
440
+ case GGUF_TYPE_FLOAT64: ok = ok && gguf_read_emplace_helper<double> (gr, ctx->kv, key, is_array, n); break;
441
+ case GGUF_TYPE_ARRAY:
442
+ default:
443
+ {
444
+ fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
445
+ ok = false;
446
+ } break;
447
+ }
448
+ }
449
+
450
+ if (!ok) {
451
+ fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
452
+ gguf_free(ctx);
453
+ return nullptr;
454
+ }
455
+ GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv);
456
+
457
+ const int alignment_idx = gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT);
458
+ ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);
459
+
460
+ if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
461
+ fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
462
+ gguf_free(ctx);
463
+ return nullptr;
464
+ }
465
+ }
466
+
467
+ // read the tensor info
468
+ for (int64_t i = 0; ok && i < n_tensors; ++i) {
469
+ struct gguf_tensor_info info;
470
+
471
+ // tensor name
472
+ {
473
+ std::string name;
474
+ try {
475
+ ok = ok && gr.read(name);
476
+ } catch (std::length_error &) {
477
+ fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
478
+ ok = false;
479
+ } catch (std::bad_alloc &) {
480
+ fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
481
+ ok = false;
482
+ }
483
+ if (name.length() >= GGML_MAX_NAME) {
484
+ fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
485
+ ok = false;
486
+ break;
487
+ }
488
+ ggml_set_name(&info.t, name.c_str());
489
+
490
+ // make sure there are no duplicate tensor names
491
+ for (int64_t j = 0; ok && j < i; ++j) {
492
+ if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
493
+ fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
494
+ ok = false;
495
+ break;
496
+ }
497
+ }
498
+ }
499
+ if (!ok) {
500
+ break;
501
+ }
502
+
503
+ // tensor shape
504
+ {
505
+ uint32_t n_dims = -1;
506
+ ok = ok && gr.read(n_dims);
507
+ if (n_dims > GGML_MAX_DIMS) {
508
+ fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
509
+ __func__, info.t.name, n_dims, GGML_MAX_DIMS);
510
+ ok = false;
511
+ break;
512
+ }
513
+ for (uint32_t j = 0; ok && j < GGML_MAX_DIMS; ++j) {
514
+ info.t.ne[j] = 1;
515
+ if (j < n_dims) {
516
+ ok = ok && gr.read(info.t.ne[j]);
517
+ }
518
+
519
+ // check that all ne are non-negative
520
+ if (info.t.ne[j] < 0) {
521
+ fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
522
+ __func__, info.t.name, j, info.t.ne[j]);
523
+ ok = false;
524
+ break;
525
+ }
526
+ }
527
+
528
+ // check that the total number of elements is representable
529
+ if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
530
+ (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
531
+ (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
532
+
533
+ fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
534
+ "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
535
+ __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
536
+ ok = false;
537
+ break;
538
+ }
539
+ }
540
+ if (!ok) {
541
+ break;
542
+ }
543
+
544
+ // tensor type
545
+ {
546
+ ok = ok && gr.read(info.t.type);
547
+
548
+ // check that tensor type is within defined range
549
+ if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
550
+ fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
551
+ __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
552
+ ok = false;
553
+ break;
554
+ }
555
+ const size_t type_size = ggml_type_size(info.t.type);
556
+ const int64_t blck_size = ggml_blck_size(info.t.type);
557
+
558
+ // check that row size is divisible by block size
559
+ if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
560
+ fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
561
+ "not a multiple of block size (%" PRId64 ")\n",
562
+ __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
563
+ ok = false;
564
+ break;
565
+ }
566
+
567
+ // calculate byte offsets given the tensor shape and type
568
+ info.t.nb[0] = type_size;
569
+ info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
570
+ for (int j = 2; j < GGML_MAX_DIMS; ++j) {
571
+ info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1];
572
+ }
573
+ }
574
+ if (!ok) {
575
+ break;
576
+ }
577
+
578
+ // tensor data offset within buffer
579
+ ok = ok && gr.read(info.offset);
580
+
581
+ ctx->info.push_back(info);
582
+ }
583
+
584
+ if (!ok) {
585
+ fprintf(stderr, "%s: failed to read tensor info\n", __func__);
586
+ gguf_free(ctx);
587
+ return nullptr;
588
+ }
589
+ GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
590
+
591
+ // we require the data section to be aligned, so take into account any padding
592
+ if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
593
+ fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
594
+ gguf_free(ctx);
595
+ return nullptr;
596
+ }
597
+
598
+ // store the current file offset - this is where the data section starts
599
+ ctx->offset = ftell(file);
600
+
601
+ // compute the total size of the data section, taking into account the alignment
602
+ {
603
+ ctx->size = 0;
604
+ for (size_t i = 0; i < ctx->info.size(); ++i) {
605
+ const gguf_tensor_info & ti = ctx->info[i];
606
+ if (ti.offset != ctx->size) {
607
+ fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
608
+ __func__, ti.t.name, ti.offset, ctx->size);
609
+ fprintf(stderr, "%s: failed to read tensor data\n", __func__);
610
+ gguf_free(ctx);
611
+ return nullptr;
612
+ }
613
+ ctx->size += GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
614
+ }
615
+ }
616
+
617
+ // load the tensor data only if requested
618
+ if (params.ctx != nullptr) {
619
+ // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
620
+ // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
621
+ // the ggml_tensor structs to the appropriate locations in the binary blob
622
+
623
+ // compute the exact size needed for the new ggml_context
624
+ const size_t mem_size =
625
+ params.no_alloc ?
626
+ (n_tensors )*ggml_tensor_overhead() :
627
+ (n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
628
+
629
+ struct ggml_init_params pdata = {
630
+ /*mem_size =*/ mem_size,
631
+ /*mem_buffer =*/ nullptr,
632
+ /*no_alloc =*/ params.no_alloc,
633
+ };
634
+
635
+ *params.ctx = ggml_init(pdata);
636
+ if (*params.ctx == nullptr) {
637
+ fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
638
+ gguf_free(ctx);
639
+ return nullptr;
640
+ }
641
+
642
+ struct ggml_context * ctx_data = *params.ctx;
643
+
644
+ struct ggml_tensor * data = nullptr;
645
+
646
+ if (!params.no_alloc) {
647
+ data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
648
+
649
+ ok = ok && data != nullptr;
650
+
651
+ if (ok) {
652
+ ggml_set_name(data, "GGUF tensor data binary blob");
653
+ }
654
+
655
+ // read the binary blob with the tensor data
656
+ ok = ok && gr.read(data->data, ctx->size);
657
+
658
+ if (!ok) {
659
+ fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
660
+ ggml_free(ctx_data);
661
+ *params.ctx = nullptr;
662
+ gguf_free(ctx);
663
+ return nullptr;
664
+ }
665
+
666
+ ctx->data = data->data;
667
+ }
668
+
669
+ ggml_set_no_alloc(ctx_data, true);
670
+
671
+ // create the tensors
672
+ for (size_t i = 0; i < ctx->info.size(); ++i) {
673
+ const struct gguf_tensor_info & info = ctx->info[i];
674
+
675
+ struct ggml_tensor * cur = ggml_new_tensor(ctx_data, info.t.type, GGML_MAX_DIMS, info.t.ne);
676
+
677
+ ok = ok && cur != nullptr;
678
+
679
+ if (!ok) {
680
+ break;
681
+ }
682
+
683
+ ggml_set_name(cur, info.t.name);
684
+
685
+ // point the data member to the appropriate location in the binary blob using the tensor info
686
+ if (!params.no_alloc) {
687
+ cur->data = (char *) data->data + info.offset;
688
+ }
689
+ }
690
+
691
+ if (!ok) {
692
+ fprintf(stderr, "%s: failed to create tensors\n", __func__);
693
+ ggml_free(ctx_data);
694
+ *params.ctx = nullptr;
695
+ gguf_free(ctx);
696
+ return nullptr;
697
+ }
698
+
699
+ ggml_set_no_alloc(ctx_data, params.no_alloc);
700
+ }
701
+
702
+ return ctx;
703
+ }
704
+
705
+ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
706
+ FILE * file = ggml_fopen(fname, "rb");
707
+
708
+ if (!file) {
709
+ fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
710
+ return nullptr;
711
+ }
712
+
713
+ struct gguf_context * result = gguf_init_from_file_impl(file, params);
714
+ fclose(file);
715
+ return result;
716
+ }
717
+
718
// Destroys a gguf_context with `delete`. Passing nullptr is allowed and does nothing.
void gguf_free(struct gguf_context * ctx) {
    // `delete` on a null pointer is a no-op, so no explicit guard is required
    delete ctx;
}
724
+
725
+ const char * gguf_type_name(enum gguf_type type) {
726
+ auto it = GGUF_TYPE_NAME.find(type);
727
+ return it == GGUF_TYPE_NAME.end() ? nullptr : it->second;
728
+ }
729
+
730
+ uint32_t gguf_get_version(const struct gguf_context * ctx) {
731
+ return ctx->version;
732
+ }
733
+
734
+ size_t gguf_get_alignment(const struct gguf_context * ctx) {
735
+ return ctx->alignment;
736
+ }
737
+
738
+ size_t gguf_get_data_offset(const struct gguf_context * ctx) {
739
+ return ctx->offset;
740
+ }
741
+
742
+ int64_t gguf_get_n_kv(const struct gguf_context * ctx) {
743
+ return ctx->kv.size();
744
+ }
745
+
746
+ int64_t gguf_find_key(const struct gguf_context * ctx, const char * key) {
747
+ // return -1 if key not found
748
+ int64_t keyfound = -1;
749
+
750
+ const int64_t n_kv = gguf_get_n_kv(ctx);
751
+
752
+ for (int64_t i = 0; i < n_kv; ++i) {
753
+ if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
754
+ keyfound = i;
755
+ break;
756
+ }
757
+ }
758
+
759
+ return keyfound;
760
+ }
761
+
762
+ const char * gguf_get_key(const struct gguf_context * ctx, int64_t key_id) {
763
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
764
+ return ctx->kv[key_id].get_key().c_str();
765
+ }
766
+
767
+ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int64_t key_id) {
768
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
769
+ return ctx->kv[key_id].is_array ? GGUF_TYPE_ARRAY : ctx->kv[key_id].get_type();
770
+ }
771
+
772
+ enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id) {
773
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
774
+ GGML_ASSERT(ctx->kv[key_id].is_array);
775
+ return ctx->kv[key_id].get_type();
776
+ }
777
+
778
+ const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id) {
779
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
780
+ GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
781
+ return ctx->kv[key_id].data.data();
782
+ }
783
+
784
+ const char * gguf_get_arr_str(const struct gguf_context * ctx, int64_t key_id, size_t i) {
785
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
786
+ GGML_ASSERT(ctx->kv[key_id].get_type() == GGUF_TYPE_STRING);
787
+ return ctx->kv[key_id].data_string[i].c_str();
788
+ }
789
+
790
+ size_t gguf_get_arr_n(const struct gguf_context * ctx, int64_t key_id) {
791
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
792
+
793
+ if (ctx->kv[key_id].type == GGUF_TYPE_STRING) {
794
+ return ctx->kv[key_id].data_string.size();
795
+ }
796
+
797
+ const size_t type_size = gguf_type_size(ctx->kv[key_id].type);
798
+ GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0);
799
+ return ctx->kv[key_id].data.size() / type_size;
800
+ }
801
+
802
+ uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int64_t key_id) {
803
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
804
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
805
+ return ctx->kv[key_id].get_val<uint8_t>();
806
+ }
807
+
808
+ int8_t gguf_get_val_i8(const struct gguf_context * ctx, int64_t key_id) {
809
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
810
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
811
+ return ctx->kv[key_id].get_val<int8_t>();
812
+ }
813
+
814
+ uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int64_t key_id) {
815
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
816
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
817
+ return ctx->kv[key_id].get_val<uint16_t>();
818
+ }
819
+
820
+ int16_t gguf_get_val_i16(const struct gguf_context * ctx, int64_t key_id) {
821
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
822
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
823
+ return ctx->kv[key_id].get_val<int16_t>();
824
+ }
825
+
826
+ uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int64_t key_id) {
827
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
828
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
829
+ return ctx->kv[key_id].get_val<uint32_t>();
830
+ }
831
+
832
+ int32_t gguf_get_val_i32(const struct gguf_context * ctx, int64_t key_id) {
833
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
834
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
835
+ return ctx->kv[key_id].get_val<int32_t>();
836
+ }
837
+
838
+ float gguf_get_val_f32(const struct gguf_context * ctx, int64_t key_id) {
839
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
840
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
841
+ return ctx->kv[key_id].get_val<float>();
842
+ }
843
+
844
+ uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int64_t key_id) {
845
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
846
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
847
+ return ctx->kv[key_id].get_val<uint64_t>();
848
+ }
849
+
850
+ int64_t gguf_get_val_i64(const struct gguf_context * ctx, int64_t key_id) {
851
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
852
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
853
+ return ctx->kv[key_id].get_val<int64_t>();
854
+ }
855
+
856
+ double gguf_get_val_f64(const struct gguf_context * ctx, int64_t key_id) {
857
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
858
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
859
+ return ctx->kv[key_id].get_val<double>();
860
+ }
861
+
862
+ bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id) {
863
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
864
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
865
+ return ctx->kv[key_id].get_val<bool>();
866
+ }
867
+
868
+ const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) {
869
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
870
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
871
+ return ctx->kv[key_id].get_val<std::string>().c_str();
872
+ }
873
+
874
+ const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id) {
875
+ GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
876
+ GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
877
+ GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
878
+ return ctx->kv[key_id].data.data();
879
+ }
880
+
881
+ int64_t gguf_get_n_tensors(const struct gguf_context * ctx) {
882
+ return ctx->info.size();
883
+ }
884
+
885
+ int64_t gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
886
+ // return -1 if tensor not found
887
+ int64_t tensor_id = -1;
888
+
889
+ const int64_t n_tensors = gguf_get_n_tensors(ctx);
890
+
891
+ for (int64_t i = 0; i < n_tensors; ++i) {
892
+ if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
893
+ tensor_id = i;
894
+ break;
895
+ }
896
+ }
897
+
898
+ return tensor_id;
899
+ }
900
+
901
+ size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id) {
902
+ GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
903
+ return ctx->info[tensor_id].offset;
904
+ }
905
+
906
+ const char * gguf_get_tensor_name(const struct gguf_context * ctx, int64_t tensor_id) {
907
+ GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
908
+ return ctx->info[tensor_id].t.name;
909
+ }
910
+
911
+ enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int64_t tensor_id) {
912
+ GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
913
+ return ctx->info[tensor_id].t.type;
914
+ }
915
+
916
+ size_t gguf_get_tensor_size(const struct gguf_context * ctx, int64_t tensor_id) {
917
+ GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
918
+ return ggml_nbytes(&ctx->info[tensor_id].t);
919
+ }
920
+
921
+ int64_t gguf_remove_key(struct gguf_context * ctx, const char * key) {
922
+ const int64_t key_id = gguf_find_key(ctx, key);
923
+ if (key_id >= 0) {
924
+ ctx->kv.erase(ctx->kv.begin() + key_id);
925
+ }
926
+ return key_id;
927
+ }
928
+
929
+ template<typename T>
930
+ static void gguf_check_reserved_keys(const std::string & key, const T val) {
931
+ if (key == GGUF_KEY_GENERAL_ALIGNMENT) {
932
+ if constexpr (std::is_same<T, uint32_t>::value) {
933
+ GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
934
+ } else {
935
+ GGML_UNUSED(val);
936
+ GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
937
+ }
938
+ }
939
+ }
940
+
941
+ void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
942
+ gguf_check_reserved_keys(key, val);
943
+ gguf_remove_key(ctx, key);
944
+ ctx->kv.emplace_back(key, val);
945
+ }
946
+
947
+ void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
948
+ gguf_check_reserved_keys(key, val);
949
+ gguf_remove_key(ctx, key);
950
+ ctx->kv.emplace_back(key, val);
951
+ }
952
+
953
+ void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
954
+ gguf_check_reserved_keys(key, val);
955
+ gguf_remove_key(ctx, key);
956
+ ctx->kv.emplace_back(key, val);
957
+ }
958
+
959
+ void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
960
+ gguf_check_reserved_keys(key, val);
961
+ gguf_remove_key(ctx, key);
962
+ ctx->kv.emplace_back(key, val);
963
+ }
964
+
965
+ void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
966
+ gguf_check_reserved_keys(key, val);
967
+ gguf_remove_key(ctx, key);
968
+ ctx->kv.emplace_back(key, val);
969
+ }
970
+
971
+ void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
972
+ gguf_check_reserved_keys(key, val);
973
+ gguf_remove_key(ctx, key);
974
+ ctx->kv.emplace_back(key, val);
975
+ }
976
+
977
+ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
978
+ gguf_check_reserved_keys(key, val);
979
+ gguf_remove_key(ctx, key);
980
+ ctx->kv.emplace_back(key, val);
981
+ }
982
+
983
+ void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
984
+ gguf_check_reserved_keys(key, val);
985
+ gguf_remove_key(ctx, key);
986
+ ctx->kv.emplace_back(key, val);
987
+ }
988
+
989
+ void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
990
+ gguf_check_reserved_keys(key, val);
991
+ gguf_remove_key(ctx, key);
992
+ ctx->kv.emplace_back(key, val);
993
+ }
994
+
995
+ void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
996
+ gguf_check_reserved_keys(key, val);
997
+ gguf_remove_key(ctx, key);
998
+ ctx->kv.emplace_back(key, val);
999
+ }
1000
+
1001
+ void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
1002
+ gguf_check_reserved_keys(key, val);
1003
+ gguf_remove_key(ctx, key);
1004
+ ctx->kv.emplace_back(key, val);
1005
+ }
1006
+
1007
+ void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
1008
+ gguf_check_reserved_keys(key, val);
1009
+ gguf_remove_key(ctx, key);
1010
+ ctx->kv.emplace_back(key, std::string(val));
1011
+ }
1012
+
1013
+ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n) {
1014
+ gguf_check_reserved_keys(key, data);
1015
+ gguf_remove_key(ctx, key);
1016
+
1017
+ const size_t nbytes = n*gguf_type_size(type);
1018
+ std::vector<int8_t> tmp(nbytes);
1019
+ if (!tmp.empty()) {
1020
+ memcpy(tmp.data(), data, nbytes);
1021
+ }
1022
+ ctx->kv.emplace_back(key, tmp);
1023
+ ctx->kv.back().cast(type);
1024
+ }
1025
+
1026
+ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, size_t n) {
1027
+ gguf_check_reserved_keys(key, data);
1028
+ gguf_remove_key(ctx, key);
1029
+
1030
+ std::vector<std::string> tmp(n);
1031
+ for (size_t i = 0; i < n; ++i) {
1032
+ tmp[i] = data[i];
1033
+ }
1034
+ ctx->kv.emplace_back(key, tmp);
1035
+ }
1036
+
1037
+ // set or add KV pairs from another context
1038
+ void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src) {
1039
+ const int64_t n_kv = gguf_get_n_kv(src);
1040
+ for (int64_t i = 0; i < n_kv; ++i) {
1041
+ const struct gguf_kv & kv = src->kv[i];
1042
+
1043
+ if (!kv.is_array) {
1044
+ switch (kv.get_type()) {
1045
+ case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>()); break;
1046
+ case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, kv.get_key().c_str(), kv.get_val<int8_t>()); break;
1047
+ case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>()); break;
1048
+ case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>()); break;
1049
+ case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>()); break;
1050
+ case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>()); break;
1051
+ case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>()); break;
1052
+ case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>()); break;
1053
+ case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>()); break;
1054
+ case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>()); break;
1055
+ case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>()); break;
1056
+ case GGUF_TYPE_STRING: gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
1057
+ case GGUF_TYPE_ARRAY:
1058
+ default: GGML_ABORT("invalid type");
1059
+ }
1060
+ continue;
1061
+ }
1062
+
1063
+ const size_t ne = kv.get_ne();
1064
+
1065
+ switch (kv.get_type()) {
1066
+ case GGUF_TYPE_UINT8:
1067
+ case GGUF_TYPE_INT8:
1068
+ case GGUF_TYPE_UINT16:
1069
+ case GGUF_TYPE_INT16:
1070
+ case GGUF_TYPE_UINT32:
1071
+ case GGUF_TYPE_INT32:
1072
+ case GGUF_TYPE_FLOAT32:
1073
+ case GGUF_TYPE_UINT64:
1074
+ case GGUF_TYPE_INT64:
1075
+ case GGUF_TYPE_FLOAT64:
1076
+ case GGUF_TYPE_BOOL: {
1077
+ gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
1078
+ } break;
1079
+ case GGUF_TYPE_STRING: {
1080
+ std::vector<const char *> tmp(ne);
1081
+ for (size_t j = 0; j < ne; ++j) {
1082
+ tmp[j] = kv.data_string[j].c_str();
1083
+ }
1084
+ gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
1085
+ } break;
1086
+ case GGUF_TYPE_ARRAY:
1087
+ default: GGML_ABORT("invalid type");
1088
+ }
1089
+ }
1090
+ }
1091
+
1092
+ void gguf_add_tensor(
1093
+ struct gguf_context * ctx,
1094
+ const struct ggml_tensor * tensor) {
1095
+ GGML_ASSERT(tensor);
1096
+ if (gguf_find_tensor(ctx, tensor->name) != -1) {
1097
+ GGML_ABORT("duplicate tensor name: %s", tensor->name);
1098
+ }
1099
+
1100
+ struct gguf_tensor_info ti;
1101
+ ti.t = *tensor;
1102
+ ti.offset = ctx->info.empty() ? 0 :
1103
+ ctx->info.back().offset + GGML_PAD(ggml_nbytes(&ctx->info.back().t), ctx->alignment);
1104
+ ctx->info.push_back(ti);
1105
+ }
1106
+
1107
+ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
1108
+ const int64_t tensor_id = gguf_find_tensor(ctx, name);
1109
+ if (tensor_id < 0) {
1110
+ GGML_ABORT("tensor not found: %s", name);
1111
+ }
1112
+ struct ggml_tensor * tensor = &ctx->info[tensor_id].t;
1113
+ const size_t type_size = ggml_type_size(type);
1114
+ const int64_t blck_size = ggml_blck_size(type);
1115
+
1116
+ tensor->type = type;
1117
+ GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
1118
+
1119
+ tensor->nb[0] = type_size;
1120
+ tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
1121
+ for (int i = 2; i < GGML_MAX_DIMS; i++) {
1122
+ tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1];
1123
+ }
1124
+
1125
+ // update offsets
1126
+ const int64_t n_tensors = gguf_get_n_tensors(ctx);
1127
+ for (int64_t i = tensor_id + 1; i < n_tensors; ++i) {
1128
+ ctx->info[i].offset = ctx->info[i - 1].offset + GGML_PAD(ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment);
1129
+ }
1130
+ }
1131
+
1132
+ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data) {
1133
+ const int64_t tensor_id = gguf_find_tensor(ctx, name);
1134
+ if (tensor_id < 0) {
1135
+ GGML_ABORT("tensor not found: %s", name);
1136
+ }
1137
+
1138
+ ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
1139
+ }
1140
+
1141
+ struct gguf_writer {
1142
+ std::vector<int8_t> & buf;
1143
+
1144
+ gguf_writer(std::vector<int8_t> & buf) : buf(buf) {}
1145
+
1146
+ template <typename T>
1147
+ void write(const T & val) const {
1148
+ for (size_t i = 0; i < sizeof(val); ++i) {
1149
+ buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
1150
+ }
1151
+ }
1152
+
1153
+ void write(const std::vector<int8_t> & val) const {
1154
+ buf.insert(buf.end(), val.begin(), val.end());
1155
+ }
1156
+
1157
+ void write(const bool & val) const {
1158
+ const int8_t val8 = val ? 1 : 0;
1159
+ write(val8);
1160
+ }
1161
+
1162
+ void write(const std::string & val) const {
1163
+ {
1164
+ const uint64_t n = val.length();
1165
+ write(n);
1166
+ }
1167
+ for (size_t i = 0; i < val.length(); ++i) {
1168
+ buf.push_back(reinterpret_cast<const int8_t *>(val.data())[i]);
1169
+ }
1170
+ }
1171
+
1172
+ void write(const char * val) const {
1173
+ write(std::string(val));
1174
+ }
1175
+
1176
+ void write(const enum ggml_type & val) const {
1177
+ write(int32_t(val));
1178
+ }
1179
+
1180
+ void write(const enum gguf_type & val) const {
1181
+ write(int32_t(val));
1182
+ }
1183
+
1184
+ void write(const struct gguf_kv & kv) const {
1185
+ const uint64_t ne = kv.get_ne();
1186
+
1187
+ write(kv.get_key());
1188
+
1189
+ if (kv.is_array) {
1190
+ write(GGUF_TYPE_ARRAY);
1191
+ write(kv.get_type());
1192
+ write(ne);
1193
+ } else {
1194
+ write(kv.get_type());
1195
+ }
1196
+
1197
+ switch (kv.get_type()) {
1198
+ case GGUF_TYPE_UINT8:
1199
+ case GGUF_TYPE_INT8:
1200
+ case GGUF_TYPE_UINT16:
1201
+ case GGUF_TYPE_INT16:
1202
+ case GGUF_TYPE_UINT32:
1203
+ case GGUF_TYPE_INT32:
1204
+ case GGUF_TYPE_FLOAT32:
1205
+ case GGUF_TYPE_UINT64:
1206
+ case GGUF_TYPE_INT64:
1207
+ case GGUF_TYPE_FLOAT64: {
1208
+ write(kv.data);
1209
+ } break;
1210
+ case GGUF_TYPE_BOOL: {
1211
+ for (size_t i = 0; i < ne; ++i) {
1212
+ write(kv.get_val<bool>(i));
1213
+ }
1214
+ } break;
1215
+ case GGUF_TYPE_STRING: {
1216
+ for (size_t i = 0; i < ne; ++i) {
1217
+ write(kv.get_val<std::string>(i));
1218
+ }
1219
+ } break;
1220
+ case GGUF_TYPE_ARRAY:
1221
+ default: GGML_ABORT("invalid type");
1222
+ }
1223
+ }
1224
+
1225
+ void write_tensor_meta(const struct gguf_tensor_info & info) const {
1226
+ write(info.t.name);
1227
+
1228
+ const uint32_t n_dims = ggml_n_dims(&info.t);
1229
+ write(n_dims);
1230
+
1231
+ for (uint32_t j = 0; j < n_dims; ++j) {
1232
+ write(info.t.ne[j]);
1233
+ }
1234
+ write(info.t.type);
1235
+ write(info.offset);
1236
+ }
1237
+
1238
+ void pad(const size_t alignment) const {
1239
+ while (buf.size() % alignment != 0) {
1240
+ const int8_t zero = 0;
1241
+ write(zero);
1242
+ }
1243
+ }
1244
+
1245
+ void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) const {
1246
+ GGML_ASSERT(buf.size() - offset_data == info.offset);
1247
+
1248
+ GGML_ASSERT(ggml_is_contiguous(&info.t));
1249
+ const size_t offset = buf.size();
1250
+ const size_t nbytes = ggml_nbytes(&info.t);
1251
+
1252
+ buf.resize(offset + nbytes);
1253
+ if (info.t.buffer) {
1254
+ ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes);
1255
+ } else {
1256
+ GGML_ASSERT(info.t.data);
1257
+ memcpy(buf.data() + offset, info.t.data, nbytes);
1258
+ }
1259
+
1260
+ pad(alignment);
1261
+ }
1262
+ };
1263
+
1264
+ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
1265
+ const struct gguf_writer gw(buf);
1266
+
1267
+ const int64_t n_kv = gguf_get_n_kv(ctx);
1268
+ const int64_t n_tensors = gguf_get_n_tensors(ctx);
1269
+
1270
+ // write header
1271
+ gw.write(GGUF_MAGIC[0]);
1272
+ gw.write(GGUF_MAGIC[1]);
1273
+ gw.write(GGUF_MAGIC[2]);
1274
+ gw.write(GGUF_MAGIC[3]);
1275
+ gw.write(ctx->version);
1276
+ gw.write(n_tensors);
1277
+ gw.write(n_kv);
1278
+
1279
+ // write key-value pairs
1280
+ for (int64_t i = 0; i < n_kv; ++i) {
1281
+ gw.write(ctx->kv[i]);
1282
+ }
1283
+
1284
+ // write tensor info
1285
+ for (int64_t i = 0; i < n_tensors; ++i) {
1286
+ gw.write_tensor_meta(ctx->info[i]);
1287
+ }
1288
+
1289
+ // we require the data section to be aligned
1290
+ gw.pad(ctx->alignment);
1291
+
1292
+ if (only_meta) {
1293
+ return;
1294
+ }
1295
+
1296
+ const size_t offset_data = gw.buf.size();
1297
+
1298
+ // write tensor data
1299
+ for (int64_t i = 0; i < n_tensors; ++i) {
1300
+ gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
1301
+ }
1302
+ }
1303
+
1304
+ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
1305
+ FILE * file = ggml_fopen(fname, "wb");
1306
+
1307
+ if (!file) {
1308
+ fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1309
+ return false;
1310
+ }
1311
+
1312
+ std::vector<int8_t> buf;
1313
+ gguf_write_to_buf(ctx, buf, only_meta);
1314
+ const bool ok = fwrite(buf.data(), 1, buf.size(), file) == buf.size();
1315
+ fclose(file);
1316
+ return ok;
1317
+ }
1318
+
1319
+ size_t gguf_get_meta_size(const struct gguf_context * ctx) {
1320
+ // only return size
1321
+ std::vector<int8_t> buf;
1322
+ gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1323
+ return buf.size();
1324
+ }
1325
+
1326
+ void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
1327
+ std::vector<int8_t> buf;
1328
+ gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1329
+ memcpy(data, buf.data(), buf.size());
1330
+ }