@novastera-oss/llamarn 0.0.1-alpha.4

This diff shows the content of publicly available package versions that have been released to one of the supported registries. The information is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (989)
  1. package/INTERFACE.md +389 -0
  2. package/LICENSE +201 -0
  3. package/README.md +235 -0
  4. package/RNLlamaCpp.podspec +69 -0
  5. package/android/CMakeLists.txt +107 -0
  6. package/android/build.gradle +111 -0
  7. package/android/generated/java/com/novastera/llamarn/NativeRNLlamaCppSpec.java +47 -0
  8. package/android/generated/jni/CMakeLists.txt +36 -0
  9. package/android/generated/jni/RNLlamaCppSpec-generated.cpp +44 -0
  10. package/android/generated/jni/RNLlamaCppSpec.h +31 -0
  11. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI-generated.cpp +42 -0
  12. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI.h +336 -0
  13. package/android/gradle.properties +5 -0
  14. package/android/src/main/AndroidManifest.xml +3 -0
  15. package/android/src/main/AndroidManifestNew.xml +2 -0
  16. package/android/src/main/cpp/include/llama-cpp.h +30 -0
  17. package/android/src/main/cpp/include/llama.h +1440 -0
  18. package/android/src/main/java/com/novastera/llamarn/RNLlamaCppPackage.kt +21 -0
  19. package/android/src/main/jniLibs/arm64-v8a/libOpenCL.so +0 -0
  20. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  21. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  22. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  23. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  24. package/android/src/main/jniLibs/x86_64/libOpenCL.so +0 -0
  25. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  26. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  27. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  28. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  29. package/cpp/LlamaCppModel.cpp +984 -0
  30. package/cpp/LlamaCppModel.h +162 -0
  31. package/cpp/PureCppImpl.cpp +308 -0
  32. package/cpp/PureCppImpl.h +59 -0
  33. package/cpp/SystemUtils.cpp +180 -0
  34. package/cpp/SystemUtils.h +74 -0
  35. package/cpp/build-info.cpp +4 -0
  36. package/cpp/llama.cpp/AUTHORS +1106 -0
  37. package/cpp/llama.cpp/CMakeLists.txt +254 -0
  38. package/cpp/llama.cpp/CMakePresets.json +84 -0
  39. package/cpp/llama.cpp/CODEOWNERS +11 -0
  40. package/cpp/llama.cpp/CONTRIBUTING.md +127 -0
  41. package/cpp/llama.cpp/LICENSE +21 -0
  42. package/cpp/llama.cpp/Makefile +1608 -0
  43. package/cpp/llama.cpp/README.md +575 -0
  44. package/cpp/llama.cpp/SECURITY.md +68 -0
  45. package/cpp/llama.cpp/build-xcframework.sh +540 -0
  46. package/cpp/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  47. package/cpp/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
  48. package/cpp/llama.cpp/cmake/build-info.cmake +64 -0
  49. package/cpp/llama.cpp/cmake/common.cmake +35 -0
  50. package/cpp/llama.cpp/cmake/git-vars.cmake +22 -0
  51. package/cpp/llama.cpp/cmake/llama-config.cmake.in +30 -0
  52. package/cpp/llama.cpp/cmake/llama.pc.in +10 -0
  53. package/cpp/llama.cpp/cmake/x64-windows-llvm.cmake +5 -0
  54. package/cpp/llama.cpp/common/CMakeLists.txt +170 -0
  55. package/cpp/llama.cpp/common/arg.cpp +3337 -0
  56. package/cpp/llama.cpp/common/arg.h +89 -0
  57. package/cpp/llama.cpp/common/base64.hpp +392 -0
  58. package/cpp/llama.cpp/common/build-info.cpp.in +4 -0
  59. package/cpp/llama.cpp/common/chat.cpp +1781 -0
  60. package/cpp/llama.cpp/common/chat.h +135 -0
  61. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +24 -0
  62. package/cpp/llama.cpp/common/common.cpp +1567 -0
  63. package/cpp/llama.cpp/common/common.h +668 -0
  64. package/cpp/llama.cpp/common/console.cpp +504 -0
  65. package/cpp/llama.cpp/common/console.h +19 -0
  66. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +1027 -0
  67. package/cpp/llama.cpp/common/json-schema-to-grammar.h +21 -0
  68. package/cpp/llama.cpp/common/json.hpp +24766 -0
  69. package/cpp/llama.cpp/common/llguidance.cpp +254 -0
  70. package/cpp/llama.cpp/common/log.cpp +393 -0
  71. package/cpp/llama.cpp/common/log.h +103 -0
  72. package/cpp/llama.cpp/common/minja/chat-template.hpp +537 -0
  73. package/cpp/llama.cpp/common/minja/minja.hpp +2941 -0
  74. package/cpp/llama.cpp/common/ngram-cache.cpp +286 -0
  75. package/cpp/llama.cpp/common/ngram-cache.h +101 -0
  76. package/cpp/llama.cpp/common/sampling.cpp +580 -0
  77. package/cpp/llama.cpp/common/sampling.h +107 -0
  78. package/cpp/llama.cpp/common/speculative.cpp +278 -0
  79. package/cpp/llama.cpp/common/speculative.h +28 -0
  80. package/cpp/llama.cpp/common/stb_image.h +7988 -0
  81. package/cpp/llama.cpp/convert_hf_to_gguf.py +6195 -0
  82. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +393 -0
  83. package/cpp/llama.cpp/convert_llama_ggml_to_gguf.py +450 -0
  84. package/cpp/llama.cpp/convert_lora_to_gguf.py +461 -0
  85. package/cpp/llama.cpp/flake.lock +58 -0
  86. package/cpp/llama.cpp/flake.nix +185 -0
  87. package/cpp/llama.cpp/ggml/CMakeLists.txt +388 -0
  88. package/cpp/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  89. package/cpp/llama.cpp/ggml/cmake/common.cmake +26 -0
  90. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  91. package/cpp/llama.cpp/ggml/include/ggml-alloc.h +76 -0
  92. package/cpp/llama.cpp/ggml/include/ggml-backend.h +354 -0
  93. package/cpp/llama.cpp/ggml/include/ggml-blas.h +25 -0
  94. package/cpp/llama.cpp/ggml/include/ggml-cann.h +123 -0
  95. package/cpp/llama.cpp/ggml/include/ggml-cpp.h +39 -0
  96. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +143 -0
  97. package/cpp/llama.cpp/ggml/include/ggml-cuda.h +47 -0
  98. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +50 -0
  99. package/cpp/llama.cpp/ggml/include/ggml-metal.h +66 -0
  100. package/cpp/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  101. package/cpp/llama.cpp/ggml/include/ggml-opt.h +216 -0
  102. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +33 -0
  103. package/cpp/llama.cpp/ggml/include/ggml-sycl.h +49 -0
  104. package/cpp/llama.cpp/ggml/include/ggml-vulkan.h +29 -0
  105. package/cpp/llama.cpp/ggml/include/ggml.h +2192 -0
  106. package/cpp/llama.cpp/ggml/include/gguf.h +202 -0
  107. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +345 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +1042 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +255 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +586 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +2008 -0
  112. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +74 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +179 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2589 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +1083 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +420 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2554 -0
  122. package/cpp/llama.cpp/ggml/src/ggml-common.h +1857 -0
  123. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +495 -0
  124. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  125. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  126. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  127. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  128. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  131. package/cpp/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  132. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +327 -0
  134. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
  135. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  136. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  138. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +512 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +13131 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  141. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  142. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3492 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +254 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +60 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +287 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +47 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +828 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +695 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +873 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1269 -0
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +437 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +428 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +345 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3501 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +322 -0
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  211. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  212. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  213. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +189 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  239. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  240. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  241. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  242. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  243. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  249. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  251. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  253. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  254. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  255. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  256. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  258. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  259. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  260. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  261. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  262. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  263. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  265. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  266. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  267. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  268. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  274. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  275. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  276. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  277. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  278. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  279. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  280. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  282. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  283. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  284. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  285. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  286. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  287. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  288. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  289. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  290. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  291. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  292. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  293. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  294. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  295. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  296. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  297. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  298. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  299. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  300. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  301. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  302. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  311. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  313. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  314. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  315. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  316. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  317. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  319. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  320. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  321. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  322. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  323. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  324. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  325. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  326. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  327. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  329. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  330. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  331. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  332. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  333. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  334. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  335. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  336. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  337. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  338. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  339. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  340. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  341. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  342. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  343. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  344. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  345. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  346. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  347. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  348. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  349. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  350. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  351. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  352. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  353. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  354. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  355. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  356. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  357. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  358. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  359. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  360. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  361. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  362. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  363. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  364. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  365. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  366. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +279 -0
  367. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +57 -0
  368. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  369. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  370. package/cpp/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  371. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  372. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  373. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  374. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  375. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  376. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +131 -0
  377. package/cpp/llama.cpp/ggml/src/ggml-impl.h +601 -0
  378. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  379. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  380. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  381. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  382. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  383. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  384. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  385. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  386. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  387. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  388. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  389. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  390. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  391. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  392. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  394. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  395. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  396. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  397. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  398. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  399. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  400. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  401. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  402. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  403. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  404. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  405. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  406. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  407. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  408. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  409. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  410. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  411. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  412. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  413. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  414. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  415. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  416. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  417. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +120 -0
  418. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +618 -0
  419. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +5916 -0
  420. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +6891 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  422. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
  423. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4966 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  425. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  426. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  427. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  428. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  429. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  430. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  431. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  432. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  433. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  434. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  435. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  436. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  437. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  438. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  439. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  440. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  441. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  442. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  443. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  444. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  445. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  447. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  448. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  449. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  450. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  451. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  452. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  453. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  454. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  455. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  456. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  457. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  458. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  460. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  461. package/cpp/llama.cpp/ggml/src/ggml-quants.c +5232 -0
  462. package/cpp/llama.cpp/ggml/src/ggml-quants.h +100 -0
  463. package/cpp/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  464. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1813 -0
  465. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +183 -0
  466. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  467. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
  468. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  469. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  470. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +493 -0
  471. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +197 -0
  472. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +100 -0
  474. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +596 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  477. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  479. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +753 -0
  480. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1154 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  482. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1559 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +75 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +70 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +311 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  488. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4302 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  490. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  491. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  493. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  495. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1081 -0
  496. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  497. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +474 -0
  498. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  499. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +46 -0
  500. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  501. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  502. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +61 -0
  503. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +362 -0
  504. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  505. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +264 -0
  506. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  507. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  508. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  509. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  510. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  511. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1189 -0
  512. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  513. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  514. package/cpp/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  515. package/cpp/llama.cpp/ggml/src/ggml-threading.h +14 -0
  516. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +202 -0
  517. package/cpp/llama.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  518. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10502 -0
  519. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +22 -0
  520. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  521. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  522. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  523. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  524. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  525. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  526. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  527. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  528. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  529. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  530. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  531. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  532. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  533. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  534. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  535. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  536. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  537. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  538. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  539. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  540. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  541. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  542. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  543. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  544. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  545. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  546. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  547. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  548. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  549. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  550. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  551. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  552. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  553. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  554. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  555. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  556. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  557. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  558. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +483 -0
  559. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +383 -0
  560. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  561. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  562. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  563. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  564. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  565. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  566. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  567. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  568. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  569. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  570. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  571. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  572. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  573. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  574. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  575. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  576. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  577. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  578. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  579. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  580. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  581. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  582. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  583. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  584. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  585. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  586. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  587. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  588. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  589. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  590. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  591. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  592. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  593. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  594. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  595. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  596. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  597. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  598. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  599. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  600. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  601. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  602. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  603. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  604. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  605. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  606. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  607. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  608. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  609. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  610. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  611. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  612. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  613. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  614. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  615. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  616. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  617. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  618. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  619. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  620. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  621. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  622. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  623. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  624. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  625. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  626. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  627. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +740 -0
  628. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  629. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  630. package/cpp/llama.cpp/ggml/src/ggml.c +6499 -0
  631. package/cpp/llama.cpp/ggml/src/gguf.cpp +1330 -0
  632. package/cpp/llama.cpp/gguf-py/LICENSE +21 -0
  633. package/cpp/llama.cpp/gguf-py/README.md +99 -0
  634. package/cpp/llama.cpp/gguf-py/examples/reader.py +49 -0
  635. package/cpp/llama.cpp/gguf-py/examples/writer.py +39 -0
  636. package/cpp/llama.cpp/gguf-py/gguf/__init__.py +9 -0
  637. package/cpp/llama.cpp/gguf-py/gguf/constants.py +2296 -0
  638. package/cpp/llama.cpp/gguf-py/gguf/gguf.py +15 -0
  639. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +367 -0
  640. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +1041 -0
  641. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +223 -0
  642. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +642 -0
  643. package/cpp/llama.cpp/gguf-py/gguf/py.typed +0 -0
  644. package/cpp/llama.cpp/gguf-py/gguf/quants.py +1269 -0
  645. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +182 -0
  646. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +454 -0
  647. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +1610 -0
  648. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_hash.py +102 -0
  649. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +207 -0
  650. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_set_metadata.py +95 -0
  651. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +1172 -0
  652. package/cpp/llama.cpp/gguf-py/gguf/utility.py +264 -0
  653. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +492 -0
  654. package/cpp/llama.cpp/gguf-py/pyproject.toml +43 -0
  655. package/cpp/llama.cpp/gguf-py/tests/__init__.py +1 -0
  656. package/cpp/llama.cpp/gguf-py/tests/test_metadata.py +238 -0
  657. package/cpp/llama.cpp/gguf-py/tests/test_quants.py +238 -0
  658. package/cpp/llama.cpp/grammars/README.md +382 -0
  659. package/cpp/llama.cpp/grammars/arithmetic.gbnf +6 -0
  660. package/cpp/llama.cpp/grammars/c.gbnf +42 -0
  661. package/cpp/llama.cpp/grammars/chess.gbnf +13 -0
  662. package/cpp/llama.cpp/grammars/english.gbnf +6 -0
  663. package/cpp/llama.cpp/grammars/japanese.gbnf +7 -0
  664. package/cpp/llama.cpp/grammars/json.gbnf +25 -0
  665. package/cpp/llama.cpp/grammars/json_arr.gbnf +34 -0
  666. package/cpp/llama.cpp/grammars/list.gbnf +4 -0
  667. package/cpp/llama.cpp/include/llama-cpp.h +30 -0
  668. package/cpp/llama.cpp/include/llama.h +1440 -0
  669. package/cpp/llama.cpp/licenses/LICENSE-curl +9 -0
  670. package/cpp/llama.cpp/licenses/LICENSE-httplib +21 -0
  671. package/cpp/llama.cpp/licenses/LICENSE-jsonhpp +21 -0
  672. package/cpp/llama.cpp/licenses/LICENSE-linenoise +26 -0
  673. package/cpp/llama.cpp/media/llama0-banner.png +0 -0
  674. package/cpp/llama.cpp/media/llama0-logo.png +0 -0
  675. package/cpp/llama.cpp/media/llama1-banner.png +0 -0
  676. package/cpp/llama.cpp/media/llama1-logo.png +0 -0
  677. package/cpp/llama.cpp/media/llama1-logo.svg +34 -0
  678. package/cpp/llama.cpp/media/matmul.png +0 -0
  679. package/cpp/llama.cpp/media/matmul.svg +1238 -0
  680. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  681. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  682. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  683. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  684. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  685. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  686. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  687. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  688. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  689. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  690. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  691. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  692. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  693. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  694. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  695. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  696. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  697. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  698. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  699. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  700. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  701. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  702. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  703. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  704. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  705. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  706. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  707. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  708. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  709. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  710. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  711. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  712. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  713. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  714. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  715. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  716. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  717. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  718. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  719. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  720. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  721. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  722. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  723. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  724. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  725. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  726. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  727. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  728. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  729. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  730. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  731. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  732. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  733. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  734. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +202 -0
  735. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +156 -0
  736. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +152 -0
  737. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +152 -0
  738. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +54 -0
  739. package/cpp/llama.cpp/models/templates/README.md +22 -0
  740. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +1 -0
  741. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +1 -0
  742. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +57 -0
  743. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +4 -0
  744. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +76 -0
  745. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +58 -0
  746. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +287 -0
  747. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +109 -0
  748. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +93 -0
  749. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +109 -0
  750. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +8 -0
  751. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +87 -0
  752. package/cpp/llama.cpp/mypy.ini +7 -0
  753. package/cpp/llama.cpp/pocs/CMakeLists.txt +14 -0
  754. package/cpp/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  755. package/cpp/llama.cpp/pocs/vdot/q8dot.cpp +173 -0
  756. package/cpp/llama.cpp/pocs/vdot/vdot.cpp +311 -0
  757. package/cpp/llama.cpp/poetry.lock +1197 -0
  758. package/cpp/llama.cpp/prompts/LLM-questions.txt +49 -0
  759. package/cpp/llama.cpp/prompts/alpaca.txt +1 -0
  760. package/cpp/llama.cpp/prompts/assistant.txt +31 -0
  761. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  762. package/cpp/llama.cpp/prompts/chat-with-bob.txt +7 -0
  763. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  764. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  765. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  766. package/cpp/llama.cpp/prompts/chat.txt +28 -0
  767. package/cpp/llama.cpp/prompts/dan-modified.txt +1 -0
  768. package/cpp/llama.cpp/prompts/dan.txt +1 -0
  769. package/cpp/llama.cpp/prompts/mnemonics.txt +93 -0
  770. package/cpp/llama.cpp/prompts/parallel-questions.txt +43 -0
  771. package/cpp/llama.cpp/prompts/reason-act.txt +18 -0
  772. package/cpp/llama.cpp/pyproject.toml +45 -0
  773. package/cpp/llama.cpp/pyrightconfig.json +22 -0
  774. package/cpp/llama.cpp/requirements/requirements-all.txt +15 -0
  775. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  776. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  777. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  778. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +5 -0
  779. package/cpp/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  780. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  781. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  782. package/cpp/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  783. package/cpp/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  784. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  785. package/cpp/llama.cpp/requirements.txt +13 -0
  786. package/cpp/llama.cpp/src/CMakeLists.txt +45 -0
  787. package/cpp/llama.cpp/src/llama-adapter.cpp +388 -0
  788. package/cpp/llama.cpp/src/llama-adapter.h +76 -0
  789. package/cpp/llama.cpp/src/llama-arch.cpp +1743 -0
  790. package/cpp/llama.cpp/src/llama-arch.h +437 -0
  791. package/cpp/llama.cpp/src/llama-batch.cpp +372 -0
  792. package/cpp/llama.cpp/src/llama-batch.h +89 -0
  793. package/cpp/llama.cpp/src/llama-chat.cpp +663 -0
  794. package/cpp/llama.cpp/src/llama-chat.h +58 -0
  795. package/cpp/llama.cpp/src/llama-context.cpp +2459 -0
  796. package/cpp/llama.cpp/src/llama-context.h +246 -0
  797. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -0
  798. package/cpp/llama.cpp/src/llama-cparams.h +39 -0
  799. package/cpp/llama.cpp/src/llama-grammar.cpp +1219 -0
  800. package/cpp/llama.cpp/src/llama-grammar.h +173 -0
  801. package/cpp/llama.cpp/src/llama-graph.cpp +1713 -0
  802. package/cpp/llama.cpp/src/llama-graph.h +595 -0
  803. package/cpp/llama.cpp/src/llama-hparams.cpp +79 -0
  804. package/cpp/llama.cpp/src/llama-hparams.h +161 -0
  805. package/cpp/llama.cpp/src/llama-impl.cpp +167 -0
  806. package/cpp/llama.cpp/src/llama-impl.h +61 -0
  807. package/cpp/llama.cpp/src/llama-io.cpp +15 -0
  808. package/cpp/llama.cpp/src/llama-io.h +35 -0
  809. package/cpp/llama.cpp/src/llama-kv-cache.cpp +2486 -0
  810. package/cpp/llama.cpp/src/llama-kv-cache.h +405 -0
  811. package/cpp/llama.cpp/src/llama-memory.cpp +1 -0
  812. package/cpp/llama.cpp/src/llama-memory.h +31 -0
  813. package/cpp/llama.cpp/src/llama-mmap.cpp +600 -0
  814. package/cpp/llama.cpp/src/llama-mmap.h +68 -0
  815. package/cpp/llama.cpp/src/llama-model-loader.cpp +1133 -0
  816. package/cpp/llama.cpp/src/llama-model-loader.h +169 -0
  817. package/cpp/llama.cpp/src/llama-model.cpp +13453 -0
  818. package/cpp/llama.cpp/src/llama-model.h +420 -0
  819. package/cpp/llama.cpp/src/llama-quant.cpp +964 -0
  820. package/cpp/llama.cpp/src/llama-quant.h +1 -0
  821. package/cpp/llama.cpp/src/llama-sampling.cpp +2575 -0
  822. package/cpp/llama.cpp/src/llama-sampling.h +32 -0
  823. package/cpp/llama.cpp/src/llama-vocab.cpp +3313 -0
  824. package/cpp/llama.cpp/src/llama-vocab.h +125 -0
  825. package/cpp/llama.cpp/src/llama.cpp +340 -0
  826. package/cpp/llama.cpp/src/unicode-data.cpp +7034 -0
  827. package/cpp/llama.cpp/src/unicode-data.h +20 -0
  828. package/cpp/llama.cpp/src/unicode.cpp +849 -0
  829. package/cpp/llama.cpp/src/unicode.h +66 -0
  830. package/cpp/rn-completion.cpp +431 -0
  831. package/cpp/rn-llama.hpp +60 -0
  832. package/cpp/rn-utils.hpp +331 -0
  833. package/ios/OnLoad.mm +22 -0
  834. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec-generated.mm +64 -0
  835. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec.h +251 -0
  836. package/ios/generated/RNLlamaCppSpecJSI-generated.cpp +42 -0
  837. package/ios/generated/RNLlamaCppSpecJSI.h +336 -0
  838. package/ios/include/chat.h +135 -0
  839. package/ios/include/common/base64.hpp +392 -0
  840. package/ios/include/common/json.hpp +24766 -0
  841. package/ios/include/common/minja/chat-template.hpp +537 -0
  842. package/ios/include/common/minja/minja.hpp +2941 -0
  843. package/ios/include/common.h +668 -0
  844. package/ios/include/json-schema-to-grammar.h +21 -0
  845. package/ios/include/llama-cpp.h +30 -0
  846. package/ios/include/llama.h +1440 -0
  847. package/ios/include/log.h +103 -0
  848. package/ios/include/ngram-cache.h +101 -0
  849. package/ios/include/sampling.h +107 -0
  850. package/ios/include/speculative.h +28 -0
  851. package/ios/libs/llama.xcframework/Info.plist +135 -0
  852. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  853. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  854. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  855. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  856. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  857. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  858. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  859. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  860. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +2192 -0
  861. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/gguf.h +202 -0
  862. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +1440 -0
  863. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Info.plist +36 -0
  864. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Modules/module.modulemap +17 -0
  865. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  866. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  867. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  868. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  869. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  870. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  871. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  872. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  873. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  874. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  875. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  876. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  877. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  878. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Info.plist +36 -0
  879. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  880. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3442 -0
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +76 -0
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +354 -0
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +25 -0
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +143 -0
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +66 -0
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +2192 -0
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +202 -0
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +1440 -0
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +17 -0
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +32 -0
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +76 -0
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +354 -0
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +25 -0
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +143 -0
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +66 -0
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +2192 -0
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +202 -0
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +1440 -0
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +17 -0
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +32 -0
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  906. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +76 -0
  907. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +354 -0
  908. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +25 -0
  909. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +143 -0
  910. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +66 -0
  911. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +2192 -0
  912. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +202 -0
  913. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +1440 -0
  914. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +17 -0
  915. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +32 -0
  916. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  917. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  918. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  919. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  920. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  921. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  922. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  923. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  924. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  925. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  926. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +2192 -0
  927. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +202 -0
  928. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +1440 -0
  929. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +35 -0
  930. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +17 -0
  931. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  937. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  938. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  939. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  940. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  941. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  942. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  943. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  944. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +35 -0
  945. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  946. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  947. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  948. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  949. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4528 -0
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  952. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  953. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  954. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  955. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +2192 -0
  956. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +202 -0
  957. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +1440 -0
  958. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +32 -0
  959. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +17 -0
  960. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4549 -0
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3470 -0
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  968. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  969. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  970. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  971. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  972. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  973. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +32 -0
  974. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  975. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  976. package/lib/module/NativeRNLlamaCpp.js +35 -0
  977. package/lib/module/NativeRNLlamaCpp.js.map +1 -0
  978. package/lib/module/index.js +20 -0
  979. package/lib/module/index.js.map +1 -0
  980. package/lib/module/package.json +1 -0
  981. package/lib/typescript/package.json +1 -0
  982. package/lib/typescript/src/NativeRNLlamaCpp.d.ts +222 -0
  983. package/lib/typescript/src/NativeRNLlamaCpp.d.ts.map +1 -0
  984. package/lib/typescript/src/index.d.ts +5 -0
  985. package/lib/typescript/src/index.d.ts.map +1 -0
  986. package/package.json +161 -0
  987. package/react-native.config.js +15 -0
  988. package/src/NativeRNLlamaCpp.ts +282 -0
  989. package/src/index.tsx +54 -0
@@ -0,0 +1,2296 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum, IntEnum, auto
4
+ from typing import Any
5
+
6
+ #
7
+ # constants
8
+ #
9
+
10
+ GGUF_MAGIC = 0x46554747 # "GGUF"
11
+ GGUF_VERSION = 3
12
+ GGUF_DEFAULT_ALIGNMENT = 32
13
+ GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h
14
+
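A minimal sketch for orientation (not part of the packaged file): GGUF_MAGIC above is simply the ASCII bytes "GGUF" read as a little-endian uint32, so a quick header check on a model file could look like the following; the helper name is illustrative.

    import struct

    GGUF_MAGIC = 0x46554747  # same value as above: ASCII "GGUF" as a little-endian uint32

    def looks_like_gguf(path: str) -> bool:
        # Read the first 4 bytes of the file and compare them against the magic.
        with open(path, "rb") as f:
            (magic,) = struct.unpack("<I", f.read(4))
        return magic == GGUF_MAGIC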
15
+ #
16
+ # metadata keys
17
+ #
18
+
19
+
20
+ class Keys:
21
+ class General:
22
+ TYPE = "general.type"
23
+ ARCHITECTURE = "general.architecture"
24
+ QUANTIZATION_VERSION = "general.quantization_version"
25
+ ALIGNMENT = "general.alignment"
26
+ FILE_TYPE = "general.file_type"
27
+
28
+ # Authorship Metadata
29
+ NAME = "general.name"
30
+ AUTHOR = "general.author"
31
+ VERSION = "general.version"
32
+ ORGANIZATION = "general.organization"
33
+
34
+ FINETUNE = "general.finetune"
35
+ BASENAME = "general.basename"
36
+
37
+ DESCRIPTION = "general.description"
38
+ QUANTIZED_BY = "general.quantized_by"
39
+
40
+ SIZE_LABEL = "general.size_label"
41
+
42
+ # Licensing details
43
+ LICENSE = "general.license"
44
+ LICENSE_NAME = "general.license.name"
45
+ LICENSE_LINK = "general.license.link"
46
+
47
+ # Typically represents the converted GGUF repo (Unless native)
48
+ URL = "general.url" # Model Website/Paper
49
+ DOI = "general.doi"
50
+ UUID = "general.uuid"
51
+ REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)
52
+
53
+ # Model Source during conversion
54
+ SOURCE_URL = "general.source.url" # Model Website/Paper
55
+ SOURCE_DOI = "general.source.doi"
56
+ SOURCE_UUID = "general.source.uuid"
57
+ SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)
58
+
59
+ # Base Model Source. There can be more than one source if it's a merged
60
+ # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
61
+    # tracing lineage of models as they are finetuned or merged over time.
62
+ BASE_MODEL_COUNT = "general.base_model.count"
63
+ BASE_MODEL_NAME = "general.base_model.{id}.name"
64
+ BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
65
+ BASE_MODEL_VERSION = "general.base_model.{id}.version"
66
+ BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
67
+ BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
68
+ BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
69
+ BASE_MODEL_DOI = "general.base_model.{id}.doi"
70
+ BASE_MODEL_UUID = "general.base_model.{id}.uuid"
71
+ BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
72
+
73
+ # Dataset Source
74
+ DATASET_COUNT = "general.dataset.count"
75
+ DATASET_NAME = "general.dataset.{id}.name"
76
+ DATASET_AUTHOR = "general.dataset.{id}.author"
77
+ DATASET_VERSION = "general.dataset.{id}.version"
78
+ DATASET_ORGANIZATION = "general.dataset.{id}.organization"
79
+ DATASET_DESCRIPTION = "general.dataset.{id}.description"
80
+ DATASET_URL = "general.dataset.{id}.url" # Model Website/Paper
81
+ DATASET_DOI = "general.dataset.{id}.doi"
82
+ DATASET_UUID = "general.dataset.{id}.uuid"
83
+ DATASET_REPO_URL = "general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)
84
+
85
+ # Array based KV stores
86
+ TAGS = "general.tags"
87
+ LANGUAGES = "general.languages"
88
+
89
+ class LLM:
90
+ VOCAB_SIZE = "{arch}.vocab_size"
91
+ CONTEXT_LENGTH = "{arch}.context_length"
92
+ EMBEDDING_LENGTH = "{arch}.embedding_length"
93
+ FEATURES_LENGTH = "{arch}.features_length"
94
+ BLOCK_COUNT = "{arch}.block_count"
95
+ LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
96
+ FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
97
+ EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
98
+ EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length"
99
+ USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
100
+ TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
101
+ EXPERT_COUNT = "{arch}.expert_count"
102
+ EXPERT_USED_COUNT = "{arch}.expert_used_count"
103
+ EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
104
+ EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
105
+ EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
106
+ EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
107
+ MOE_EVERY_N_LAYERS = "{arch}.moe_every_n_layers"
108
+ POOLING_TYPE = "{arch}.pooling_type"
109
+ LOGIT_SCALE = "{arch}.logit_scale"
110
+ DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
111
+ ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
112
+ FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
113
+ SWIN_NORM = "{arch}.swin_norm"
114
+ RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
115
+ TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
116
+ TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
117
+ RESIDUAL_SCALE = "{arch}.residual_scale"
118
+ EMBEDDING_SCALE = "{arch}.embedding_scale"
119
+ TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
120
+ INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step"
121
+
122
+ class Attention:
123
+ HEAD_COUNT = "{arch}.attention.head_count"
124
+ HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
125
+ MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
126
+ CLAMP_KQV = "{arch}.attention.clamp_kqv"
127
+ KEY_LENGTH = "{arch}.attention.key_length"
128
+ VALUE_LENGTH = "{arch}.attention.value_length"
129
+ LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
130
+ LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
131
+ GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
132
+ GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
133
+ CAUSAL = "{arch}.attention.causal"
134
+ Q_LORA_RANK = "{arch}.attention.q_lora_rank"
135
+ KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
136
+ DECAY_LORA_RANK = "{arch}.attention.decay_lora_rank"
137
+ ICLR_LORA_RANK = "{arch}.attention.iclr_lora_rank"
138
+ VALUE_RESIDUAL_MIX_LORA_RANK = "{arch}.attention.value_residual_mix_lora_rank"
139
+ GATE_LORA_RANK = "{arch}.attention.gate_lora_rank"
140
+ REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
141
+ SLIDING_WINDOW = "{arch}.attention.sliding_window"
142
+ SCALE = "{arch}.attention.scale"
143
+ KEY_LENGTH_MLA = "{arch}.attention.key_length_mla"
144
+ VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"
145
+
146
+ class Rope:
147
+ DIMENSION_COUNT = "{arch}.rope.dimension_count"
148
+ DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
149
+ FREQ_BASE = "{arch}.rope.freq_base"
150
+ SCALING_TYPE = "{arch}.rope.scaling.type"
151
+ SCALING_FACTOR = "{arch}.rope.scaling.factor"
152
+ SCALING_ATTN_FACTOR = "{arch}.rope.scaling.attn_factor"
153
+ SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
154
+ SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
155
+ SCALING_YARN_LOG_MUL = "{arch}.rope.scaling.yarn_log_multiplier"
156
+
157
+ class Split:
158
+ LLM_KV_SPLIT_NO = "split.no"
159
+ LLM_KV_SPLIT_COUNT = "split.count"
160
+ LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"
161
+
162
+ class SSM:
163
+ CONV_KERNEL = "{arch}.ssm.conv_kernel"
164
+ INNER_SIZE = "{arch}.ssm.inner_size"
165
+ STATE_SIZE = "{arch}.ssm.state_size"
166
+ TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
167
+ DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
168
+
169
+ class WKV:
170
+ HEAD_SIZE = "{arch}.wkv.head_size"
171
+
172
+ class PosNet:
173
+ EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
174
+ BLOCK_COUNT = "{arch}.posnet.block_count"
175
+
176
+ class ConvNext:
177
+ EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
178
+ BLOCK_COUNT = "{arch}.convnext.block_count"
179
+
180
+ class Tokenizer:
181
+ MODEL = "tokenizer.ggml.model"
182
+ PRE = "tokenizer.ggml.pre"
183
+ LIST = "tokenizer.ggml.tokens"
184
+ TOKEN_TYPE = "tokenizer.ggml.token_type"
185
+ TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
186
+ SCORES = "tokenizer.ggml.scores"
187
+ MERGES = "tokenizer.ggml.merges"
188
+ BOS_ID = "tokenizer.ggml.bos_token_id"
189
+ EOS_ID = "tokenizer.ggml.eos_token_id"
190
+ EOT_ID = "tokenizer.ggml.eot_token_id"
191
+ EOM_ID = "tokenizer.ggml.eom_token_id"
192
+ UNK_ID = "tokenizer.ggml.unknown_token_id"
193
+ SEP_ID = "tokenizer.ggml.seperator_token_id"
194
+ PAD_ID = "tokenizer.ggml.padding_token_id"
195
+ MASK_ID = "tokenizer.ggml.mask_token_id"
196
+ ADD_BOS = "tokenizer.ggml.add_bos_token"
197
+ ADD_EOS = "tokenizer.ggml.add_eos_token"
198
+ ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
199
+ REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
200
+ PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
201
+ HF_JSON = "tokenizer.huggingface.json"
202
+ RWKV = "tokenizer.rwkv.world"
203
+ CHAT_TEMPLATE = "tokenizer.chat_template"
204
+ CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
205
+ CHAT_TEMPLATES = "tokenizer.chat_templates"
206
+ # FIM/Infill special tokens constants
207
+ FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
208
+ FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
209
+ FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
210
+ FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
211
+ FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
212
+ FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
213
+ # deprecated:
214
+ PREFIX_ID = "tokenizer.ggml.prefix_token_id"
215
+ SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
216
+ MIDDLE_ID = "tokenizer.ggml.middle_token_id"
217
+
218
+ class Adapter:
219
+ TYPE = "adapter.type"
220
+ LORA_ALPHA = "adapter.lora.alpha"
221
+
222
+ class ClipVision:
223
+ PROJECTOR_TYPE = "clip.projector_type"
224
+ HAS_VISION_ENCODER = "clip.has_vision_encoder"
225
+ HAS_LLAVA_PROJECTOR = "clip.has_llava_projector"
226
+ IMAGE_SIZE = "clip.vision.image_size"
227
+ PATCH_SIZE = "clip.vision.patch_size"
228
+ EMBEDDING_LENGTH = "clip.vision.embedding_length"
229
+ FEED_FORWARD_LENGTH = "clip.vision.feed_forward_length"
230
+ PROJECTION_DIM = "clip.vision.projection_dim"
231
+ BLOCK_COUNT = "clip.vision.block_count"
232
+ IMAGE_MEAN = "clip.vision.image_mean"
233
+ IMAGE_STD = "clip.vision.image_std"
234
+ SPATIAL_MERGE_SIZE = "clip.vision.spatial_merge_size"
235
+ USE_GELU = "clip.use_gelu"
236
+ USE_SILU = "clip.use_silu"
237
+ N_WA_PATTERN = "clip.vision.n_wa_pattern" # used by qwen2.5vl
238
+
239
+ class Attention:
240
+ HEAD_COUNT = "clip.vision.attention.head_count"
241
+ LAYERNORM_EPS = "clip.vision.attention.layer_norm_epsilon"
242
+
243
+ class Projector:
244
+ SCALE_FACTOR = "clip.vision.projector.scale_factor"
245
+
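A small usage sketch (illustrative, not from the packaged file): the keys containing "{arch}", "{id}" or "{name}" are ordinary Python format strings, and a writer or reader substitutes the concrete architecture name, index, or template name before storing or looking up the key-value pair.

    # Illustrative only: substituting placeholders in the parameterized keys.
    context_length_key = "{arch}.context_length".format(arch="llama")
    # -> "llama.context_length"
    base_model_name_key = "general.base_model.{id}.name".format(id=0)
    # -> "general.base_model.0.name"
    named_template_key = "tokenizer.chat_template.{name}".format(name="tool_use")
    # -> "tokenizer.chat_template.tool_use"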
246
+ #
247
+ # recommended mapping of model tensor names for storage in gguf
248
+ #
249
+
250
+
251
+ class GGUFType:
252
+ MODEL = "model"
253
+ ADAPTER = "adapter"
254
+ CLIP_VISION = "clip-vision"
255
+
256
+
257
+ class MODEL_ARCH(IntEnum):
258
+ CLIP_VISION = auto() # dummy arch for clip.cpp
259
+ LLAMA = auto()
260
+ LLAMA4 = auto()
261
+ DECI = auto()
262
+ FALCON = auto()
263
+ BAICHUAN = auto()
264
+ GROK = auto()
265
+ GPT2 = auto()
266
+ GPTJ = auto()
267
+ GPTNEOX = auto()
268
+ MPT = auto()
269
+ STARCODER = auto()
270
+ REFACT = auto()
271
+ BERT = auto()
272
+ NOMIC_BERT = auto()
273
+ NOMIC_BERT_MOE = auto()
274
+ JINA_BERT_V2 = auto()
275
+ BLOOM = auto()
276
+ STABLELM = auto()
277
+ QWEN = auto()
278
+ QWEN2 = auto()
279
+ QWEN2MOE = auto()
280
+ QWEN2VL = auto()
281
+ QWEN3 = auto()
282
+ QWEN3MOE = auto()
283
+ PHI2 = auto()
284
+ PHI3 = auto()
285
+ PHIMOE = auto()
286
+ PLAMO = auto()
287
+ CODESHELL = auto()
288
+ ORION = auto()
289
+ INTERNLM2 = auto()
290
+ MINICPM = auto()
291
+ MINICPM3 = auto()
292
+ GEMMA = auto()
293
+ GEMMA2 = auto()
294
+ GEMMA3 = auto()
295
+ STARCODER2 = auto()
296
+ RWKV6 = auto()
297
+ RWKV6QWEN2 = auto()
298
+ RWKV7 = auto()
299
+ ARWKV7 = auto()
300
+ MAMBA = auto()
301
+ XVERSE = auto()
302
+ COMMAND_R = auto()
303
+ COHERE2 = auto()
304
+ DBRX = auto()
305
+ OLMO = auto()
306
+ OLMO2 = auto()
307
+ OLMOE = auto()
308
+ OPENELM = auto()
309
+ ARCTIC = auto()
310
+ DEEPSEEK = auto()
311
+ DEEPSEEK2 = auto()
312
+ CHATGLM = auto()
313
+ GLM4 = auto()
314
+ BITNET = auto()
315
+ T5 = auto()
316
+ T5ENCODER = auto()
317
+ JAIS = auto()
318
+ NEMOTRON = auto()
319
+ EXAONE = auto()
320
+ GRANITE = auto()
321
+ GRANITE_MOE = auto()
322
+ CHAMELEON = auto()
323
+ WAVTOKENIZER_DEC = auto()
324
+ PLM = auto()
325
+ BAILINGMOE = auto()
326
+
327
+
328
+ class VISION_PROJECTOR_TYPE(IntEnum):
329
+ MLP = auto()
330
+ LDP = auto()
331
+ LDPV2 = auto()
332
+ RESAMPLER = auto()
333
+ GLM_EDGE = auto()
334
+ MERGER = auto()
335
+ GEMMA3 = auto()
336
+
337
+
338
+ class MODEL_TENSOR(IntEnum):
339
+ TOKEN_EMBD = auto()
340
+ TOKEN_EMBD_NORM = auto()
341
+ TOKEN_TYPES = auto()
342
+ POS_EMBD = auto()
343
+ OUTPUT = auto()
344
+ OUTPUT_NORM = auto()
345
+ ROPE_FREQS = auto()
346
+ ROPE_FACTORS_LONG = auto()
347
+ ROPE_FACTORS_SHORT = auto()
348
+ ATTN_Q = auto()
349
+ ATTN_K = auto()
350
+ ATTN_V = auto()
351
+ ATTN_QKV = auto()
352
+ ATTN_OUT = auto()
353
+ ATTN_NORM = auto()
354
+ ATTN_NORM_2 = auto()
355
+ ATTN_OUT_NORM = auto()
356
+ ATTN_POST_NORM = auto()
357
+ ATTN_ROT_EMBD = auto()
358
+ FFN_GATE_INP = auto()
359
+ FFN_GATE_INP_SHEXP = auto()
360
+ FFN_NORM = auto()
361
+ FFN_PRE_NORM = auto()
362
+ FFN_POST_NORM = auto()
363
+ FFN_GATE = auto()
364
+ FFN_DOWN = auto()
365
+ FFN_UP = auto()
366
+ FFN_ACT = auto()
367
+ FFN_NORM_EXP = auto()
368
+ FFN_GATE_EXP = auto()
369
+ FFN_DOWN_EXP = auto()
370
+ FFN_UP_EXP = auto()
371
+ FFN_GATE_SHEXP = auto()
372
+ FFN_DOWN_SHEXP = auto()
373
+ FFN_UP_SHEXP = auto()
374
+ FFN_EXP_PROBS_B = auto()
375
+ ATTN_Q_NORM = auto()
376
+ ATTN_K_NORM = auto()
377
+ LAYER_OUT_NORM = auto()
378
+ SSM_IN = auto()
379
+ SSM_CONV1D = auto()
380
+ SSM_X = auto()
381
+ SSM_DT = auto()
382
+ SSM_A = auto()
383
+ SSM_D = auto()
384
+ SSM_OUT = auto()
385
+ TIME_MIX_W0 = auto()
386
+ TIME_MIX_W1 = auto()
387
+ TIME_MIX_W2 = auto()
388
+ TIME_MIX_A0 = auto()
389
+ TIME_MIX_A1 = auto()
390
+ TIME_MIX_A2 = auto()
391
+ TIME_MIX_V0 = auto()
392
+ TIME_MIX_V1 = auto()
393
+ TIME_MIX_V2 = auto()
394
+ TIME_MIX_G1 = auto()
395
+ TIME_MIX_G2 = auto()
396
+ TIME_MIX_K_K = auto()
397
+ TIME_MIX_K_A = auto()
398
+ TIME_MIX_R_K = auto()
399
+ TIME_MIX_LERP_X = auto()
400
+ TIME_MIX_LERP_K = auto()
401
+ TIME_MIX_LERP_V = auto()
402
+ TIME_MIX_LERP_R = auto()
403
+ TIME_MIX_LERP_G = auto()
404
+ TIME_MIX_LERP_FUSED = auto()
405
+ TIME_MIX_LERP_W = auto()
406
+ TIME_MIX_FIRST = auto()
407
+ TIME_MIX_DECAY = auto()
408
+ TIME_MIX_DECAY_W1 = auto()
409
+ TIME_MIX_DECAY_W2 = auto()
410
+ TIME_MIX_KEY = auto()
411
+ TIME_MIX_VALUE = auto()
412
+ TIME_MIX_RECEPTANCE = auto()
413
+ TIME_MIX_GATE = auto()
414
+ TIME_MIX_LN = auto()
415
+ TIME_MIX_OUTPUT = auto()
416
+ CHANNEL_MIX_LERP_K = auto()
417
+ CHANNEL_MIX_LERP_R = auto()
418
+ CHANNEL_MIX_KEY = auto()
419
+ CHANNEL_MIX_RECEPTANCE = auto()
420
+ CHANNEL_MIX_VALUE = auto()
421
+ ATTN_Q_A = auto()
422
+ ATTN_Q_B = auto()
423
+ ATTN_KV_A_MQA = auto()
424
+ ATTN_KV_B = auto()
425
+ ATTN_K_B = auto()
426
+ ATTN_V_B = auto()
427
+ ATTN_Q_A_NORM = auto()
428
+ ATTN_KV_A_NORM = auto()
429
+ FFN_SUB_NORM = auto()
430
+ ATTN_SUB_NORM = auto()
431
+ DEC_ATTN_NORM = auto()
432
+ DEC_ATTN_Q = auto()
433
+ DEC_ATTN_K = auto()
434
+ DEC_ATTN_V = auto()
435
+ DEC_ATTN_OUT = auto()
436
+ DEC_ATTN_REL_B = auto()
437
+ DEC_CROSS_ATTN_NORM = auto()
438
+ DEC_CROSS_ATTN_Q = auto()
439
+ DEC_CROSS_ATTN_K = auto()
440
+ DEC_CROSS_ATTN_V = auto()
441
+ DEC_CROSS_ATTN_OUT = auto()
442
+ DEC_CROSS_ATTN_REL_B = auto()
443
+ DEC_FFN_NORM = auto()
444
+ DEC_FFN_GATE = auto()
445
+ DEC_FFN_DOWN = auto()
446
+ DEC_FFN_UP = auto()
447
+ DEC_OUTPUT_NORM = auto()
448
+ ENC_ATTN_NORM = auto()
449
+ ENC_ATTN_Q = auto()
450
+ ENC_ATTN_K = auto()
451
+ ENC_ATTN_V = auto()
452
+ ENC_ATTN_OUT = auto()
453
+ ENC_ATTN_REL_B = auto()
454
+ ENC_FFN_NORM = auto()
455
+ ENC_FFN_GATE = auto()
456
+ ENC_FFN_DOWN = auto()
457
+ ENC_FFN_UP = auto()
458
+ ENC_OUTPUT_NORM = auto()
459
+ CLS = auto() # classifier
460
+ CLS_OUT = auto() # classifier output projection
461
+ CONV1D = auto()
462
+ CONVNEXT_DW = auto()
463
+ CONVNEXT_NORM = auto()
464
+ CONVNEXT_PW1 = auto()
465
+ CONVNEXT_PW2 = auto()
466
+ CONVNEXT_GAMMA = auto()
467
+ POSNET_CONV1 = auto()
468
+ POSNET_CONV2 = auto()
469
+ POSNET_NORM = auto()
470
+ POSNET_NORM1 = auto()
471
+ POSNET_NORM2 = auto()
472
+ POSNET_ATTN_NORM = auto()
473
+ POSNET_ATTN_Q = auto()
474
+ POSNET_ATTN_K = auto()
475
+ POSNET_ATTN_V = auto()
476
+ POSNET_ATTN_OUT = auto()
477
+ # vision
478
+ V_MMPROJ = auto()
479
+ V_MMPROJ_FC = auto()
480
+ V_MMPROJ_MLP = auto()
481
+ V_MMPROJ_PEG = auto()
482
+ V_ENC_EMBD_CLS = auto()
483
+ V_ENC_EMBD_PATCH = auto()
484
+ V_ENC_EMBD_POS = auto()
485
+ V_ENC_ATTN_Q = auto()
486
+ V_ENC_ATTN_Q_NORM = auto()
487
+ V_ENC_ATTN_K = auto()
488
+ V_ENC_ATTN_K_NORM = auto()
489
+ V_ENC_ATTN_V = auto()
490
+ V_ENC_INPUT_NORM = auto()
491
+ V_ENC_OUTPUT = auto()
492
+ V_ENC_OUTPUT_NORM = auto()
493
+ V_ENC_FFN_UP = auto()
494
+ V_ENC_FFN_GATE = auto()
495
+ V_ENC_FFN_DOWN = auto()
496
+ V_LAYER_SCALE_1 = auto()
497
+ V_LAYER_SCALE_2 = auto()
498
+ V_PRE_NORM = auto()
499
+ V_POST_NORM = auto()
500
+ V_MM_INP_NORM = auto()
501
+ V_MM_INP_PROJ = auto() # gemma3
502
+ V_MM_SOFT_EMB_NORM = auto() # gemma3
503
+ V_RESMPL_POS_EMBD_K = auto() # minicpmv
504
+ V_RESMPL_ATTN_Q = auto() # minicpmv
505
+ V_RESMPL_ATTN_K = auto() # minicpmv
506
+ V_RESMPL_ATTN_V = auto() # minicpmv
507
+ V_RESMPL_ATTN_OUT = auto() # minicpmv
508
+ V_RESMPL_KV = auto() # minicpmv
509
+ V_RESMPL_KV_NORM = auto() # minicpmv
510
+ V_RESMPL_POST_NORM = auto() # minicpmv
511
+ V_RESMPL_Q_NORM = auto() # minicpmv
512
+ V_RESMPL_PROJ = auto() # minicpmv
513
+ V_RESMPL_QUERY = auto() # minicpmv
514
+ V_TOK_EMBD_IMG_BREAK = auto() # pixtral
515
+ V_MM_PATCH_MERGER = auto() # mistral small 3.1
516
+
517
+
518
+ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
519
+ MODEL_ARCH.CLIP_VISION: "clip", # dummy arch for clip.cpp
520
+ MODEL_ARCH.LLAMA: "llama",
521
+ MODEL_ARCH.LLAMA4: "llama4",
522
+ MODEL_ARCH.DECI: "deci",
523
+ MODEL_ARCH.FALCON: "falcon",
524
+ MODEL_ARCH.BAICHUAN: "baichuan",
525
+ MODEL_ARCH.GROK: "grok",
526
+ MODEL_ARCH.GPT2: "gpt2",
527
+ MODEL_ARCH.GPTJ: "gptj",
528
+ MODEL_ARCH.GPTNEOX: "gptneox",
529
+ MODEL_ARCH.MPT: "mpt",
530
+ MODEL_ARCH.STARCODER: "starcoder",
531
+ MODEL_ARCH.REFACT: "refact",
532
+ MODEL_ARCH.BERT: "bert",
533
+ MODEL_ARCH.NOMIC_BERT: "nomic-bert",
534
+ MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe",
535
+ MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
536
+ MODEL_ARCH.BLOOM: "bloom",
537
+ MODEL_ARCH.STABLELM: "stablelm",
538
+ MODEL_ARCH.QWEN: "qwen",
539
+ MODEL_ARCH.QWEN2: "qwen2",
540
+ MODEL_ARCH.QWEN2MOE: "qwen2moe",
541
+ MODEL_ARCH.QWEN2VL: "qwen2vl",
542
+ MODEL_ARCH.QWEN3: "qwen3",
543
+ MODEL_ARCH.QWEN3MOE: "qwen3moe",
544
+ MODEL_ARCH.PHI2: "phi2",
545
+ MODEL_ARCH.PHI3: "phi3",
546
+ MODEL_ARCH.PHIMOE: "phimoe",
547
+ MODEL_ARCH.PLAMO: "plamo",
548
+ MODEL_ARCH.CODESHELL: "codeshell",
549
+ MODEL_ARCH.ORION: "orion",
550
+ MODEL_ARCH.INTERNLM2: "internlm2",
551
+ MODEL_ARCH.MINICPM: "minicpm",
552
+ MODEL_ARCH.MINICPM3: "minicpm3",
553
+ MODEL_ARCH.GEMMA: "gemma",
554
+ MODEL_ARCH.GEMMA2: "gemma2",
555
+ MODEL_ARCH.GEMMA3: "gemma3",
556
+ MODEL_ARCH.STARCODER2: "starcoder2",
557
+ MODEL_ARCH.RWKV6: "rwkv6",
558
+ MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
559
+ MODEL_ARCH.RWKV7: "rwkv7",
560
+ MODEL_ARCH.ARWKV7: "arwkv7",
561
+ MODEL_ARCH.MAMBA: "mamba",
562
+ MODEL_ARCH.XVERSE: "xverse",
563
+ MODEL_ARCH.COMMAND_R: "command-r",
564
+ MODEL_ARCH.COHERE2: "cohere2",
565
+ MODEL_ARCH.DBRX: "dbrx",
566
+ MODEL_ARCH.OLMO: "olmo",
567
+ MODEL_ARCH.OLMO2: "olmo2",
568
+ MODEL_ARCH.OLMOE: "olmoe",
569
+ MODEL_ARCH.OPENELM: "openelm",
570
+ MODEL_ARCH.ARCTIC: "arctic",
571
+ MODEL_ARCH.DEEPSEEK: "deepseek",
572
+ MODEL_ARCH.DEEPSEEK2: "deepseek2",
573
+ MODEL_ARCH.CHATGLM: "chatglm",
574
+ MODEL_ARCH.GLM4: "glm4",
575
+ MODEL_ARCH.BITNET: "bitnet",
576
+ MODEL_ARCH.T5: "t5",
577
+ MODEL_ARCH.T5ENCODER: "t5encoder",
578
+ MODEL_ARCH.JAIS: "jais",
579
+ MODEL_ARCH.NEMOTRON: "nemotron",
580
+ MODEL_ARCH.EXAONE: "exaone",
581
+ MODEL_ARCH.GRANITE: "granite",
582
+ MODEL_ARCH.GRANITE_MOE: "granitemoe",
583
+ MODEL_ARCH.CHAMELEON: "chameleon",
584
+ MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
585
+ MODEL_ARCH.PLM: "plm",
586
+ MODEL_ARCH.BAILINGMOE: "bailingmoe",
587
+ }
588
+
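An illustrative sketch (not part of the packaged file, assuming the definitions above are in scope): the mapping above goes from enum to string, so code that reads "general.architecture" out of a GGUF file typically also needs the reverse direction, which can be built on the fly.

    # Hypothetical helper: invert MODEL_ARCH_NAMES to resolve an architecture
    # string (as stored under "general.architecture") back to its enum member.
    MODEL_NAME_TO_ARCH = {name: arch for arch, name in MODEL_ARCH_NAMES.items()}

    assert MODEL_NAME_TO_ARCH["qwen2"] is MODEL_ARCH.QWEN2
    assert MODEL_ARCH_NAMES[MODEL_NAME_TO_ARCH["llama"]] == "llama"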
589
+ VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
590
+ VISION_PROJECTOR_TYPE.MLP: "mlp",
591
+ VISION_PROJECTOR_TYPE.LDP: "ldp",
592
+ VISION_PROJECTOR_TYPE.LDPV2: "ldpv2",
593
+ VISION_PROJECTOR_TYPE.RESAMPLER: "resampler",
594
+ VISION_PROJECTOR_TYPE.GLM_EDGE: "adapter",
595
+ VISION_PROJECTOR_TYPE.MERGER: "qwen2vl_merger",
596
+ VISION_PROJECTOR_TYPE.GEMMA3: "gemma3",
597
+ }
598
+
599
+ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
600
+ MODEL_TENSOR.TOKEN_EMBD: "token_embd",
601
+ MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
602
+ MODEL_TENSOR.TOKEN_TYPES: "token_types",
603
+ MODEL_TENSOR.POS_EMBD: "position_embd",
604
+ MODEL_TENSOR.OUTPUT_NORM: "output_norm",
605
+ MODEL_TENSOR.OUTPUT: "output",
606
+ MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
607
+ MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
608
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
609
+ MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
610
+ MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
611
+ MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
612
+ MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
613
+ MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
614
+ MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
615
+ MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
616
+ MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
617
+ MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
618
+ MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
619
+ MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
620
+ MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
621
+ MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
622
+ MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
623
+ MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
624
+ MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
625
+ MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
626
+ MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
627
+ MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
628
+ MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
629
+ MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
630
+ MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
631
+ MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
632
+ MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
633
+ MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
634
+ MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
635
+ MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
636
+ MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
637
+ MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
638
+ MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
639
+ MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
640
+ MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
641
+ MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
642
+ MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
643
+ MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
644
+ MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
645
+ MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
646
+ MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
647
+ MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
648
+ MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
649
+ MODEL_TENSOR.TIME_MIX_A0: "blk.{bid}.time_mix_a0",
650
+ MODEL_TENSOR.TIME_MIX_A1: "blk.{bid}.time_mix_a1",
651
+ MODEL_TENSOR.TIME_MIX_A2: "blk.{bid}.time_mix_a2",
652
+ MODEL_TENSOR.TIME_MIX_V0: "blk.{bid}.time_mix_v0",
653
+ MODEL_TENSOR.TIME_MIX_V1: "blk.{bid}.time_mix_v1",
654
+ MODEL_TENSOR.TIME_MIX_V2: "blk.{bid}.time_mix_v2",
655
+ MODEL_TENSOR.TIME_MIX_G1: "blk.{bid}.time_mix_g1",
656
+ MODEL_TENSOR.TIME_MIX_G2: "blk.{bid}.time_mix_g2",
657
+ MODEL_TENSOR.TIME_MIX_K_K: "blk.{bid}.time_mix_k_k",
658
+ MODEL_TENSOR.TIME_MIX_K_A: "blk.{bid}.time_mix_k_a",
659
+ MODEL_TENSOR.TIME_MIX_R_K: "blk.{bid}.time_mix_r_k",
660
+ MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
661
+ MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
662
+ MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
663
+ MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
664
+ MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
665
+ MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
666
+ MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
667
+ MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
668
+ MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
669
+ MODEL_TENSOR.TIME_MIX_DECAY_W1: "blk.{bid}.time_mix_decay_w1",
670
+ MODEL_TENSOR.TIME_MIX_DECAY_W2: "blk.{bid}.time_mix_decay_w2",
671
+ MODEL_TENSOR.TIME_MIX_KEY: "blk.{bid}.time_mix_key",
672
+ MODEL_TENSOR.TIME_MIX_VALUE: "blk.{bid}.time_mix_value",
673
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE: "blk.{bid}.time_mix_receptance",
674
+ MODEL_TENSOR.TIME_MIX_GATE: "blk.{bid}.time_mix_gate",
675
+ MODEL_TENSOR.TIME_MIX_LN: "blk.{bid}.time_mix_ln",
676
+ MODEL_TENSOR.TIME_MIX_OUTPUT: "blk.{bid}.time_mix_output",
677
+ MODEL_TENSOR.CHANNEL_MIX_LERP_K: "blk.{bid}.channel_mix_lerp_k",
678
+ MODEL_TENSOR.CHANNEL_MIX_LERP_R: "blk.{bid}.channel_mix_lerp_r",
679
+ MODEL_TENSOR.CHANNEL_MIX_KEY: "blk.{bid}.channel_mix_key",
680
+ MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: "blk.{bid}.channel_mix_receptance",
681
+ MODEL_TENSOR.CHANNEL_MIX_VALUE: "blk.{bid}.channel_mix_value",
682
+ MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
683
+ MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
684
+ MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
685
+ MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
686
+ MODEL_TENSOR.ATTN_K_B: "blk.{bid}.attn_k_b",
687
+ MODEL_TENSOR.ATTN_V_B: "blk.{bid}.attn_v_b",
688
+ MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
689
+ MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
690
+ MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
691
+ MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
692
+ MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
693
+ MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
694
+ MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
695
+ MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
696
+ MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
697
+ MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
698
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
699
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
700
+ MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
701
+ MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
702
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
703
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
704
+ MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
705
+ MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
706
+ MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
707
+ MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
708
+ MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
709
+ MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
710
+ MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
711
+ MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
712
+ MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
713
+ MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
714
+ MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
715
+ MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
716
+ MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
717
+ MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
718
+ MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
719
+ MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
720
+ MODEL_TENSOR.CLS: "cls",
721
+ MODEL_TENSOR.CLS_OUT: "cls.output",
722
+ MODEL_TENSOR.CONV1D: "conv1d",
723
+ MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
724
+ MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
725
+ MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
726
+ MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
727
+ MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
728
+ MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
729
+ MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
730
+ MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
731
+ MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
732
+ MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
733
+ MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
734
+ MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
735
+ MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
736
+ MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
737
+ MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
738
+ # vision
739
+ MODEL_TENSOR.V_MMPROJ: "mm.{bid}",
740
+ MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc",
741
+ MODEL_TENSOR.V_MMPROJ_MLP: "mm.model.mlp.{bid}",
742
+ MODEL_TENSOR.V_MMPROJ_PEG: "mm.model.peg.{bid}",
743
+ MODEL_TENSOR.V_ENC_EMBD_CLS: "v.class_embd",
744
+ MODEL_TENSOR.V_ENC_EMBD_PATCH: "v.patch_embd",
745
+ MODEL_TENSOR.V_ENC_EMBD_POS: "v.position_embd",
746
+ MODEL_TENSOR.V_ENC_ATTN_Q: "v.blk.{bid}.attn_q",
747
+ MODEL_TENSOR.V_ENC_ATTN_Q_NORM: "v.blk.{bid}.attn_q_norm",
748
+ MODEL_TENSOR.V_ENC_ATTN_K: "v.blk.{bid}.attn_k",
749
+ MODEL_TENSOR.V_ENC_ATTN_K_NORM: "v.blk.{bid}.attn_k_norm",
750
+ MODEL_TENSOR.V_ENC_ATTN_V: "v.blk.{bid}.attn_v",
751
+ MODEL_TENSOR.V_ENC_INPUT_NORM: "v.blk.{bid}.ln1",
752
+ MODEL_TENSOR.V_ENC_OUTPUT: "v.blk.{bid}.attn_out",
753
+ MODEL_TENSOR.V_ENC_OUTPUT_NORM: "v.blk.{bid}.ln2",
754
+ MODEL_TENSOR.V_ENC_FFN_UP: "v.blk.{bid}.ffn_up",
755
+ MODEL_TENSOR.V_ENC_FFN_GATE: "v.blk.{bid}.ffn_gate",
756
+ MODEL_TENSOR.V_ENC_FFN_DOWN: "v.blk.{bid}.ffn_down",
757
+ MODEL_TENSOR.V_LAYER_SCALE_1: "v.blk.{bid}.ls1",
758
+ MODEL_TENSOR.V_LAYER_SCALE_2: "v.blk.{bid}.ls2",
759
+ MODEL_TENSOR.V_PRE_NORM: "v.pre_ln",
760
+ MODEL_TENSOR.V_POST_NORM: "v.post_ln",
761
+ MODEL_TENSOR.V_MM_INP_PROJ: "mm.input_projection",
762
+ MODEL_TENSOR.V_MM_INP_NORM: "mm.input_norm",
763
+ MODEL_TENSOR.V_MM_SOFT_EMB_NORM: "mm.soft_emb_norm",
764
+ MODEL_TENSOR.V_RESMPL_POS_EMBD_K: "resampler.pos_embd_k",
765
+ MODEL_TENSOR.V_RESMPL_ATTN_Q: "resampler.attn.q",
766
+ MODEL_TENSOR.V_RESMPL_ATTN_K: "resampler.attn.k",
767
+ MODEL_TENSOR.V_RESMPL_ATTN_V: "resampler.attn.v",
768
+ MODEL_TENSOR.V_RESMPL_ATTN_OUT: "resampler.attn.out",
769
+ MODEL_TENSOR.V_RESMPL_KV: "resampler.kv",
770
+ MODEL_TENSOR.V_RESMPL_KV_NORM: "resampler.ln_kv",
771
+ MODEL_TENSOR.V_RESMPL_POST_NORM: "resampler.ln_post",
772
+ MODEL_TENSOR.V_RESMPL_Q_NORM: "resampler.ln_q",
773
+ MODEL_TENSOR.V_RESMPL_PROJ: "resampler.proj",
774
+ MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
775
+ MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break", # pixtral
776
+ MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger", # mistral small 3.1
777
+ }
778
+
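For illustration only (not from the packaged file, assuming the definitions above are in scope): the per-layer entries above are templates over the block index, so the concrete tensor name for, say, the query projection of block 3 is obtained by formatting the template.

    # Illustrative: formatting per-block tensor names from their templates.
    attn_q_name = TENSOR_NAMES[MODEL_TENSOR.ATTN_Q].format(bid=3)
    # -> "blk.3.attn_q"
    dec_ffn_up_name = TENSOR_NAMES[MODEL_TENSOR.DEC_FFN_UP].format(bid=0)
    # -> "dec.blk.0.ffn_up"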
779
+ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
780
+ MODEL_ARCH.CLIP_VISION: [
781
+ MODEL_TENSOR.V_MMPROJ,
782
+ MODEL_TENSOR.V_MMPROJ_FC,
783
+ MODEL_TENSOR.V_MMPROJ_MLP,
784
+ MODEL_TENSOR.V_MMPROJ_PEG,
785
+ MODEL_TENSOR.V_ENC_EMBD_CLS,
786
+ MODEL_TENSOR.V_ENC_EMBD_PATCH,
787
+ MODEL_TENSOR.V_ENC_EMBD_POS,
788
+ MODEL_TENSOR.V_ENC_ATTN_Q,
789
+ MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
790
+ MODEL_TENSOR.V_ENC_ATTN_K,
791
+ MODEL_TENSOR.V_ENC_ATTN_K_NORM,
792
+ MODEL_TENSOR.V_ENC_ATTN_V,
793
+ MODEL_TENSOR.V_ENC_INPUT_NORM,
794
+ MODEL_TENSOR.V_ENC_OUTPUT,
795
+ MODEL_TENSOR.V_ENC_OUTPUT_NORM,
796
+ MODEL_TENSOR.V_ENC_FFN_UP,
797
+ MODEL_TENSOR.V_ENC_FFN_GATE,
798
+ MODEL_TENSOR.V_ENC_FFN_DOWN,
799
+ MODEL_TENSOR.V_LAYER_SCALE_1,
800
+ MODEL_TENSOR.V_LAYER_SCALE_2,
801
+ MODEL_TENSOR.V_PRE_NORM,
802
+ MODEL_TENSOR.V_POST_NORM,
803
+ MODEL_TENSOR.V_MM_INP_PROJ,
804
+ MODEL_TENSOR.V_MM_INP_NORM,
805
+ MODEL_TENSOR.V_MM_SOFT_EMB_NORM,
806
+ MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
807
+ MODEL_TENSOR.V_RESMPL_ATTN_Q,
808
+ MODEL_TENSOR.V_RESMPL_ATTN_K,
809
+ MODEL_TENSOR.V_RESMPL_ATTN_V,
810
+ MODEL_TENSOR.V_RESMPL_ATTN_OUT,
811
+ MODEL_TENSOR.V_RESMPL_KV,
812
+ MODEL_TENSOR.V_RESMPL_KV_NORM,
813
+ MODEL_TENSOR.V_RESMPL_POST_NORM,
814
+ MODEL_TENSOR.V_RESMPL_Q_NORM,
815
+ MODEL_TENSOR.V_RESMPL_PROJ,
816
+ MODEL_TENSOR.V_RESMPL_QUERY,
817
+ MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
818
+ MODEL_TENSOR.V_MM_PATCH_MERGER,
819
+ ],
820
+ MODEL_ARCH.LLAMA: [
821
+ MODEL_TENSOR.TOKEN_EMBD,
822
+ MODEL_TENSOR.OUTPUT_NORM,
823
+ MODEL_TENSOR.OUTPUT,
824
+ MODEL_TENSOR.ROPE_FREQS,
825
+ MODEL_TENSOR.ATTN_NORM,
826
+ MODEL_TENSOR.ATTN_Q,
827
+ MODEL_TENSOR.ATTN_K,
828
+ MODEL_TENSOR.ATTN_V,
829
+ MODEL_TENSOR.ATTN_OUT,
830
+ MODEL_TENSOR.ATTN_ROT_EMBD,
831
+ MODEL_TENSOR.FFN_GATE_INP,
832
+ MODEL_TENSOR.FFN_NORM,
833
+ MODEL_TENSOR.FFN_GATE,
834
+ MODEL_TENSOR.FFN_DOWN,
835
+ MODEL_TENSOR.FFN_UP,
836
+ MODEL_TENSOR.FFN_GATE_EXP,
837
+ MODEL_TENSOR.FFN_DOWN_EXP,
838
+ MODEL_TENSOR.FFN_UP_EXP,
839
+ ],
840
+ MODEL_ARCH.LLAMA4: [
841
+ MODEL_TENSOR.TOKEN_EMBD,
842
+ MODEL_TENSOR.OUTPUT_NORM,
843
+ MODEL_TENSOR.OUTPUT,
844
+ MODEL_TENSOR.ROPE_FREQS,
845
+ MODEL_TENSOR.ATTN_NORM,
846
+ MODEL_TENSOR.ATTN_Q,
847
+ MODEL_TENSOR.ATTN_K,
848
+ MODEL_TENSOR.ATTN_V,
849
+ MODEL_TENSOR.ATTN_OUT,
850
+ MODEL_TENSOR.ATTN_ROT_EMBD,
851
+ MODEL_TENSOR.FFN_GATE_INP,
852
+ MODEL_TENSOR.FFN_NORM,
853
+ MODEL_TENSOR.FFN_GATE,
854
+ MODEL_TENSOR.FFN_DOWN,
855
+ MODEL_TENSOR.FFN_UP,
856
+ MODEL_TENSOR.FFN_GATE_EXP,
857
+ MODEL_TENSOR.FFN_DOWN_EXP,
858
+ MODEL_TENSOR.FFN_UP_EXP,
859
+ MODEL_TENSOR.FFN_GATE_SHEXP,
860
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
861
+ MODEL_TENSOR.FFN_UP_SHEXP,
862
+ ],
863
+ MODEL_ARCH.DECI: [
864
+ MODEL_TENSOR.TOKEN_EMBD,
865
+ MODEL_TENSOR.OUTPUT_NORM,
866
+ MODEL_TENSOR.OUTPUT,
867
+ MODEL_TENSOR.ROPE_FREQS,
868
+ MODEL_TENSOR.ATTN_NORM,
869
+ MODEL_TENSOR.ATTN_Q,
870
+ MODEL_TENSOR.ATTN_K,
871
+ MODEL_TENSOR.ATTN_V,
872
+ MODEL_TENSOR.ATTN_OUT,
873
+ MODEL_TENSOR.ATTN_ROT_EMBD,
874
+ MODEL_TENSOR.FFN_GATE_INP,
875
+ MODEL_TENSOR.FFN_NORM,
876
+ MODEL_TENSOR.FFN_GATE,
877
+ MODEL_TENSOR.FFN_DOWN,
878
+ MODEL_TENSOR.FFN_UP,
879
+ MODEL_TENSOR.FFN_GATE_EXP,
880
+ MODEL_TENSOR.FFN_DOWN_EXP,
881
+ MODEL_TENSOR.FFN_UP_EXP,
882
+ ],
883
+ MODEL_ARCH.GROK: [
884
+ MODEL_TENSOR.TOKEN_EMBD,
885
+ MODEL_TENSOR.OUTPUT_NORM,
886
+ MODEL_TENSOR.OUTPUT,
887
+ MODEL_TENSOR.ROPE_FREQS,
888
+ MODEL_TENSOR.ATTN_NORM,
889
+ MODEL_TENSOR.ATTN_Q,
890
+ MODEL_TENSOR.ATTN_K,
891
+ MODEL_TENSOR.ATTN_V,
892
+ MODEL_TENSOR.ATTN_OUT,
893
+ MODEL_TENSOR.ATTN_ROT_EMBD,
894
+ MODEL_TENSOR.ATTN_OUT_NORM,
895
+ MODEL_TENSOR.FFN_GATE_INP,
896
+ MODEL_TENSOR.FFN_NORM,
897
+ MODEL_TENSOR.FFN_GATE,
898
+ MODEL_TENSOR.FFN_DOWN,
899
+ MODEL_TENSOR.FFN_UP,
900
+ MODEL_TENSOR.FFN_GATE_EXP,
901
+ MODEL_TENSOR.FFN_DOWN_EXP,
902
+ MODEL_TENSOR.FFN_UP_EXP,
903
+ MODEL_TENSOR.LAYER_OUT_NORM,
904
+ ],
905
+ MODEL_ARCH.GPTNEOX: [
906
+ MODEL_TENSOR.TOKEN_EMBD,
907
+ MODEL_TENSOR.OUTPUT_NORM,
908
+ MODEL_TENSOR.OUTPUT,
909
+ MODEL_TENSOR.ATTN_NORM,
910
+ MODEL_TENSOR.ATTN_QKV,
911
+ MODEL_TENSOR.ATTN_OUT,
912
+ MODEL_TENSOR.FFN_NORM,
913
+ MODEL_TENSOR.FFN_DOWN,
914
+ MODEL_TENSOR.FFN_UP,
915
+ ],
916
+ MODEL_ARCH.FALCON: [
917
+ MODEL_TENSOR.TOKEN_EMBD,
918
+ MODEL_TENSOR.OUTPUT_NORM,
919
+ MODEL_TENSOR.OUTPUT,
920
+ MODEL_TENSOR.ATTN_NORM,
921
+ MODEL_TENSOR.ATTN_NORM_2,
922
+ MODEL_TENSOR.ATTN_QKV,
923
+ MODEL_TENSOR.ATTN_OUT,
924
+ MODEL_TENSOR.FFN_DOWN,
925
+ MODEL_TENSOR.FFN_UP,
926
+ ],
927
+ MODEL_ARCH.BAICHUAN: [
928
+ MODEL_TENSOR.TOKEN_EMBD,
929
+ MODEL_TENSOR.OUTPUT_NORM,
930
+ MODEL_TENSOR.OUTPUT,
931
+ MODEL_TENSOR.ROPE_FREQS,
932
+ MODEL_TENSOR.ATTN_NORM,
933
+ MODEL_TENSOR.ATTN_Q,
934
+ MODEL_TENSOR.ATTN_K,
935
+ MODEL_TENSOR.ATTN_V,
936
+ MODEL_TENSOR.ATTN_OUT,
937
+ MODEL_TENSOR.ATTN_ROT_EMBD,
938
+ MODEL_TENSOR.FFN_NORM,
939
+ MODEL_TENSOR.FFN_GATE,
940
+ MODEL_TENSOR.FFN_DOWN,
941
+ MODEL_TENSOR.FFN_UP,
942
+ ],
943
+ MODEL_ARCH.STARCODER: [
944
+ MODEL_TENSOR.TOKEN_EMBD,
945
+ MODEL_TENSOR.POS_EMBD,
946
+ MODEL_TENSOR.OUTPUT_NORM,
947
+ MODEL_TENSOR.OUTPUT,
948
+ MODEL_TENSOR.ATTN_NORM,
949
+ MODEL_TENSOR.ATTN_QKV,
950
+ MODEL_TENSOR.ATTN_OUT,
951
+ MODEL_TENSOR.FFN_NORM,
952
+ MODEL_TENSOR.FFN_DOWN,
953
+ MODEL_TENSOR.FFN_UP,
954
+ ],
955
+ MODEL_ARCH.BERT: [
956
+ MODEL_TENSOR.TOKEN_EMBD,
957
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
958
+ MODEL_TENSOR.TOKEN_TYPES,
959
+ MODEL_TENSOR.POS_EMBD,
960
+ MODEL_TENSOR.OUTPUT_NORM,
961
+ MODEL_TENSOR.ATTN_OUT_NORM,
962
+ MODEL_TENSOR.ATTN_Q,
963
+ MODEL_TENSOR.ATTN_K,
964
+ MODEL_TENSOR.ATTN_V,
965
+ MODEL_TENSOR.ATTN_OUT,
966
+ MODEL_TENSOR.FFN_DOWN,
967
+ MODEL_TENSOR.FFN_UP,
968
+ MODEL_TENSOR.LAYER_OUT_NORM,
969
+ MODEL_TENSOR.CLS,
970
+ MODEL_TENSOR.CLS_OUT,
971
+ ],
972
+ MODEL_ARCH.NOMIC_BERT: [
973
+ MODEL_TENSOR.TOKEN_EMBD,
974
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
975
+ MODEL_TENSOR.TOKEN_TYPES,
976
+ MODEL_TENSOR.POS_EMBD,
977
+ MODEL_TENSOR.OUTPUT_NORM,
978
+ MODEL_TENSOR.ATTN_OUT_NORM,
979
+ MODEL_TENSOR.ATTN_QKV,
980
+ MODEL_TENSOR.ATTN_OUT,
981
+ MODEL_TENSOR.FFN_GATE,
982
+ MODEL_TENSOR.FFN_DOWN,
983
+ MODEL_TENSOR.FFN_UP,
984
+ MODEL_TENSOR.LAYER_OUT_NORM,
985
+ ],
986
+ MODEL_ARCH.NOMIC_BERT_MOE: [
987
+ MODEL_TENSOR.TOKEN_EMBD,
988
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
989
+ MODEL_TENSOR.TOKEN_TYPES,
990
+ MODEL_TENSOR.POS_EMBD,
991
+ MODEL_TENSOR.OUTPUT_NORM,
992
+ MODEL_TENSOR.ATTN_OUT_NORM,
993
+ MODEL_TENSOR.ATTN_QKV,
994
+ MODEL_TENSOR.ATTN_OUT,
995
+ MODEL_TENSOR.FFN_DOWN,
996
+ MODEL_TENSOR.FFN_UP,
997
+ MODEL_TENSOR.FFN_GATE_INP,
998
+ MODEL_TENSOR.FFN_DOWN_EXP,
999
+ MODEL_TENSOR.FFN_UP_EXP,
1000
+ MODEL_TENSOR.LAYER_OUT_NORM,
1001
+ ],
1002
+ MODEL_ARCH.JINA_BERT_V2: [
1003
+ MODEL_TENSOR.TOKEN_EMBD,
1004
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
1005
+ MODEL_TENSOR.TOKEN_TYPES,
1006
+ MODEL_TENSOR.ATTN_NORM_2,
1007
+ MODEL_TENSOR.ATTN_OUT_NORM,
1008
+ MODEL_TENSOR.ATTN_Q,
1009
+ MODEL_TENSOR.ATTN_Q_NORM,
1010
+ MODEL_TENSOR.ATTN_K,
1011
+ MODEL_TENSOR.ATTN_K_NORM,
1012
+ MODEL_TENSOR.ATTN_V,
1013
+ MODEL_TENSOR.ATTN_OUT,
1014
+ MODEL_TENSOR.FFN_UP,
1015
+ MODEL_TENSOR.FFN_GATE,
1016
+ MODEL_TENSOR.FFN_DOWN,
1017
+ MODEL_TENSOR.LAYER_OUT_NORM,
1018
+ MODEL_TENSOR.CLS,
1019
+ ],
1020
+ MODEL_ARCH.MPT: [
1021
+ MODEL_TENSOR.TOKEN_EMBD,
1022
+ MODEL_TENSOR.OUTPUT_NORM,
1023
+ MODEL_TENSOR.OUTPUT,
1024
+ MODEL_TENSOR.ATTN_NORM,
1025
+ MODEL_TENSOR.ATTN_QKV,
1026
+ MODEL_TENSOR.ATTN_OUT,
1027
+ MODEL_TENSOR.FFN_NORM,
1028
+ MODEL_TENSOR.FFN_DOWN,
1029
+ MODEL_TENSOR.FFN_UP,
1030
+ MODEL_TENSOR.FFN_ACT,
1031
+ MODEL_TENSOR.ATTN_Q_NORM,
1032
+ MODEL_TENSOR.ATTN_K_NORM,
1033
+ MODEL_TENSOR.POS_EMBD,
1034
+ ],
1035
+ MODEL_ARCH.GPTJ: [
1036
+ MODEL_TENSOR.TOKEN_EMBD,
1037
+ MODEL_TENSOR.OUTPUT_NORM,
1038
+ MODEL_TENSOR.OUTPUT,
1039
+ MODEL_TENSOR.ATTN_NORM,
1040
+ MODEL_TENSOR.ATTN_Q,
1041
+ MODEL_TENSOR.ATTN_K,
1042
+ MODEL_TENSOR.ATTN_V,
1043
+ MODEL_TENSOR.ATTN_OUT,
1044
+ MODEL_TENSOR.FFN_DOWN,
1045
+ MODEL_TENSOR.FFN_UP,
1046
+ ],
1047
+ MODEL_ARCH.REFACT: [
1048
+ MODEL_TENSOR.TOKEN_EMBD,
1049
+ MODEL_TENSOR.OUTPUT_NORM,
1050
+ MODEL_TENSOR.OUTPUT,
1051
+ MODEL_TENSOR.ATTN_NORM,
1052
+ MODEL_TENSOR.ATTN_Q,
1053
+ MODEL_TENSOR.ATTN_K,
1054
+ MODEL_TENSOR.ATTN_V,
1055
+ MODEL_TENSOR.ATTN_OUT,
1056
+ MODEL_TENSOR.FFN_NORM,
1057
+ MODEL_TENSOR.FFN_GATE,
1058
+ MODEL_TENSOR.FFN_DOWN,
1059
+ MODEL_TENSOR.FFN_UP,
1060
+ ],
1061
+ MODEL_ARCH.BLOOM: [
1062
+ MODEL_TENSOR.TOKEN_EMBD,
1063
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
1064
+ MODEL_TENSOR.OUTPUT_NORM,
1065
+ MODEL_TENSOR.OUTPUT,
1066
+ MODEL_TENSOR.ATTN_NORM,
1067
+ MODEL_TENSOR.ATTN_QKV,
1068
+ MODEL_TENSOR.ATTN_OUT,
1069
+ MODEL_TENSOR.FFN_NORM,
1070
+ MODEL_TENSOR.FFN_DOWN,
1071
+ MODEL_TENSOR.FFN_UP,
1072
+ ],
1073
+ MODEL_ARCH.STABLELM: [
1074
+ MODEL_TENSOR.TOKEN_EMBD,
1075
+ MODEL_TENSOR.OUTPUT_NORM,
1076
+ MODEL_TENSOR.OUTPUT,
1077
+ MODEL_TENSOR.ROPE_FREQS,
1078
+ MODEL_TENSOR.ATTN_NORM,
1079
+ MODEL_TENSOR.ATTN_Q,
1080
+ MODEL_TENSOR.ATTN_K,
1081
+ MODEL_TENSOR.ATTN_V,
1082
+ MODEL_TENSOR.ATTN_OUT,
1083
+ MODEL_TENSOR.FFN_NORM,
1084
+ MODEL_TENSOR.FFN_GATE,
1085
+ MODEL_TENSOR.FFN_DOWN,
1086
+ MODEL_TENSOR.FFN_UP,
1087
+ MODEL_TENSOR.ATTN_Q_NORM,
1088
+ MODEL_TENSOR.ATTN_K_NORM,
1089
+ ],
1090
+ MODEL_ARCH.QWEN: [
1091
+ MODEL_TENSOR.TOKEN_EMBD,
1092
+ MODEL_TENSOR.OUTPUT_NORM,
1093
+ MODEL_TENSOR.OUTPUT,
1094
+ MODEL_TENSOR.ROPE_FREQS,
1095
+ MODEL_TENSOR.ATTN_NORM,
1096
+ MODEL_TENSOR.ATTN_QKV,
1097
+ MODEL_TENSOR.ATTN_OUT,
1098
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1099
+ MODEL_TENSOR.FFN_NORM,
1100
+ MODEL_TENSOR.FFN_GATE,
1101
+ MODEL_TENSOR.FFN_DOWN,
1102
+ MODEL_TENSOR.FFN_UP,
1103
+ ],
1104
+ MODEL_ARCH.QWEN2: [
1105
+ MODEL_TENSOR.TOKEN_EMBD,
1106
+ MODEL_TENSOR.OUTPUT_NORM,
1107
+ MODEL_TENSOR.OUTPUT,
1108
+ MODEL_TENSOR.ROPE_FREQS,
1109
+ MODEL_TENSOR.ATTN_NORM,
1110
+ MODEL_TENSOR.ATTN_Q,
1111
+ MODEL_TENSOR.ATTN_K,
1112
+ MODEL_TENSOR.ATTN_V,
1113
+ MODEL_TENSOR.ATTN_OUT,
1114
+ MODEL_TENSOR.FFN_NORM,
1115
+ MODEL_TENSOR.FFN_GATE,
1116
+ MODEL_TENSOR.FFN_DOWN,
1117
+ MODEL_TENSOR.FFN_UP,
1118
+ ],
1119
+ MODEL_ARCH.QWEN2VL: [
1120
+ MODEL_TENSOR.TOKEN_EMBD,
1121
+ MODEL_TENSOR.OUTPUT_NORM,
1122
+ MODEL_TENSOR.OUTPUT,
1123
+ MODEL_TENSOR.ATTN_NORM,
1124
+ MODEL_TENSOR.ATTN_Q,
1125
+ MODEL_TENSOR.ATTN_K,
1126
+ MODEL_TENSOR.ATTN_V,
1127
+ MODEL_TENSOR.ATTN_OUT,
1128
+ MODEL_TENSOR.FFN_NORM,
1129
+ MODEL_TENSOR.FFN_GATE,
1130
+ MODEL_TENSOR.FFN_DOWN,
1131
+ MODEL_TENSOR.FFN_UP,
1132
+ ],
1133
+ MODEL_ARCH.QWEN2MOE: [
1134
+ MODEL_TENSOR.TOKEN_EMBD,
1135
+ MODEL_TENSOR.OUTPUT_NORM,
1136
+ MODEL_TENSOR.OUTPUT,
1137
+ MODEL_TENSOR.ATTN_NORM,
1138
+ MODEL_TENSOR.ATTN_Q,
1139
+ MODEL_TENSOR.ATTN_K,
1140
+ MODEL_TENSOR.ATTN_V,
1141
+ MODEL_TENSOR.ATTN_OUT,
1142
+ MODEL_TENSOR.FFN_NORM,
1143
+ MODEL_TENSOR.FFN_GATE_INP,
1144
+ MODEL_TENSOR.FFN_GATE_EXP,
1145
+ MODEL_TENSOR.FFN_DOWN_EXP,
1146
+ MODEL_TENSOR.FFN_UP_EXP,
1147
+ MODEL_TENSOR.FFN_GATE_INP_SHEXP,
1148
+ MODEL_TENSOR.FFN_GATE_SHEXP,
1149
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
1150
+ MODEL_TENSOR.FFN_UP_SHEXP,
1151
+ ],
1152
+ MODEL_ARCH.QWEN3: [
1153
+ MODEL_TENSOR.TOKEN_EMBD,
1154
+ MODEL_TENSOR.OUTPUT_NORM,
1155
+ MODEL_TENSOR.OUTPUT,
1156
+ MODEL_TENSOR.ROPE_FREQS,
1157
+ MODEL_TENSOR.ATTN_NORM,
1158
+ MODEL_TENSOR.ATTN_Q,
1159
+ MODEL_TENSOR.ATTN_Q_NORM,
1160
+ MODEL_TENSOR.ATTN_K,
1161
+ MODEL_TENSOR.ATTN_K_NORM,
1162
+ MODEL_TENSOR.ATTN_V,
1163
+ MODEL_TENSOR.ATTN_OUT,
1164
+ MODEL_TENSOR.FFN_NORM,
1165
+ MODEL_TENSOR.FFN_GATE,
1166
+ MODEL_TENSOR.FFN_DOWN,
1167
+ MODEL_TENSOR.FFN_UP,
1168
+ ],
1169
+ MODEL_ARCH.QWEN3MOE: [
1170
+ MODEL_TENSOR.TOKEN_EMBD,
1171
+ MODEL_TENSOR.OUTPUT_NORM,
1172
+ MODEL_TENSOR.OUTPUT,
1173
+ MODEL_TENSOR.ATTN_NORM,
1174
+ MODEL_TENSOR.ATTN_Q,
1175
+ MODEL_TENSOR.ATTN_Q_NORM,
1176
+ MODEL_TENSOR.ATTN_K,
1177
+ MODEL_TENSOR.ATTN_K_NORM,
1178
+ MODEL_TENSOR.ATTN_V,
1179
+ MODEL_TENSOR.ATTN_OUT,
1180
+ MODEL_TENSOR.FFN_NORM,
1181
+ MODEL_TENSOR.FFN_GATE_INP,
1182
+ MODEL_TENSOR.FFN_GATE_EXP,
1183
+ MODEL_TENSOR.FFN_DOWN_EXP,
1184
+ MODEL_TENSOR.FFN_UP_EXP,
1185
+ ],
1186
+ MODEL_ARCH.PLAMO: [
1187
+ MODEL_TENSOR.TOKEN_EMBD,
1188
+ MODEL_TENSOR.OUTPUT_NORM,
1189
+ MODEL_TENSOR.OUTPUT,
1190
+ MODEL_TENSOR.ROPE_FREQS,
1191
+ MODEL_TENSOR.ATTN_NORM,
1192
+ MODEL_TENSOR.ATTN_Q,
1193
+ MODEL_TENSOR.ATTN_K,
1194
+ MODEL_TENSOR.ATTN_V,
1195
+ MODEL_TENSOR.ATTN_OUT,
1196
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1197
+ MODEL_TENSOR.FFN_GATE,
1198
+ MODEL_TENSOR.FFN_DOWN,
1199
+ MODEL_TENSOR.FFN_UP,
1200
+ ],
1201
+ MODEL_ARCH.GPT2: [
1202
+ MODEL_TENSOR.TOKEN_EMBD,
1203
+ MODEL_TENSOR.POS_EMBD,
1204
+ MODEL_TENSOR.OUTPUT_NORM,
1205
+ MODEL_TENSOR.OUTPUT,
1206
+ MODEL_TENSOR.ATTN_NORM,
1207
+ MODEL_TENSOR.ATTN_QKV,
1208
+ MODEL_TENSOR.ATTN_OUT,
1209
+ MODEL_TENSOR.FFN_NORM,
1210
+ MODEL_TENSOR.FFN_DOWN,
1211
+ MODEL_TENSOR.FFN_UP,
1212
+ ],
1213
+ MODEL_ARCH.PHI2: [
1214
+ MODEL_TENSOR.TOKEN_EMBD,
1215
+ MODEL_TENSOR.OUTPUT_NORM,
1216
+ MODEL_TENSOR.OUTPUT,
1217
+ MODEL_TENSOR.ATTN_NORM,
1218
+ MODEL_TENSOR.ATTN_QKV,
1219
+ MODEL_TENSOR.ATTN_Q,
1220
+ MODEL_TENSOR.ATTN_K,
1221
+ MODEL_TENSOR.ATTN_V,
1222
+ MODEL_TENSOR.ATTN_OUT,
1223
+ MODEL_TENSOR.FFN_NORM,
1224
+ MODEL_TENSOR.FFN_DOWN,
1225
+ MODEL_TENSOR.FFN_UP,
1226
+ ],
1227
+ MODEL_ARCH.PHI3: [
1228
+ MODEL_TENSOR.TOKEN_EMBD,
1229
+ MODEL_TENSOR.OUTPUT_NORM,
1230
+ MODEL_TENSOR.OUTPUT,
1231
+ MODEL_TENSOR.ROPE_FACTORS_LONG,
1232
+ MODEL_TENSOR.ROPE_FACTORS_SHORT,
1233
+ MODEL_TENSOR.ATTN_NORM,
1234
+ MODEL_TENSOR.ATTN_QKV,
1235
+ MODEL_TENSOR.ATTN_Q,
1236
+ MODEL_TENSOR.ATTN_K,
1237
+ MODEL_TENSOR.ATTN_V,
1238
+ MODEL_TENSOR.ATTN_OUT,
1239
+ MODEL_TENSOR.FFN_NORM,
1240
+ MODEL_TENSOR.FFN_DOWN,
1241
+ MODEL_TENSOR.FFN_UP,
1242
+ ],
1243
+ MODEL_ARCH.PHIMOE: [
1244
+ MODEL_TENSOR.TOKEN_EMBD,
1245
+ MODEL_TENSOR.OUTPUT_NORM,
1246
+ MODEL_TENSOR.OUTPUT,
1247
+ MODEL_TENSOR.ROPE_FACTORS_LONG,
1248
+ MODEL_TENSOR.ROPE_FACTORS_SHORT,
1249
+ MODEL_TENSOR.ATTN_NORM,
1250
+ MODEL_TENSOR.ATTN_QKV,
1251
+ MODEL_TENSOR.ATTN_Q,
1252
+ MODEL_TENSOR.ATTN_K,
1253
+ MODEL_TENSOR.ATTN_V,
1254
+ MODEL_TENSOR.ATTN_OUT,
1255
+ MODEL_TENSOR.FFN_NORM,
1256
+ MODEL_TENSOR.FFN_GATE_INP,
1257
+ MODEL_TENSOR.FFN_GATE_EXP,
1258
+ MODEL_TENSOR.FFN_DOWN_EXP,
1259
+ MODEL_TENSOR.FFN_UP_EXP,
1260
+ ],
1261
+ MODEL_ARCH.CODESHELL: [
1262
+ MODEL_TENSOR.TOKEN_EMBD,
1263
+ MODEL_TENSOR.POS_EMBD,
1264
+ MODEL_TENSOR.OUTPUT_NORM,
1265
+ MODEL_TENSOR.OUTPUT,
1266
+ MODEL_TENSOR.ATTN_NORM,
1267
+ MODEL_TENSOR.ATTN_QKV,
1268
+ MODEL_TENSOR.ATTN_OUT,
1269
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1270
+ MODEL_TENSOR.FFN_NORM,
1271
+ MODEL_TENSOR.FFN_DOWN,
1272
+ MODEL_TENSOR.FFN_UP,
1273
+ ],
1274
+ MODEL_ARCH.ORION: [
1275
+ MODEL_TENSOR.TOKEN_EMBD,
1276
+ MODEL_TENSOR.OUTPUT_NORM,
1277
+ MODEL_TENSOR.OUTPUT,
1278
+ MODEL_TENSOR.ROPE_FREQS,
1279
+ MODEL_TENSOR.ATTN_NORM,
1280
+ MODEL_TENSOR.ATTN_Q,
1281
+ MODEL_TENSOR.ATTN_K,
1282
+ MODEL_TENSOR.ATTN_V,
1283
+ MODEL_TENSOR.ATTN_OUT,
1284
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1285
+ MODEL_TENSOR.FFN_NORM,
1286
+ MODEL_TENSOR.FFN_GATE,
1287
+ MODEL_TENSOR.FFN_DOWN,
1288
+ MODEL_TENSOR.FFN_UP,
1289
+ ],
1290
+ MODEL_ARCH.INTERNLM2: [
1291
+ MODEL_TENSOR.TOKEN_EMBD,
1292
+ MODEL_TENSOR.OUTPUT_NORM,
1293
+ MODEL_TENSOR.OUTPUT,
1294
+ MODEL_TENSOR.ATTN_NORM,
1295
+ MODEL_TENSOR.ATTN_Q,
1296
+ MODEL_TENSOR.ATTN_K,
1297
+ MODEL_TENSOR.ATTN_V,
1298
+ MODEL_TENSOR.ATTN_OUT,
1299
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1300
+ MODEL_TENSOR.FFN_NORM,
1301
+ MODEL_TENSOR.FFN_GATE,
1302
+ MODEL_TENSOR.FFN_DOWN,
1303
+ MODEL_TENSOR.FFN_UP,
1304
+ ],
1305
+ MODEL_ARCH.MINICPM: [
1306
+ MODEL_TENSOR.TOKEN_EMBD,
1307
+ MODEL_TENSOR.OUTPUT,
1308
+ MODEL_TENSOR.OUTPUT_NORM,
1309
+ MODEL_TENSOR.ROPE_FREQS,
1310
+ MODEL_TENSOR.ROPE_FACTORS_LONG,
1311
+ MODEL_TENSOR.ROPE_FACTORS_SHORT,
1312
+ MODEL_TENSOR.ATTN_NORM,
1313
+ MODEL_TENSOR.ATTN_Q,
1314
+ MODEL_TENSOR.ATTN_K,
1315
+ MODEL_TENSOR.ATTN_V,
1316
+ MODEL_TENSOR.ATTN_OUT,
1317
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1318
+ MODEL_TENSOR.FFN_GATE_INP,
1319
+ MODEL_TENSOR.FFN_NORM,
1320
+ MODEL_TENSOR.FFN_GATE,
1321
+ MODEL_TENSOR.FFN_DOWN,
1322
+ MODEL_TENSOR.FFN_UP,
1323
+ MODEL_TENSOR.FFN_GATE_EXP,
1324
+ MODEL_TENSOR.FFN_DOWN_EXP,
1325
+ MODEL_TENSOR.FFN_UP_EXP,
1326
+ ],
1327
+ MODEL_ARCH.MINICPM3: [
1328
+ MODEL_TENSOR.TOKEN_EMBD,
1329
+ MODEL_TENSOR.OUTPUT_NORM,
1330
+ MODEL_TENSOR.OUTPUT,
1331
+ MODEL_TENSOR.ROPE_FACTORS_LONG,
1332
+ MODEL_TENSOR.ROPE_FACTORS_SHORT,
1333
+ MODEL_TENSOR.ATTN_NORM,
1334
+ MODEL_TENSOR.ATTN_Q_A,
1335
+ MODEL_TENSOR.ATTN_Q_B,
1336
+ MODEL_TENSOR.ATTN_KV_A_MQA,
1337
+ MODEL_TENSOR.ATTN_KV_B,
1338
+ MODEL_TENSOR.ATTN_Q_A_NORM,
1339
+ MODEL_TENSOR.ATTN_KV_A_NORM,
1340
+ MODEL_TENSOR.ATTN_OUT,
1341
+ MODEL_TENSOR.FFN_NORM,
1342
+ MODEL_TENSOR.FFN_GATE,
1343
+ MODEL_TENSOR.FFN_DOWN,
1344
+ MODEL_TENSOR.FFN_UP,
1345
+ ],
1346
+ MODEL_ARCH.GEMMA: [
1347
+ MODEL_TENSOR.TOKEN_EMBD,
1348
+ MODEL_TENSOR.OUTPUT_NORM,
1349
+ MODEL_TENSOR.ATTN_NORM,
1350
+ MODEL_TENSOR.ATTN_Q,
1351
+ MODEL_TENSOR.ATTN_K,
1352
+ MODEL_TENSOR.ATTN_V,
1353
+ MODEL_TENSOR.ATTN_OUT,
1354
+ MODEL_TENSOR.FFN_GATE,
1355
+ MODEL_TENSOR.FFN_DOWN,
1356
+ MODEL_TENSOR.FFN_UP,
1357
+ MODEL_TENSOR.FFN_NORM,
1358
+ ],
1359
+ MODEL_ARCH.GEMMA2: [
1360
+ MODEL_TENSOR.TOKEN_EMBD,
1361
+ MODEL_TENSOR.OUTPUT_NORM,
1362
+ MODEL_TENSOR.ATTN_Q,
1363
+ MODEL_TENSOR.ATTN_K,
1364
+ MODEL_TENSOR.ATTN_V,
1365
+ MODEL_TENSOR.ATTN_OUT,
1366
+ MODEL_TENSOR.FFN_GATE,
1367
+ MODEL_TENSOR.FFN_DOWN,
1368
+ MODEL_TENSOR.FFN_UP,
1369
+ MODEL_TENSOR.ATTN_NORM,
1370
+ MODEL_TENSOR.ATTN_POST_NORM,
1371
+ MODEL_TENSOR.FFN_PRE_NORM,
1372
+ MODEL_TENSOR.FFN_POST_NORM,
1373
+ ],
1374
+ MODEL_ARCH.GEMMA3: [
1375
+ MODEL_TENSOR.TOKEN_EMBD,
1376
+ MODEL_TENSOR.OUTPUT,
1377
+ MODEL_TENSOR.OUTPUT_NORM,
1378
+ MODEL_TENSOR.ATTN_Q,
1379
+ MODEL_TENSOR.ATTN_Q_NORM,
1380
+ MODEL_TENSOR.ATTN_K,
1381
+ MODEL_TENSOR.ATTN_K_NORM,
1382
+ MODEL_TENSOR.ATTN_V,
1383
+ MODEL_TENSOR.ATTN_OUT,
1384
+ MODEL_TENSOR.FFN_GATE,
1385
+ MODEL_TENSOR.FFN_DOWN,
1386
+ MODEL_TENSOR.FFN_UP,
1387
+ MODEL_TENSOR.ATTN_NORM,
1388
+ MODEL_TENSOR.ATTN_POST_NORM,
1389
+ MODEL_TENSOR.FFN_PRE_NORM,
1390
+ MODEL_TENSOR.FFN_POST_NORM,
1391
+ ],
1392
+ MODEL_ARCH.STARCODER2: [
1393
+ MODEL_TENSOR.TOKEN_EMBD,
1394
+ MODEL_TENSOR.OUTPUT_NORM,
1395
+ MODEL_TENSOR.OUTPUT,
1396
+ MODEL_TENSOR.ROPE_FREQS,
1397
+ MODEL_TENSOR.ATTN_NORM,
1398
+ MODEL_TENSOR.ATTN_Q,
1399
+ MODEL_TENSOR.ATTN_K,
1400
+ MODEL_TENSOR.ATTN_V,
1401
+ MODEL_TENSOR.ATTN_OUT,
1402
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1403
+ MODEL_TENSOR.FFN_NORM,
1404
+ MODEL_TENSOR.FFN_DOWN,
1405
+ MODEL_TENSOR.FFN_UP,
1406
+ ],
1407
+ MODEL_ARCH.RWKV6: [
1408
+ MODEL_TENSOR.TOKEN_EMBD,
1409
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
1410
+ MODEL_TENSOR.OUTPUT_NORM,
1411
+ MODEL_TENSOR.OUTPUT,
1412
+ MODEL_TENSOR.ATTN_NORM,
1413
+ MODEL_TENSOR.ATTN_NORM_2,
1414
+ MODEL_TENSOR.TIME_MIX_W1,
1415
+ MODEL_TENSOR.TIME_MIX_W2,
1416
+ MODEL_TENSOR.TIME_MIX_LERP_X,
1417
+ MODEL_TENSOR.TIME_MIX_LERP_K,
1418
+ MODEL_TENSOR.TIME_MIX_LERP_V,
1419
+ MODEL_TENSOR.TIME_MIX_LERP_R,
1420
+ MODEL_TENSOR.TIME_MIX_LERP_G,
1421
+ MODEL_TENSOR.TIME_MIX_LERP_W,
1422
+ MODEL_TENSOR.TIME_MIX_LERP_FUSED,
1423
+ MODEL_TENSOR.TIME_MIX_FIRST,
1424
+ MODEL_TENSOR.TIME_MIX_DECAY,
1425
+ MODEL_TENSOR.TIME_MIX_DECAY_W1,
1426
+ MODEL_TENSOR.TIME_MIX_DECAY_W2,
1427
+ MODEL_TENSOR.TIME_MIX_KEY,
1428
+ MODEL_TENSOR.TIME_MIX_VALUE,
1429
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE,
1430
+ MODEL_TENSOR.TIME_MIX_GATE,
1431
+ MODEL_TENSOR.TIME_MIX_LN,
1432
+ MODEL_TENSOR.TIME_MIX_OUTPUT,
1433
+ MODEL_TENSOR.CHANNEL_MIX_LERP_K,
1434
+ MODEL_TENSOR.CHANNEL_MIX_LERP_R,
1435
+ MODEL_TENSOR.CHANNEL_MIX_KEY,
1436
+ MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
1437
+ MODEL_TENSOR.CHANNEL_MIX_VALUE,
1438
+ ],
1439
+ MODEL_ARCH.RWKV6QWEN2: [
1440
+ MODEL_TENSOR.TOKEN_EMBD,
1441
+ MODEL_TENSOR.OUTPUT_NORM,
1442
+ MODEL_TENSOR.OUTPUT,
1443
+ MODEL_TENSOR.ATTN_NORM,
1444
+ MODEL_TENSOR.TIME_MIX_W1,
1445
+ MODEL_TENSOR.TIME_MIX_W2,
1446
+ MODEL_TENSOR.TIME_MIX_LERP_X,
1447
+ MODEL_TENSOR.TIME_MIX_LERP_K,
1448
+ MODEL_TENSOR.TIME_MIX_LERP_V,
1449
+ MODEL_TENSOR.TIME_MIX_LERP_R,
1450
+ MODEL_TENSOR.TIME_MIX_LERP_G,
1451
+ MODEL_TENSOR.TIME_MIX_LERP_W,
1452
+ MODEL_TENSOR.TIME_MIX_LERP_FUSED,
1453
+ MODEL_TENSOR.TIME_MIX_FIRST,
1454
+ MODEL_TENSOR.TIME_MIX_DECAY,
1455
+ MODEL_TENSOR.TIME_MIX_DECAY_W1,
1456
+ MODEL_TENSOR.TIME_MIX_DECAY_W2,
1457
+ MODEL_TENSOR.TIME_MIX_KEY,
1458
+ MODEL_TENSOR.TIME_MIX_VALUE,
1459
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE,
1460
+ MODEL_TENSOR.TIME_MIX_GATE,
1461
+ MODEL_TENSOR.TIME_MIX_LN,
1462
+ MODEL_TENSOR.TIME_MIX_OUTPUT,
1463
+ MODEL_TENSOR.FFN_NORM,
1464
+ MODEL_TENSOR.FFN_GATE,
1465
+ MODEL_TENSOR.FFN_DOWN,
1466
+ MODEL_TENSOR.FFN_UP,
1467
+ ],
1468
+ MODEL_ARCH.RWKV7: [
1469
+ MODEL_TENSOR.TOKEN_EMBD,
1470
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
1471
+ MODEL_TENSOR.OUTPUT_NORM,
1472
+ MODEL_TENSOR.OUTPUT,
1473
+ MODEL_TENSOR.ATTN_NORM,
1474
+ MODEL_TENSOR.ATTN_NORM_2,
1475
+ MODEL_TENSOR.TIME_MIX_LERP_FUSED,
1476
+ MODEL_TENSOR.TIME_MIX_W0,
1477
+ MODEL_TENSOR.TIME_MIX_W1,
1478
+ MODEL_TENSOR.TIME_MIX_W2,
1479
+ MODEL_TENSOR.TIME_MIX_A0,
1480
+ MODEL_TENSOR.TIME_MIX_A1,
1481
+ MODEL_TENSOR.TIME_MIX_A2,
1482
+ MODEL_TENSOR.TIME_MIX_V0,
1483
+ MODEL_TENSOR.TIME_MIX_V1,
1484
+ MODEL_TENSOR.TIME_MIX_V2,
1485
+ MODEL_TENSOR.TIME_MIX_G1,
1486
+ MODEL_TENSOR.TIME_MIX_G2,
1487
+ MODEL_TENSOR.TIME_MIX_K_K,
1488
+ MODEL_TENSOR.TIME_MIX_K_A,
1489
+ MODEL_TENSOR.TIME_MIX_R_K,
1490
+ MODEL_TENSOR.TIME_MIX_KEY,
1491
+ MODEL_TENSOR.TIME_MIX_VALUE,
1492
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE,
1493
+ MODEL_TENSOR.TIME_MIX_LN,
1494
+ MODEL_TENSOR.TIME_MIX_OUTPUT,
1495
+ MODEL_TENSOR.CHANNEL_MIX_LERP_K,
1496
+ MODEL_TENSOR.CHANNEL_MIX_KEY,
1497
+ MODEL_TENSOR.CHANNEL_MIX_VALUE,
1498
+ ],
1499
+ MODEL_ARCH.ARWKV7: [
1500
+ MODEL_TENSOR.TOKEN_EMBD,
1501
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
1502
+ MODEL_TENSOR.OUTPUT_NORM,
1503
+ MODEL_TENSOR.OUTPUT,
1504
+ MODEL_TENSOR.ATTN_NORM,
1505
+ MODEL_TENSOR.TIME_MIX_LERP_FUSED,
1506
+ MODEL_TENSOR.TIME_MIX_W0,
1507
+ MODEL_TENSOR.TIME_MIX_W1,
1508
+ MODEL_TENSOR.TIME_MIX_W2,
1509
+ MODEL_TENSOR.TIME_MIX_A0,
1510
+ MODEL_TENSOR.TIME_MIX_A1,
1511
+ MODEL_TENSOR.TIME_MIX_A2,
1512
+ MODEL_TENSOR.TIME_MIX_V0,
1513
+ MODEL_TENSOR.TIME_MIX_V1,
1514
+ MODEL_TENSOR.TIME_MIX_V2,
1515
+ MODEL_TENSOR.TIME_MIX_G1,
1516
+ MODEL_TENSOR.TIME_MIX_G2,
1517
+ MODEL_TENSOR.TIME_MIX_K_K,
1518
+ MODEL_TENSOR.TIME_MIX_K_A,
1519
+ MODEL_TENSOR.TIME_MIX_R_K,
1520
+ MODEL_TENSOR.TIME_MIX_KEY,
1521
+ MODEL_TENSOR.TIME_MIX_VALUE,
1522
+ MODEL_TENSOR.TIME_MIX_RECEPTANCE,
1523
+ MODEL_TENSOR.TIME_MIX_LN,
1524
+ MODEL_TENSOR.TIME_MIX_OUTPUT,
1525
+ MODEL_TENSOR.FFN_NORM,
1526
+ MODEL_TENSOR.FFN_GATE,
1527
+ MODEL_TENSOR.FFN_DOWN,
1528
+ MODEL_TENSOR.FFN_UP,
1529
+ ],
1530
+ MODEL_ARCH.MAMBA: [
1531
+ MODEL_TENSOR.TOKEN_EMBD,
1532
+ MODEL_TENSOR.OUTPUT_NORM,
1533
+ MODEL_TENSOR.OUTPUT,
1534
+ MODEL_TENSOR.ATTN_NORM,
1535
+ MODEL_TENSOR.SSM_IN,
1536
+ MODEL_TENSOR.SSM_CONV1D,
1537
+ MODEL_TENSOR.SSM_X,
1538
+ MODEL_TENSOR.SSM_DT,
1539
+ MODEL_TENSOR.SSM_A,
1540
+ MODEL_TENSOR.SSM_D,
1541
+ MODEL_TENSOR.SSM_OUT,
1542
+ ],
1543
+ MODEL_ARCH.XVERSE: [
1544
+ MODEL_TENSOR.TOKEN_EMBD,
1545
+ MODEL_TENSOR.OUTPUT_NORM,
1546
+ MODEL_TENSOR.OUTPUT,
1547
+ MODEL_TENSOR.ROPE_FREQS,
1548
+ MODEL_TENSOR.ATTN_NORM,
1549
+ MODEL_TENSOR.ATTN_Q,
1550
+ MODEL_TENSOR.ATTN_K,
1551
+ MODEL_TENSOR.ATTN_V,
1552
+ MODEL_TENSOR.ATTN_OUT,
1553
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1554
+ MODEL_TENSOR.FFN_NORM,
1555
+ MODEL_TENSOR.FFN_GATE,
1556
+ MODEL_TENSOR.FFN_DOWN,
1557
+ MODEL_TENSOR.FFN_UP,
1558
+ ],
1559
+ MODEL_ARCH.COMMAND_R: [
1560
+ MODEL_TENSOR.TOKEN_EMBD,
1561
+ MODEL_TENSOR.OUTPUT_NORM,
1562
+ MODEL_TENSOR.ATTN_NORM,
1563
+ MODEL_TENSOR.ATTN_Q,
1564
+ MODEL_TENSOR.ATTN_K,
1565
+ MODEL_TENSOR.ATTN_V,
1566
+ MODEL_TENSOR.ATTN_OUT,
1567
+ MODEL_TENSOR.FFN_GATE,
1568
+ MODEL_TENSOR.FFN_DOWN,
1569
+ MODEL_TENSOR.FFN_UP,
1570
+ MODEL_TENSOR.ATTN_K_NORM,
1571
+ MODEL_TENSOR.ATTN_Q_NORM,
1572
+ ],
1573
+ MODEL_ARCH.COHERE2: [
1574
+ MODEL_TENSOR.TOKEN_EMBD,
1575
+ MODEL_TENSOR.OUTPUT_NORM,
1576
+ MODEL_TENSOR.ATTN_NORM,
1577
+ MODEL_TENSOR.ATTN_Q,
1578
+ MODEL_TENSOR.ATTN_K,
1579
+ MODEL_TENSOR.ATTN_V,
1580
+ MODEL_TENSOR.ATTN_OUT,
1581
+ MODEL_TENSOR.FFN_GATE,
1582
+ MODEL_TENSOR.FFN_DOWN,
1583
+ MODEL_TENSOR.FFN_UP,
1584
+ ],
1585
+ MODEL_ARCH.DBRX: [
1586
+ MODEL_TENSOR.TOKEN_EMBD,
1587
+ MODEL_TENSOR.OUTPUT_NORM,
1588
+ MODEL_TENSOR.OUTPUT,
1589
+ MODEL_TENSOR.ATTN_NORM,
1590
+ MODEL_TENSOR.ATTN_QKV,
1591
+ MODEL_TENSOR.ATTN_OUT,
1592
+ MODEL_TENSOR.ATTN_OUT_NORM,
1593
+ MODEL_TENSOR.FFN_GATE_INP,
1594
+ MODEL_TENSOR.FFN_GATE_EXP,
1595
+ MODEL_TENSOR.FFN_DOWN_EXP,
1596
+ MODEL_TENSOR.FFN_UP_EXP,
1597
+ ],
1598
+ MODEL_ARCH.OLMO: [
1599
+ MODEL_TENSOR.TOKEN_EMBD,
1600
+ MODEL_TENSOR.OUTPUT,
1601
+ MODEL_TENSOR.ATTN_Q,
1602
+ MODEL_TENSOR.ATTN_K,
1603
+ MODEL_TENSOR.ATTN_V,
1604
+ MODEL_TENSOR.ATTN_OUT,
1605
+ MODEL_TENSOR.FFN_GATE,
1606
+ MODEL_TENSOR.FFN_DOWN,
1607
+ MODEL_TENSOR.FFN_UP,
1608
+ ],
1609
+ MODEL_ARCH.OLMO2: [
1610
+ MODEL_TENSOR.TOKEN_EMBD,
1611
+ MODEL_TENSOR.OUTPUT_NORM,
1612
+ MODEL_TENSOR.OUTPUT,
1613
+ MODEL_TENSOR.ATTN_Q,
1614
+ MODEL_TENSOR.ATTN_K,
1615
+ MODEL_TENSOR.ATTN_V,
1616
+ MODEL_TENSOR.ATTN_OUT,
1617
+ MODEL_TENSOR.ATTN_POST_NORM,
1618
+ MODEL_TENSOR.ATTN_Q_NORM,
1619
+ MODEL_TENSOR.ATTN_K_NORM,
1620
+ MODEL_TENSOR.FFN_POST_NORM,
1621
+ MODEL_TENSOR.FFN_GATE,
1622
+ MODEL_TENSOR.FFN_DOWN,
1623
+ MODEL_TENSOR.FFN_UP,
1624
+ ],
1625
+ MODEL_ARCH.OLMOE: [
1626
+ MODEL_TENSOR.TOKEN_EMBD,
1627
+ MODEL_TENSOR.OUTPUT_NORM,
1628
+ MODEL_TENSOR.OUTPUT,
1629
+ MODEL_TENSOR.ATTN_OUT,
1630
+ MODEL_TENSOR.ATTN_Q,
1631
+ MODEL_TENSOR.ATTN_K,
1632
+ MODEL_TENSOR.ATTN_V,
1633
+ MODEL_TENSOR.ATTN_NORM,
1634
+ MODEL_TENSOR.ATTN_Q_NORM,
1635
+ MODEL_TENSOR.ATTN_K_NORM,
1636
+ MODEL_TENSOR.FFN_NORM,
1637
+ MODEL_TENSOR.FFN_GATE_INP,
1638
+ MODEL_TENSOR.FFN_GATE_EXP,
1639
+ MODEL_TENSOR.FFN_UP_EXP,
1640
+ MODEL_TENSOR.FFN_DOWN_EXP,
1641
+ ],
1642
+ MODEL_ARCH.OPENELM: [
1643
+ MODEL_TENSOR.TOKEN_EMBD,
1644
+ MODEL_TENSOR.OUTPUT_NORM,
1645
+ MODEL_TENSOR.ATTN_NORM,
1646
+ MODEL_TENSOR.ATTN_QKV,
1647
+ MODEL_TENSOR.ATTN_Q_NORM,
1648
+ MODEL_TENSOR.ATTN_K_NORM,
1649
+ MODEL_TENSOR.ATTN_OUT,
1650
+ MODEL_TENSOR.FFN_NORM,
1651
+ MODEL_TENSOR.FFN_GATE,
1652
+ MODEL_TENSOR.FFN_DOWN,
1653
+ MODEL_TENSOR.FFN_UP,
1654
+ ],
1655
+ MODEL_ARCH.ARCTIC: [
1656
+ MODEL_TENSOR.TOKEN_EMBD,
1657
+ MODEL_TENSOR.OUTPUT_NORM,
1658
+ MODEL_TENSOR.OUTPUT,
1659
+ MODEL_TENSOR.ROPE_FREQS,
1660
+ MODEL_TENSOR.ATTN_NORM,
1661
+ MODEL_TENSOR.ATTN_Q,
1662
+ MODEL_TENSOR.ATTN_K,
1663
+ MODEL_TENSOR.ATTN_V,
1664
+ MODEL_TENSOR.ATTN_OUT,
1665
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1666
+ MODEL_TENSOR.FFN_GATE_INP,
1667
+ MODEL_TENSOR.FFN_NORM,
1668
+ MODEL_TENSOR.FFN_GATE,
1669
+ MODEL_TENSOR.FFN_DOWN,
1670
+ MODEL_TENSOR.FFN_UP,
1671
+ MODEL_TENSOR.FFN_NORM_EXP,
1672
+ MODEL_TENSOR.FFN_GATE_EXP,
1673
+ MODEL_TENSOR.FFN_DOWN_EXP,
1674
+ MODEL_TENSOR.FFN_UP_EXP,
1675
+ ],
1676
+ MODEL_ARCH.DEEPSEEK: [
1677
+ MODEL_TENSOR.TOKEN_EMBD,
1678
+ MODEL_TENSOR.OUTPUT_NORM,
1679
+ MODEL_TENSOR.OUTPUT,
1680
+ MODEL_TENSOR.ROPE_FREQS,
1681
+ MODEL_TENSOR.ATTN_NORM,
1682
+ MODEL_TENSOR.ATTN_Q,
1683
+ MODEL_TENSOR.ATTN_K,
1684
+ MODEL_TENSOR.ATTN_V,
1685
+ MODEL_TENSOR.ATTN_OUT,
1686
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1687
+ MODEL_TENSOR.FFN_GATE_INP,
1688
+ MODEL_TENSOR.FFN_NORM,
1689
+ MODEL_TENSOR.FFN_GATE,
1690
+ MODEL_TENSOR.FFN_DOWN,
1691
+ MODEL_TENSOR.FFN_UP,
1692
+ MODEL_TENSOR.FFN_GATE_EXP,
1693
+ MODEL_TENSOR.FFN_DOWN_EXP,
1694
+ MODEL_TENSOR.FFN_UP_EXP,
1695
+ MODEL_TENSOR.FFN_GATE_SHEXP,
1696
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
1697
+ MODEL_TENSOR.FFN_UP_SHEXP,
1698
+ ],
1699
+ MODEL_ARCH.DEEPSEEK2: [
1700
+ MODEL_TENSOR.TOKEN_EMBD,
1701
+ MODEL_TENSOR.OUTPUT_NORM,
1702
+ MODEL_TENSOR.OUTPUT,
1703
+ MODEL_TENSOR.ROPE_FREQS,
1704
+ MODEL_TENSOR.ATTN_NORM,
1705
+ MODEL_TENSOR.ATTN_Q,
1706
+ MODEL_TENSOR.ATTN_Q_A,
1707
+ MODEL_TENSOR.ATTN_Q_B,
1708
+ MODEL_TENSOR.ATTN_KV_A_MQA,
1709
+ MODEL_TENSOR.ATTN_KV_B,
1710
+ MODEL_TENSOR.ATTN_K_B,
1711
+ MODEL_TENSOR.ATTN_V_B,
1712
+ MODEL_TENSOR.ATTN_Q_A_NORM,
1713
+ MODEL_TENSOR.ATTN_KV_A_NORM,
1714
+ MODEL_TENSOR.ATTN_OUT,
1715
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1716
+ MODEL_TENSOR.FFN_GATE_INP,
1717
+ MODEL_TENSOR.FFN_NORM,
1718
+ MODEL_TENSOR.FFN_GATE,
1719
+ MODEL_TENSOR.FFN_DOWN,
1720
+ MODEL_TENSOR.FFN_UP,
1721
+ MODEL_TENSOR.FFN_GATE_EXP,
1722
+ MODEL_TENSOR.FFN_DOWN_EXP,
1723
+ MODEL_TENSOR.FFN_UP_EXP,
1724
+ MODEL_TENSOR.FFN_GATE_SHEXP,
1725
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
1726
+ MODEL_TENSOR.FFN_UP_SHEXP,
1727
+ MODEL_TENSOR.FFN_EXP_PROBS_B,
1728
+ ],
1729
+ MODEL_ARCH.PLM: [
1730
+ MODEL_TENSOR.TOKEN_EMBD,
1731
+ MODEL_TENSOR.OUTPUT,
1732
+ MODEL_TENSOR.OUTPUT_NORM,
1733
+ MODEL_TENSOR.ATTN_NORM,
1734
+ MODEL_TENSOR.ATTN_Q,
1735
+ MODEL_TENSOR.ATTN_KV_A_MQA,
1736
+ MODEL_TENSOR.ATTN_KV_A_NORM,
1737
+ MODEL_TENSOR.ATTN_KV_B,
1738
+ MODEL_TENSOR.ATTN_OUT,
1739
+ MODEL_TENSOR.FFN_NORM,
1740
+ MODEL_TENSOR.FFN_UP,
1741
+ MODEL_TENSOR.FFN_DOWN,
1742
+ ],
1743
+ MODEL_ARCH.CHATGLM: [
1744
+ MODEL_TENSOR.TOKEN_EMBD,
1745
+ MODEL_TENSOR.ROPE_FREQS,
1746
+ MODEL_TENSOR.OUTPUT_NORM,
1747
+ MODEL_TENSOR.OUTPUT,
1748
+ MODEL_TENSOR.ATTN_NORM,
1749
+ MODEL_TENSOR.ATTN_QKV,
1750
+ MODEL_TENSOR.ATTN_Q,
1751
+ MODEL_TENSOR.ATTN_K,
1752
+ MODEL_TENSOR.ATTN_V,
1753
+ MODEL_TENSOR.ATTN_OUT,
1754
+ MODEL_TENSOR.FFN_NORM,
1755
+ MODEL_TENSOR.FFN_DOWN,
1756
+ MODEL_TENSOR.FFN_UP,
1757
+ ],
1758
+ MODEL_ARCH.GLM4: [
1759
+ MODEL_TENSOR.TOKEN_EMBD,
1760
+ MODEL_TENSOR.ROPE_FREQS,
1761
+ MODEL_TENSOR.OUTPUT_NORM,
1762
+ MODEL_TENSOR.OUTPUT,
1763
+ MODEL_TENSOR.ATTN_NORM,
1764
+ MODEL_TENSOR.ATTN_QKV,
1765
+ MODEL_TENSOR.ATTN_Q,
1766
+ MODEL_TENSOR.ATTN_K,
1767
+ MODEL_TENSOR.ATTN_V,
1768
+ MODEL_TENSOR.ATTN_OUT,
1769
+ MODEL_TENSOR.FFN_NORM,
1770
+ MODEL_TENSOR.FFN_DOWN,
1771
+ MODEL_TENSOR.FFN_UP,
1772
+ MODEL_TENSOR.ATTN_POST_NORM,
1773
+ MODEL_TENSOR.FFN_POST_NORM,
1774
+ ],
1775
+ MODEL_ARCH.BITNET: [
1776
+ MODEL_TENSOR.ATTN_Q,
1777
+ MODEL_TENSOR.ATTN_K,
1778
+ MODEL_TENSOR.ATTN_V,
1779
+ MODEL_TENSOR.TOKEN_EMBD,
1780
+ MODEL_TENSOR.OUTPUT_NORM,
1781
+ MODEL_TENSOR.ATTN_NORM,
1782
+ MODEL_TENSOR.ATTN_OUT,
1783
+ MODEL_TENSOR.FFN_NORM,
1784
+ MODEL_TENSOR.FFN_GATE,
1785
+ MODEL_TENSOR.FFN_DOWN,
1786
+ MODEL_TENSOR.FFN_UP,
1787
+ MODEL_TENSOR.ATTN_SUB_NORM,
1788
+ MODEL_TENSOR.FFN_SUB_NORM,
1789
+ ],
1790
+ MODEL_ARCH.T5: [
1791
+ MODEL_TENSOR.TOKEN_EMBD,
1792
+ MODEL_TENSOR.OUTPUT,
1793
+ MODEL_TENSOR.DEC_ATTN_NORM,
1794
+ MODEL_TENSOR.DEC_ATTN_Q,
1795
+ MODEL_TENSOR.DEC_ATTN_K,
1796
+ MODEL_TENSOR.DEC_ATTN_V,
1797
+ MODEL_TENSOR.DEC_ATTN_OUT,
1798
+ MODEL_TENSOR.DEC_ATTN_REL_B,
1799
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM,
1800
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q,
1801
+ MODEL_TENSOR.DEC_CROSS_ATTN_K,
1802
+ MODEL_TENSOR.DEC_CROSS_ATTN_V,
1803
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT,
1804
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B,
1805
+ MODEL_TENSOR.DEC_FFN_NORM,
1806
+ MODEL_TENSOR.DEC_FFN_GATE,
1807
+ MODEL_TENSOR.DEC_FFN_DOWN,
1808
+ MODEL_TENSOR.DEC_FFN_UP,
1809
+ MODEL_TENSOR.DEC_OUTPUT_NORM,
1810
+ MODEL_TENSOR.ENC_ATTN_NORM,
1811
+ MODEL_TENSOR.ENC_ATTN_Q,
1812
+ MODEL_TENSOR.ENC_ATTN_K,
1813
+ MODEL_TENSOR.ENC_ATTN_V,
1814
+ MODEL_TENSOR.ENC_ATTN_OUT,
1815
+ MODEL_TENSOR.ENC_ATTN_REL_B,
1816
+ MODEL_TENSOR.ENC_FFN_NORM,
1817
+ MODEL_TENSOR.ENC_FFN_GATE,
1818
+ MODEL_TENSOR.ENC_FFN_DOWN,
1819
+ MODEL_TENSOR.ENC_FFN_UP,
1820
+ MODEL_TENSOR.ENC_OUTPUT_NORM,
1821
+ ],
1822
+ MODEL_ARCH.T5ENCODER: [
1823
+ MODEL_TENSOR.TOKEN_EMBD,
1824
+ MODEL_TENSOR.OUTPUT,
1825
+ MODEL_TENSOR.ENC_ATTN_NORM,
1826
+ MODEL_TENSOR.ENC_ATTN_Q,
1827
+ MODEL_TENSOR.ENC_ATTN_K,
1828
+ MODEL_TENSOR.ENC_ATTN_V,
1829
+ MODEL_TENSOR.ENC_ATTN_OUT,
1830
+ MODEL_TENSOR.ENC_ATTN_REL_B,
1831
+ MODEL_TENSOR.ENC_FFN_NORM,
1832
+ MODEL_TENSOR.ENC_FFN_GATE,
1833
+ MODEL_TENSOR.ENC_FFN_DOWN,
1834
+ MODEL_TENSOR.ENC_FFN_UP,
1835
+ MODEL_TENSOR.ENC_OUTPUT_NORM,
1836
+ ],
1837
+ MODEL_ARCH.JAIS: [
1838
+ MODEL_TENSOR.TOKEN_EMBD,
1839
+ MODEL_TENSOR.OUTPUT_NORM,
1840
+ MODEL_TENSOR.OUTPUT,
1841
+ MODEL_TENSOR.ATTN_NORM,
1842
+ MODEL_TENSOR.ATTN_QKV,
1843
+ MODEL_TENSOR.ATTN_OUT,
1844
+ MODEL_TENSOR.FFN_NORM,
1845
+ MODEL_TENSOR.FFN_DOWN,
1846
+ MODEL_TENSOR.FFN_GATE,
1847
+ MODEL_TENSOR.FFN_UP,
1848
+ ],
1849
+ MODEL_ARCH.NEMOTRON: [
1850
+ MODEL_TENSOR.TOKEN_EMBD,
1851
+ MODEL_TENSOR.OUTPUT_NORM,
1852
+ MODEL_TENSOR.OUTPUT,
1853
+ MODEL_TENSOR.ROPE_FREQS,
1854
+ MODEL_TENSOR.ATTN_NORM,
1855
+ MODEL_TENSOR.ATTN_Q,
1856
+ MODEL_TENSOR.ATTN_K,
1857
+ MODEL_TENSOR.ATTN_V,
1858
+ MODEL_TENSOR.ATTN_OUT,
1859
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1860
+ MODEL_TENSOR.FFN_NORM,
1861
+ MODEL_TENSOR.FFN_DOWN,
1862
+ MODEL_TENSOR.FFN_UP,
1863
+ ],
1864
+ MODEL_ARCH.EXAONE: [
1865
+ MODEL_TENSOR.TOKEN_EMBD,
1866
+ MODEL_TENSOR.OUTPUT_NORM,
1867
+ MODEL_TENSOR.OUTPUT,
1868
+ MODEL_TENSOR.ROPE_FREQS,
1869
+ MODEL_TENSOR.ATTN_NORM,
1870
+ MODEL_TENSOR.ATTN_Q,
1871
+ MODEL_TENSOR.ATTN_K,
1872
+ MODEL_TENSOR.ATTN_V,
1873
+ MODEL_TENSOR.ATTN_OUT,
1874
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1875
+ MODEL_TENSOR.FFN_NORM,
1876
+ MODEL_TENSOR.FFN_GATE,
1877
+ MODEL_TENSOR.FFN_DOWN,
1878
+ MODEL_TENSOR.FFN_UP,
1879
+ ],
1880
+ MODEL_ARCH.GRANITE: [
1881
+ MODEL_TENSOR.TOKEN_EMBD,
1882
+ MODEL_TENSOR.OUTPUT_NORM,
1883
+ MODEL_TENSOR.OUTPUT,
1884
+ MODEL_TENSOR.ATTN_NORM,
1885
+ MODEL_TENSOR.ATTN_Q,
1886
+ MODEL_TENSOR.ATTN_K,
1887
+ MODEL_TENSOR.ATTN_V,
1888
+ MODEL_TENSOR.ATTN_OUT,
1889
+ MODEL_TENSOR.FFN_NORM,
1890
+ MODEL_TENSOR.FFN_GATE,
1891
+ MODEL_TENSOR.FFN_DOWN,
1892
+ MODEL_TENSOR.FFN_UP,
1893
+ ],
1894
+ MODEL_ARCH.GRANITE_MOE: [
1895
+ MODEL_TENSOR.TOKEN_EMBD,
1896
+ MODEL_TENSOR.OUTPUT_NORM,
1897
+ MODEL_TENSOR.OUTPUT,
1898
+ MODEL_TENSOR.ATTN_NORM,
1899
+ MODEL_TENSOR.ATTN_Q,
1900
+ MODEL_TENSOR.ATTN_K,
1901
+ MODEL_TENSOR.ATTN_V,
1902
+ MODEL_TENSOR.ATTN_OUT,
1903
+ MODEL_TENSOR.FFN_NORM,
1904
+ MODEL_TENSOR.FFN_GATE_INP,
1905
+ MODEL_TENSOR.FFN_GATE_EXP,
1906
+ MODEL_TENSOR.FFN_DOWN_EXP,
1907
+ MODEL_TENSOR.FFN_UP_EXP,
1908
+ ],
1909
+ MODEL_ARCH.CHAMELEON: [
1910
+ MODEL_TENSOR.TOKEN_EMBD,
1911
+ MODEL_TENSOR.OUTPUT_NORM,
1912
+ MODEL_TENSOR.OUTPUT,
1913
+ MODEL_TENSOR.ATTN_NORM,
1914
+ MODEL_TENSOR.ATTN_Q,
1915
+ MODEL_TENSOR.ATTN_Q_NORM,
1916
+ MODEL_TENSOR.ATTN_K,
1917
+ MODEL_TENSOR.ATTN_K_NORM,
1918
+ MODEL_TENSOR.ATTN_V,
1919
+ MODEL_TENSOR.ATTN_OUT,
1920
+ MODEL_TENSOR.FFN_NORM,
1921
+ MODEL_TENSOR.FFN_GATE,
1922
+ MODEL_TENSOR.FFN_DOWN,
1923
+ MODEL_TENSOR.FFN_UP,
1924
+ ],
1925
+ MODEL_ARCH.WAVTOKENIZER_DEC: [
1926
+ MODEL_TENSOR.TOKEN_EMBD,
1927
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
1928
+ MODEL_TENSOR.CONV1D,
1929
+ MODEL_TENSOR.CONVNEXT_DW,
1930
+ MODEL_TENSOR.CONVNEXT_NORM,
1931
+ MODEL_TENSOR.CONVNEXT_PW1,
1932
+ MODEL_TENSOR.CONVNEXT_PW2,
1933
+ MODEL_TENSOR.CONVNEXT_GAMMA,
1934
+ MODEL_TENSOR.OUTPUT,
1935
+ MODEL_TENSOR.OUTPUT_NORM,
1936
+ MODEL_TENSOR.POSNET_CONV1,
1937
+ MODEL_TENSOR.POSNET_CONV2,
1938
+ MODEL_TENSOR.POSNET_NORM,
1939
+ MODEL_TENSOR.POSNET_NORM1,
1940
+ MODEL_TENSOR.POSNET_NORM2,
1941
+ MODEL_TENSOR.POSNET_ATTN_NORM,
1942
+ MODEL_TENSOR.POSNET_ATTN_Q,
1943
+ MODEL_TENSOR.POSNET_ATTN_K,
1944
+ MODEL_TENSOR.POSNET_ATTN_V,
1945
+ MODEL_TENSOR.POSNET_ATTN_OUT,
1946
+ ],
1947
+ MODEL_ARCH.BAILINGMOE: [
1948
+ MODEL_TENSOR.TOKEN_EMBD,
1949
+ MODEL_TENSOR.OUTPUT_NORM,
1950
+ MODEL_TENSOR.OUTPUT,
1951
+ MODEL_TENSOR.ROPE_FREQS,
1952
+ MODEL_TENSOR.ATTN_NORM,
1953
+ MODEL_TENSOR.ATTN_Q,
1954
+ MODEL_TENSOR.ATTN_K,
1955
+ MODEL_TENSOR.ATTN_V,
1956
+ MODEL_TENSOR.ATTN_OUT,
1957
+ MODEL_TENSOR.FFN_GATE_INP,
1958
+ MODEL_TENSOR.FFN_NORM,
1959
+ MODEL_TENSOR.FFN_GATE_EXP,
1960
+ MODEL_TENSOR.FFN_DOWN_EXP,
1961
+ MODEL_TENSOR.FFN_UP_EXP,
1962
+ MODEL_TENSOR.FFN_GATE_SHEXP,
1963
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
1964
+ MODEL_TENSOR.FFN_UP_SHEXP,
1965
+ ],
1966
+ # TODO
1967
+ }
1968
+
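The MODEL_TENSORS map above pairs each architecture with the tensor kinds it is expected to contain; together with the TENSOR_NAMES templates earlier in this file it can be used to enumerate the GGUF tensor names a converter should emit. A minimal sketch, assuming the upstream gguf-py layout where this file is importable as `gguf.constants` (the loop body is illustrative only):

```python
# Hedged sketch: enumerate the tensor-name templates expected for LLaMA.
# Assumes this module is importable as gguf.constants, as in upstream gguf-py.
from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

for tensor in MODEL_TENSORS[MODEL_ARCH.LLAMA]:
    # Per-layer entries in TENSOR_NAMES keep a "{bid}" placeholder for the
    # block index; global tensors such as "token_embd" have no placeholder.
    print(TENSOR_NAMES[tensor])
```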
1969
+ # tensors that will not be serialized
1970
+ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
1971
+ MODEL_ARCH.LLAMA: [
1972
+ MODEL_TENSOR.ROPE_FREQS,
1973
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1974
+ ],
1975
+ MODEL_ARCH.DECI: [
1976
+ MODEL_TENSOR.ROPE_FREQS,
1977
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1978
+ ],
1979
+ MODEL_ARCH.BAICHUAN: [
1980
+ MODEL_TENSOR.ROPE_FREQS,
1981
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1982
+ ],
1983
+ MODEL_ARCH.QWEN: [
1984
+ MODEL_TENSOR.ROPE_FREQS,
1985
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1986
+ ],
1987
+ MODEL_ARCH.CODESHELL: [
1988
+ MODEL_TENSOR.ROPE_FREQS,
1989
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1990
+ ],
1991
+ MODEL_ARCH.ORION: [
1992
+ MODEL_TENSOR.ROPE_FREQS,
1993
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1994
+ ],
1995
+ MODEL_ARCH.STARCODER2: [
1996
+ MODEL_TENSOR.ROPE_FREQS,
1997
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1998
+ ],
1999
+ MODEL_ARCH.XVERSE: [
2000
+ MODEL_TENSOR.ROPE_FREQS,
2001
+ MODEL_TENSOR.ATTN_ROT_EMBD,
2002
+ ],
2003
+ MODEL_ARCH.DEEPSEEK: [
2004
+ MODEL_TENSOR.ROPE_FREQS,
2005
+ MODEL_TENSOR.ATTN_ROT_EMBD,
2006
+ ],
2007
+ MODEL_ARCH.DEEPSEEK2: [
2008
+ MODEL_TENSOR.ROPE_FREQS,
2009
+ MODEL_TENSOR.ATTN_ROT_EMBD,
2010
+ ],
2011
+ MODEL_ARCH.CHATGLM: [
2012
+ MODEL_TENSOR.ROPE_FREQS,
2013
+ ],
2014
+ MODEL_ARCH.NEMOTRON: [
2015
+ MODEL_TENSOR.ROPE_FREQS,
2016
+ MODEL_TENSOR.ATTN_ROT_EMBD,
2017
+ ],
2018
+ MODEL_ARCH.BAILINGMOE: [
2019
+ MODEL_TENSOR.ROPE_FREQS,
2020
+ ],
2021
+ }
2022
+
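MODEL_TENSOR_SKIP lists tensors (typically precomputed RoPE frequency and rotary-embedding tables) that a writer drops rather than serializes. A hedged sketch of how a conversion script might apply it, assuming the same imports as above:

```python
# Hedged sketch: filter out non-serialized tensors for one architecture.
from gguf.constants import MODEL_ARCH, MODEL_TENSORS, MODEL_TENSOR_SKIP

arch = MODEL_ARCH.LLAMA
skip = set(MODEL_TENSOR_SKIP.get(arch, []))          # e.g. ROPE_FREQS, ATTN_ROT_EMBD
to_serialize = [t for t in MODEL_TENSORS[arch] if t not in skip]
```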
2023
+ #
2024
+ # types
2025
+ #
2026
+
2027
+
2028
+ class TokenType(IntEnum):
2029
+ NORMAL = 1
2030
+ UNKNOWN = 2
2031
+ CONTROL = 3
2032
+ USER_DEFINED = 4
2033
+ UNUSED = 5
2034
+ BYTE = 6
2035
+
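TokenType is the per-token classification written alongside the vocabulary. A small hedged sketch of building a type array aligned with an (illustrative) token list, marking special tokens as CONTROL and the rest as NORMAL:

```python
# Hedged sketch: tag each vocabulary entry with a TokenType.
from gguf.constants import TokenType

tokens = ["<s>", "</s>", "hello", "world"]   # illustrative vocab, not real data
special = {"<s>", "</s>"}
token_types = [TokenType.CONTROL if t in special else TokenType.NORMAL
               for t in tokens]
```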
2036
+
2037
+ class RopeScalingType(Enum):
2038
+ NONE = 'none'
2039
+ LINEAR = 'linear'
2040
+ YARN = 'yarn'
2041
+ LONGROPE = 'longrope'
2042
+
2043
+
2044
+ class PoolingType(IntEnum):
2045
+ NONE = 0
2046
+ MEAN = 1
2047
+ CLS = 2
2048
+ LAST = 3
2049
+ RANK = 4
2050
+
2051
+
2052
+ class GGMLQuantizationType(IntEnum):
2053
+ F32 = 0
2054
+ F16 = 1
2055
+ Q4_0 = 2
2056
+ Q4_1 = 3
2057
+ Q5_0 = 6
2058
+ Q5_1 = 7
2059
+ Q8_0 = 8
2060
+ Q8_1 = 9
2061
+ Q2_K = 10
2062
+ Q3_K = 11
2063
+ Q4_K = 12
2064
+ Q5_K = 13
2065
+ Q6_K = 14
2066
+ Q8_K = 15
2067
+ IQ2_XXS = 16
2068
+ IQ2_XS = 17
2069
+ IQ3_XXS = 18
2070
+ IQ1_S = 19
2071
+ IQ4_NL = 20
2072
+ IQ3_S = 21
2073
+ IQ2_S = 22
2074
+ IQ4_XS = 23
2075
+ I8 = 24
2076
+ I16 = 25
2077
+ I32 = 26
2078
+ I64 = 27
2079
+ F64 = 28
2080
+ IQ1_M = 29
2081
+ BF16 = 30
2082
+ TQ1_0 = 34
2083
+ TQ2_0 = 35
2084
+
2085
+
2086
+ class ExpertGatingFuncType(IntEnum):
2087
+ SOFTMAX = 1
2088
+ SIGMOID = 2
2089
+
2090
+
2091
+ # TODO: add GGMLFileType from ggml_ftype in ggml.h
2092
+
2093
+
2094
+ # from llama_ftype in llama.h
2095
+ # ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
2096
+ class LlamaFileType(IntEnum):
2097
+ ALL_F32 = 0
2098
+ MOSTLY_F16 = 1 # except 1d tensors
2099
+ MOSTLY_Q4_0 = 2 # except 1d tensors
2100
+ MOSTLY_Q4_1 = 3 # except 1d tensors
2101
+ # MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
2102
+ # MOSTLY_Q4_2 = 5 # support has been removed
2103
+ # MOSTLY_Q4_3 = 6 # support has been removed
2104
+ MOSTLY_Q8_0 = 7 # except 1d tensors
2105
+ MOSTLY_Q5_0 = 8 # except 1d tensors
2106
+ MOSTLY_Q5_1 = 9 # except 1d tensors
2107
+ MOSTLY_Q2_K = 10 # except 1d tensors
2108
+ MOSTLY_Q3_K_S = 11 # except 1d tensors
2109
+ MOSTLY_Q3_K_M = 12 # except 1d tensors
2110
+ MOSTLY_Q3_K_L = 13 # except 1d tensors
2111
+ MOSTLY_Q4_K_S = 14 # except 1d tensors
2112
+ MOSTLY_Q4_K_M = 15 # except 1d tensors
2113
+ MOSTLY_Q5_K_S = 16 # except 1d tensors
2114
+ MOSTLY_Q5_K_M = 17 # except 1d tensors
2115
+ MOSTLY_Q6_K = 18 # except 1d tensors
2116
+ MOSTLY_IQ2_XXS = 19 # except 1d tensors
2117
+ MOSTLY_IQ2_XS = 20 # except 1d tensors
2118
+ MOSTLY_Q2_K_S = 21 # except 1d tensors
2119
+ MOSTLY_IQ3_XS = 22 # except 1d tensors
2120
+ MOSTLY_IQ3_XXS = 23 # except 1d tensors
2121
+ MOSTLY_IQ1_S = 24 # except 1d tensors
2122
+ MOSTLY_IQ4_NL = 25 # except 1d tensors
2123
+ MOSTLY_IQ3_S = 26 # except 1d tensors
2124
+ MOSTLY_IQ3_M = 27 # except 1d tensors
2125
+ MOSTLY_IQ2_S = 28 # except 1d tensors
2126
+ MOSTLY_IQ2_M = 29 # except 1d tensors
2127
+ MOSTLY_IQ4_XS = 30 # except 1d tensors
2128
+ MOSTLY_IQ1_M = 31 # except 1d tensors
2129
+ MOSTLY_BF16 = 32 # except 1d tensors
2130
+ # MOSTLY_Q4_0_4_4 = 33 # removed from gguf files, use Q4_0 and runtime repack
2131
+ # MOSTLY_Q4_0_4_8 = 34 # removed from gguf files, use Q4_0 and runtime repack
2132
+ # MOSTLY_Q4_0_8_8 = 35 # removed from gguf files, use Q4_0 and runtime repack
2133
+ MOSTLY_TQ1_0 = 36 # except 1d tensors
2134
+ MOSTLY_TQ2_0 = 37 # except 1d tensors
2135
+
2136
+ GUESSED = 1024 # not specified in the model file
2137
+
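Since the LlamaFileType members follow a fixed `MOSTLY_<quant>` spelling, a quantization name such as "Q4_K_M" can be resolved to its enum member by name lookup. A hedged sketch; the helper name is illustrative, not part of gguf:

```python
# Hedged sketch: map a quantization name onto the LlamaFileType enum above.
from gguf.constants import LlamaFileType

def file_type_from_name(name: str) -> LlamaFileType:
    # "q4_k_m" -> LlamaFileType.MOSTLY_Q4_K_M (raises KeyError for unknown names)
    return LlamaFileType[f"MOSTLY_{name.upper()}"]

assert file_type_from_name("q4_k_m") is LlamaFileType.MOSTLY_Q4_K_M
```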
2138
+
2139
+ class GGUFEndian(IntEnum):
2140
+ LITTLE = 0
2141
+ BIG = 1
2142
+
2143
+
2144
+ class GGUFValueType(IntEnum):
2145
+ UINT8 = 0
2146
+ INT8 = 1
2147
+ UINT16 = 2
2148
+ INT16 = 3
2149
+ UINT32 = 4
2150
+ INT32 = 5
2151
+ FLOAT32 = 6
2152
+ BOOL = 7
2153
+ STRING = 8
2154
+ ARRAY = 9
2155
+ UINT64 = 10
2156
+ INT64 = 11
2157
+ FLOAT64 = 12
2158
+
2159
+ @staticmethod
2160
+ def get_type(val: Any) -> GGUFValueType:
2161
+ if isinstance(val, (str, bytes, bytearray)):
2162
+ return GGUFValueType.STRING
2163
+ elif isinstance(val, list):
2164
+ return GGUFValueType.ARRAY
2165
+ elif isinstance(val, float):
2166
+ return GGUFValueType.FLOAT32
2167
+ elif isinstance(val, bool):
2168
+ return GGUFValueType.BOOL
2169
+ elif isinstance(val, int):
2170
+ return GGUFValueType.INT32
2171
+ # TODO: need help with 64-bit types in Python
2172
+ else:
2173
+ raise ValueError(f"Unknown type: {type(val)}")
2174
+
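GGUFValueType.get_type infers the GGUF metadata value type from a plain Python value; the bool check deliberately precedes the int check because bool is a subclass of int in Python. A brief hedged usage sketch:

```python
# Hedged usage sketch for GGUFValueType.get_type (defined above).
from gguf.constants import GGUFValueType

assert GGUFValueType.get_type("llama") == GGUFValueType.STRING
assert GGUFValueType.get_type(True) == GGUFValueType.BOOL      # caught before INT32
assert GGUFValueType.get_type(7) == GGUFValueType.INT32
assert GGUFValueType.get_type(0.5) == GGUFValueType.FLOAT32
```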
2175
+
2176
+ class VisionProjectorType:
2177
+ GEMMA3 = "gemma3"
2178
+ IDEFICS3 = "idefics3"
2179
+ PIXTRAL = "pixtral"
2180
+ QWEN2VL = "qwen2vl_merger"
2181
+ QWEN25VL = "qwen2.5vl_merger"
2182
+ INTERNVL = "internvl"
2183
+
2184
+
2185
+ # Items here are (block size, type size)
2186
+ QK_K = 256
2187
+ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
2188
+ GGMLQuantizationType.F32: (1, 4),
2189
+ GGMLQuantizationType.F16: (1, 2),
2190
+ GGMLQuantizationType.Q4_0: (32, 2 + 16),
2191
+ GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
2192
+ GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
2193
+ GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
2194
+ GGMLQuantizationType.Q8_0: (32, 2 + 32),
2195
+ GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
2196
+ GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
2197
+ GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
2198
+ GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
2199
+ GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
2200
+ GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
2201
+ GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
2202
+ GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
2203
+ GGMLQuantizationType.IQ2_XS: (256, 2 + QK_K // 4 + QK_K // 32),
2204
+ GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
2205
+ GGMLQuantizationType.IQ1_S: (256, 2 + QK_K // 8 + QK_K // 16),
2206
+ GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
2207
+ GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
2208
+ GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
2209
+ GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
2210
+ GGMLQuantizationType.I8: (1, 1),
2211
+ GGMLQuantizationType.I16: (1, 2),
2212
+ GGMLQuantizationType.I32: (1, 4),
2213
+ GGMLQuantizationType.I64: (1, 8),
2214
+ GGMLQuantizationType.F64: (1, 8),
2215
+ GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
2216
+ GGMLQuantizationType.BF16: (1, 2),
2217
+ GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
2218
+ GGMLQuantizationType.TQ2_0: (256, 2 + 64),
2219
+ }
2220
+
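Each GGML_QUANT_SIZES entry gives the block size (elements per quantization block) and the type size (bytes per block), so the storage footprint of a tensor follows directly. A hedged sketch; the helper name is illustrative:

```python
# Hedged sketch: estimate the byte size of a quantized tensor from
# GGML_QUANT_SIZES, which maps each type to (block size, type size in bytes).
from gguf.constants import GGMLQuantizationType, GGML_QUANT_SIZES

def tensor_nbytes(n_elements: int, qtype: GGMLQuantizationType) -> int:
    block_size, type_size = GGML_QUANT_SIZES[qtype]
    assert n_elements % block_size == 0      # element count must be block-aligned
    return n_elements // block_size * type_size

# e.g. a 4096 x 4096 matrix in Q4_0: (4096*4096)/32 blocks of 18 bytes each
print(tensor_nbytes(4096 * 4096, GGMLQuantizationType.Q4_0))   # 9437184
```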
2221
+
2222
+ # Aliases for backward compatibility.
2223
+
2224
+ # general
2225
+ KEY_GENERAL_ARCHITECTURE = Keys.General.ARCHITECTURE
2226
+ KEY_GENERAL_QUANTIZATION_VERSION = Keys.General.QUANTIZATION_VERSION
2227
+ KEY_GENERAL_ALIGNMENT = Keys.General.ALIGNMENT
2228
+ KEY_GENERAL_NAME = Keys.General.NAME
2229
+ KEY_GENERAL_AUTHOR = Keys.General.AUTHOR
2230
+ KEY_GENERAL_URL = Keys.General.URL
2231
+ KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
2232
+ KEY_GENERAL_LICENSE = Keys.General.LICENSE
2233
+ KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
2234
+ KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE
2235
+
2236
+ # LLM
2237
+ KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE
2238
+ KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH
2239
+ KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH
2240
+ KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT
2241
+ KEY_FEED_FORWARD_LENGTH = Keys.LLM.FEED_FORWARD_LENGTH
2242
+ KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL
2243
+ KEY_TENSOR_DATA_LAYOUT = Keys.LLM.TENSOR_DATA_LAYOUT
2244
+
2245
+ # attention
2246
+ KEY_ATTENTION_HEAD_COUNT = Keys.Attention.HEAD_COUNT
2247
+ KEY_ATTENTION_HEAD_COUNT_KV = Keys.Attention.HEAD_COUNT_KV
2248
+ KEY_ATTENTION_MAX_ALIBI_BIAS = Keys.Attention.MAX_ALIBI_BIAS
2249
+ KEY_ATTENTION_CLAMP_KQV = Keys.Attention.CLAMP_KQV
2250
+ KEY_ATTENTION_LAYERNORM_EPS = Keys.Attention.LAYERNORM_EPS
2251
+ KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.Attention.LAYERNORM_RMS_EPS
2252
+
2253
+ # RoPE
2254
+ KEY_ROPE_DIMENSION_COUNT = Keys.Rope.DIMENSION_COUNT
2255
+ KEY_ROPE_FREQ_BASE = Keys.Rope.FREQ_BASE
2256
+ KEY_ROPE_SCALING_TYPE = Keys.Rope.SCALING_TYPE
2257
+ KEY_ROPE_SCALING_FACTOR = Keys.Rope.SCALING_FACTOR
2258
+ KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
2259
+ KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED
2260
+
2261
+ # SSM
2262
+ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
2263
+ KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
2264
+ KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
2265
+ KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
2266
+ KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS
2267
+
2268
+ # tokenization
2269
+ KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
2270
+ KEY_TOKENIZER_PRE = Keys.Tokenizer.PRE
2271
+ KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
2272
+ KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
2273
+ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
2274
+ KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
2275
+ KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
2276
+ KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
2277
+ KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
2278
+ KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
2279
+ KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
2280
+ KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
2281
+ KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
2282
+ KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
2283
+ KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
2284
+ KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
2285
+
2286
+ KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
2287
+ KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
2288
+ KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
2289
+ KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
2290
+ KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
2291
+ KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
2292
+
2293
+ # deprecated
2294
+ KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
2295
+ KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
2296
+ KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID