local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,892 @@
1
+ # coding=utf-8
2
+ # Copyright 2024 Google AI and The HuggingFace Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """ PyTorch Siglip model. """
16
+ # Copied from HuggingFaceM4/siglip-so400m-14-980-flash-attn2-navit and add tgt_sizes
17
+
18
+
19
+ import os
20
+ import math
21
+ import warnings
22
+
23
+ import numpy as np
24
+ import torch
25
+ import torch.nn.functional as F
26
+ from torch import nn
27
+ from torch.nn.init import _calculate_fan_in_and_fan_out
28
+
29
+ from transformers.activations import ACT2FN
30
+ from transformers.modeling_utils import PreTrainedModel
31
+ from transformers.configuration_utils import PretrainedConfig
32
+ from transformers.utils import (
33
+ logging,
34
+ )
35
+ from transformers.utils import logging
36
+
37
+ logger = logging.get_logger(__name__)
38
+
39
class SiglipVisionConfig(PretrainedConfig):
    r"""
    Hyper-parameter container for the SigLIP vision encoder ([`SiglipVisionTransformer`] in this file).

    Every argument is stored unchanged as an attribute of the same name; any extra keyword arguments are
    forwarded to [`PretrainedConfig`]. The defaults correspond to the vision tower of
    [google/siglip-base-patch16-224](https://huggingface.co/google/siglip-base-patch16-224).

    Args:
        hidden_size (`int`, *optional*, defaults to 768):
            Width of the encoder layers and of the pooler layer.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Width of the feed-forward ("intermediate") layer inside each encoder block.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of encoder blocks.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads per attention layer.
        num_channels (`int`, *optional*, defaults to 3):
            Number of channels in the input images.
        image_size (`int`, *optional*, defaults to 224):
            Resolution of each input image.
        patch_size (`int`, *optional*, defaults to 16):
            Resolution of each image patch.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`):
            Activation used in the encoder and pooler. When given as a string, `"gelu"`, `"relu"`, `"selu"`,
            `"gelu_new"` and `"quick_gelu"` are supported.
        layer_norm_eps (`float`, *optional*, defaults to 1e-06):
            Epsilon used by the layer-normalization layers.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            Dropout ratio applied to the attention probabilities.

    Example:
    ```python
    >>> configuration = SiglipVisionConfig()
    >>> model = SiglipVisionTransformer(configuration)
    >>> configuration = model.config
    ```"""

    model_type = "siglip_vision_model"

    def __init__(
        self,
        hidden_size=768,
        intermediate_size=3072,
        num_hidden_layers=12,
        num_attention_heads=12,
        num_channels=3,
        image_size=224,
        patch_size=16,
        hidden_act="gelu_pytorch_tanh",
        layer_norm_eps=1e-6,
        attention_dropout=0.0,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Transformer geometry.
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Input image / patch layout.
        self.num_channels = num_channels
        self.image_size = image_size
        self.patch_size = patch_size

        # Activation, normalization and regularization settings.
        self.hidden_act = hidden_act
        self.layer_norm_eps = layer_norm_eps
        self.attention_dropout = attention_dropout
108
+
109
+ _CHECKPOINT_FOR_DOC = "google/siglip-base-patch16-224"
110
+
111
+ SIGLIP_PRETRAINED_MODEL_ARCHIVE_LIST = [
112
+ "google/siglip-base-patch16-224",
113
+ # See all SigLIP models at https://huggingface.co/models?filter=siglip
114
+ ]
115
+
116
+ # Copied from transformers.models.llama.modeling_llama._get_unpad_data
117
+ def _get_unpad_data(attention_mask):
118
+ seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
119
+ indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
120
+ max_seqlen_in_batch = seqlens_in_batch.max().item()
121
+ cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
122
+ return (
123
+ indices,
124
+ cu_seqlens,
125
+ max_seqlen_in_batch,
126
+ )
127
+
128
+
129
+ def _trunc_normal_(tensor, mean, std, a, b):
130
+ # Cut & paste from PyTorch official master until it's in a few official releases - RW
131
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
132
+ def norm_cdf(x):
133
+ # Computes standard normal cumulative distribution function
134
+ return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0
135
+
136
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
137
+ warnings.warn(
138
+ "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
139
+ "The distribution of values may be incorrect.",
140
+ stacklevel=2,
141
+ )
142
+
143
+ # Values are generated by using a truncated uniform distribution and
144
+ # then using the inverse CDF for the normal distribution.
145
+ # Get upper and lower cdf values
146
+ l = norm_cdf((a - mean) / std)
147
+ u = norm_cdf((b - mean) / std)
148
+
149
+ # Uniformly fill tensor with values from [l, u], then translate to
150
+ # [2l-1, 2u-1].
151
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
152
+
153
+ # Use inverse cdf transform for normal distribution to get truncated
154
+ # standard normal
155
+ if tensor.dtype in [torch.float16, torch.bfloat16]:
156
+ # The `erfinv_` op is not (yet?) defined in float16+cpu, bfloat16+gpu
157
+ og_dtype = tensor.dtype
158
+ tensor = tensor.to(torch.float32)
159
+ tensor.erfinv_()
160
+ tensor = tensor.to(og_dtype)
161
+ else:
162
+ tensor.erfinv_()
163
+
164
+ # Transform to proper mean, std
165
+ tensor.mul_(std * math.sqrt(2.0))
166
+ tensor.add_(mean)
167
+
168
+ # Clamp to ensure it's in the proper range
169
+ if tensor.dtype == torch.float16:
170
+ # The `clamp_` op is not (yet?) defined in float16+cpu
171
+ tensor = tensor.to(torch.float32)
172
+ tensor.clamp_(min=a, max=b)
173
+ tensor = tensor.to(torch.float16)
174
+ else:
175
+ tensor.clamp_(min=a, max=b)
176
+
177
+
178
def trunc_normal_tf_(
    tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0, a: float = -2.0, b: float = 2.0
):
    r"""Fill ``tensor`` in place with truncated-normal samples, TensorFlow/JAX style.

    The cutoffs :math:`[a, b]` are applied while sampling a *standard* normal
    (mean 0, std 1.0); the result is subsequently scaled by ``std`` and
    shifted by ``mean``. This differs from applying the bounds to
    :math:`\mathcal{N}(\text{mean}, \text{std}^2)` directly.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean applied after sampling
        std: the standard deviation applied after sampling
        a: the minimum cutoff value (in standard-normal units)
        b: the maximum cutoff value (in standard-normal units)
    """
    with torch.no_grad():
        # Sample the truncated standard normal first ...
        _trunc_normal_(tensor, 0, 1.0, a, b)
        # ... then rescale and recenter.
        tensor.mul_(std)
        tensor.add_(mean)
200
+
201
+
202
def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"):
    """Initialize ``tensor`` in place with variance ``scale / fan``.

    Args:
        tensor: weight tensor to overwrite.
        scale: numerator of the target variance.
        mode: which fan to divide by: ``"fan_in"``, ``"fan_out"`` or
            ``"fan_avg"``; any other value falls back to fan-in.
        distribution: ``"truncated_normal"``, ``"normal"`` or ``"uniform"``.

    Raises:
        ValueError: if ``distribution`` is not one of the supported names.
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    fan_by_mode = {
        "fan_in": fan_in,
        "fan_out": fan_out,
        "fan_avg": (fan_in + fan_out) / 2,
    }
    variance = scale / fan_by_mode.get(mode, fan_in)

    if distribution == "truncated_normal":
        # constant is stddev of standard normal truncated to (-2, 2)
        trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978)
        return

    if distribution == "normal":
        with torch.no_grad():
            tensor.normal_(std=math.sqrt(variance))
        return

    if distribution == "uniform":
        limit = math.sqrt(3 * variance)
        with torch.no_grad():
            tensor.uniform_(-limit, limit)
        return

    raise ValueError(f"invalid distribution {distribution}")
226
+
227
+
228
def lecun_normal_(tensor):
    """Initialize ``tensor`` in place: fan-in variance scaling with a truncated normal."""
    variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="truncated_normal")
230
+
231
+
232
def default_flax_embed_init(tensor):
    """Initialize ``tensor`` in place: fan-in variance scaling with a plain normal."""
    variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal")
234
+
235
class SiglipVisionEmbeddings(nn.Module):
    """Patch-embedding convolution plus a learned position-embedding table.

    Only the parameters are declared here; this class defines no ``forward``.
    """

    def __init__(self, config: SiglipVisionConfig):
        super().__init__()
        self.config = config
        self.embed_dim = config.hidden_size
        self.image_size = config.image_size
        self.patch_size = config.patch_size

        # Non-overlapping patches: kernel size and stride are both the patch size.
        self.patch_embedding = nn.Conv2d(
            config.num_channels,
            self.embed_dim,
            kernel_size=self.patch_size,
            stride=self.patch_size,
            padding="valid",
        )

        # One position per patch of a square image_size x image_size grid.
        patches_per_side = self.image_size // self.patch_size
        self.num_patches_per_side = patches_per_side
        self.num_patches = patches_per_side**2
        self.num_positions = self.num_patches
        self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim)
255
+
256
class SiglipAttention(nn.Module):
    """Multi-headed attention from 'Attention Is All You Need' paper (parameter declarations only)."""

    # Copied from transformers.models.clip.modeling_clip.CLIPAttention.__init__
    def __init__(self, config):
        """Create the q/k/v/out projections.

        Raises:
            ValueError: if ``config.hidden_size`` is not a multiple of
                ``config.num_attention_heads``.
        """
        super().__init__()
        self.config = config
        self.embed_dim = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.head_dim, leftover = divmod(self.embed_dim, self.num_heads)
        if leftover:
            raise ValueError(
                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:"
                f" {self.num_heads})."
            )
        self.scale = self.head_dim**-0.5
        self.dropout = config.attention_dropout

        # Creation order (k, v, q, out) is kept: it fixes the state-dict key
        # order and the order in which random initial values are drawn.
        width = self.embed_dim
        self.k_proj = nn.Linear(width, width)
        self.v_proj = nn.Linear(width, width)
        self.q_proj = nn.Linear(width, width)
        self.out_proj = nn.Linear(width, width)
278
+
279
+ # Copied from transformers.models.clip.modeling_clip.CLIPMLP with CLIP->Siglip
280
class SiglipMLP(nn.Module):
    """Two-layer feed-forward block: hidden -> intermediate -> hidden (parameter declarations only)."""

    def __init__(self, config):
        super().__init__()
        self.config = config
        # Resolve the activation by the name stored in the config.
        self.activation_fn = ACT2FN[config.hidden_act]
        hidden, inner = config.hidden_size, config.intermediate_size
        self.fc1 = nn.Linear(hidden, inner)
        self.fc2 = nn.Linear(inner, hidden)
287
+
288
+
289
+ # Copied from transformers.models.clip.modeling_clip.CLIPEncoderLayer with CLIP->Siglip
290
class SiglipEncoderLayer(nn.Module):
    """One encoder block: self-attention and MLP, each with its own LayerNorm (parameter declarations only)."""

    def __init__(self, config: SiglipVisionConfig):
        super().__init__()
        self.embed_dim = config.hidden_size
        # Records whether flash-attention 2 was requested in the config.
        self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"

        norm_eps = config.layer_norm_eps
        self.self_attn = SiglipAttention(config)
        self.layer_norm1 = nn.LayerNorm(self.embed_dim, eps=norm_eps)
        self.mlp = SiglipMLP(config)
        self.layer_norm2 = nn.LayerNorm(self.embed_dim, eps=norm_eps)
301
+
302
class SiglipPreTrainedModel(PreTrainedModel):
    """
    Abstract base that binds `SiglipVisionConfig` to `PreTrainedModel` and defines how each kind of sub-module
    is initialized.
    """

    config_class = SiglipVisionConfig
    base_model_prefix = "siglip"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        """Initialize the weights of ``module`` in place, chosen by the module's type."""
        # The checks run from most to least specific; the first match wins.
        if isinstance(module, SiglipVisionEmbeddings):
            hidden = self.config.hidden_size
            nn.init.normal_(module.position_embedding.weight, std=1 / np.sqrt(hidden))
            return

        if isinstance(module, nn.Embedding):
            default_flax_embed_init(module.weight)
            return

        if isinstance(module, SiglipAttention):
            projections = (module.q_proj, module.k_proj, module.v_proj, module.out_proj)
            for proj in projections:
                nn.init.normal_(proj.weight)
            for proj in projections:
                nn.init.zeros_(proj.bias)
            return

        if isinstance(module, SiglipMLP):
            dense_layers = (module.fc1, module.fc2)
            for dense in dense_layers:
                nn.init.normal_(dense.weight)
            for dense in dense_layers:
                nn.init.normal_(dense.bias, std=1e-6)
            return

        if isinstance(module, (nn.Linear, nn.Conv2d)):
            lecun_normal_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
            return

        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
341
+
342
+
343
+ SIGLIP_START_DOCSTRING = r"""
344
+ This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
345
+ library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
346
+ etc.)
347
+ This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
348
+ Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
349
+ and behavior.
350
+ Parameters:
351
+ config ([`SiglipVisionConfig`]): Model configuration class with all the parameters of the model.
352
+ Initializing with a config file does not load the weights associated with the model, only the
353
+ configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
354
+ """
355
+
356
+
357
+ SIGLIP_VISION_INPUTS_DOCSTRING = r"""
358
+ Args:
359
+ pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
360
+ Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
361
+ [`AutoImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details.
362
+ output_attentions (`bool`, *optional*):
363
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
364
+ tensors for more detail.
365
+ output_hidden_states (`bool`, *optional*):
366
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
367
+ more detail.
368
+ return_dict (`bool`, *optional*):
369
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
370
+ """
371
+
372
+
373
+ # Copied from transformers.models.clip.modeling_clip.CLIPEncoder with CLIP->Siglip
374
class SiglipEncoder(nn.Module):
    """
    Stack of `config.num_hidden_layers` [`SiglipEncoderLayer`] blocks (parameter declarations only).

    Args:
        config: SiglipVisionConfig
    """

    def __init__(self, config: SiglipVisionConfig):
        super().__init__()
        self.config = config
        blocks = [SiglipEncoderLayer(config) for _ in range(config.num_hidden_layers)]
        self.layers = nn.ModuleList(blocks)
        self.gradient_checkpointing = False
387
+
388
class SiglipVisionTransformer(SiglipPreTrainedModel):
    """SigLIP vision tower: embeddings, encoder stack and a final LayerNorm (parameter declarations only)."""

    config_class = SiglipVisionConfig
    main_input_name = "pixel_values"
    _supports_flash_attn_2 = True

    def __init__(self, config: SiglipVisionConfig):
        super().__init__(config)
        self.config = config

        self.embeddings = SiglipVisionEmbeddings(config)
        self.encoder = SiglipEncoder(config)
        self.post_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        # Records whether flash-attention 2 was requested in the config.
        self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> nn.Module:
        """Return the patch-embedding module of the embeddings layer."""
        return self.embeddings.patch_embedding
408
+
409
+ import argparse
410
+ import json
411
+ import re
412
+
413
+ import numpy as np
414
+ from gguf import *
415
+ from transformers.models.idefics2.modeling_idefics2 import Idefics2VisionTransformer
416
+ from transformers.models.idefics2.configuration_idefics2 import Idefics2VisionConfig
417
+
418
+ TEXT = "clip.text"
419
+ VISION = "clip.vision"
420
+
421
+
422
def add_key_str(raw_key: str, arch: str) -> str:
    """Return ``raw_key`` with its ``{arch}`` placeholder filled in by ``arch``."""
    formatted_key = raw_key.format(arch=arch)
    return formatted_key
424
+
425
+
426
def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_minicpmv: bool) -> bool:
    """Decide whether the tensor called ``name`` is left out of the export.

    A tensor is skipped when it is one of the fixed bookkeeping tensors, when
    it is the visual projection and a MiniCPM-V projector is being written,
    or when its name starts with ``"v"`` / ``"t"`` while the vision / text
    encoder is not being saved.
    """
    always_skipped = (
        "logit_scale",
        "text_model.embeddings.position_ids",
        "vision_model.embeddings.position_ids",
    )
    if name in always_skipped:
        return True

    if has_minicpmv and name == "visual_projection.weight":
        return True

    unwanted_vision = name.startswith("v") and not has_vision
    unwanted_text = name.startswith("t") and not has_text
    return unwanted_vision or unwanted_text
444
+
445
+
446
def get_tensor_name(name: str) -> str:
    """Translate a checkpoint tensor name into the short name written to the GGUF file.

    Names containing ``"projection"`` are returned unchanged. Names containing
    ``"mm_projector"`` get the ``mm.`` prefix rewrite. Every other name goes
    through an ordered list of substring replacements.
    """
    if "projection" in name:
        return name

    if "mm_projector" in name:
        renamed = name.replace("model.mm_projector", "mm")
        renamed = re.sub(r'mm\.mlp\.mlp', 'mm.model.mlp', renamed, count=1)
        return re.sub(r'mm\.peg\.peg', 'mm.model.peg', renamed, count=1)

    # Applied strictly in this order: later rules see the output of earlier ones.
    rewrites = (
        ("text_model", "t"),
        ("vision_model", "v"),
        ("encoder.layers", "blk"),
        ("embeddings.", ""),
        ("_proj", ""),
        ("self_attn.", "attn_"),
        ("layer_norm", "ln"),
        ("layernorm", "ln"),
        ("mlp.fc1", "ffn_down"),
        ("mlp.fc2", "ffn_up"),
        ("embedding", "embd"),
        ("final", "post"),
        ("layrnorm", "ln"),
    )
    for old, new in rewrites:
        name = name.replace(old, new)
    return name
456
+
457
+
458
def bytes_to_unicode():
    """
    Build the reversible byte -> unicode-character table used by byte-level BPE.

    The bytes in the ranges ``!``..``~``, ``¡``..``¬`` and ``®``..``ÿ`` map to
    the character with the same code point. Every remaining byte is assigned,
    in ascending byte order, the next code point starting at 256, so that no
    byte maps to a whitespace or control character.

    Returns:
        dict mapping each of the 256 byte values to a one-character string.
    """
    self_mapped = [
        *range(ord("!"), ord("~") + 1),
        *range(ord("¡"), ord("¬") + 1),
        *range(ord("®"), ord("ÿ") + 1),
    ]
    byte_values = list(self_mapped)
    code_points = list(self_mapped)
    already_mapped = set(self_mapped)

    shift = 0
    for byte in range(2**8):
        if byte in already_mapped:
            continue
        byte_values.append(byte)
        code_points.append(2**8 + shift)
        shift += 1

    return dict(zip(byte_values, map(chr, code_points)))
482
+
483
+
484
# Normalization defaults, e.g.
#   --image-mean 0.48145466 0.4578275 0.40821073 --image-std 0.26862954 0.26130258 0.27577711
#   --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5
default_image_mean = [0.5, 0.5, 0.5]
default_image_std = [0.5, 0.5, 0.5]

ap = argparse.ArgumentParser()
ap.add_argument(
    "-m",
    "--model-dir",
    help="Path to model directory cloned from HF Hub",
    required=True,
)
ap.add_argument(
    "--use-f32",
    action="store_true",
    default=False,
    help="Use f32 instead of f16",
)

# Plain on/off switches that share the same argparse settings.
for _flag, _help in (
    ("--text-only", "Save a text-only model. It can't be used to encode images"),
    ("--vision-only", "Save a vision-only model. It can't be used to encode texts"),
    ("--clip-model-is-vision", "The clip model is a pure vision model (ShareGPT4V vision extract for example)"),
    ("--clip-model-is-openclip", "The clip model is from openclip (for ViT-SO400M type))"),
):
    ap.add_argument(_flag, action="store_true", required=False, help=_help)

ap.add_argument(
    "--minicpmv-projector",
    help="Path to minicpmv.projector file. If specified, save an image encoder for MiniCPM-V models.",
)
ap.add_argument(
    "--projector-type",
    help="Type of projector. Possible values: mlp, ldp, ldpv2",
    choices=["mlp", "ldp", "ldpv2"],
    default="mlp",
)
ap.add_argument(
    "-o",
    "--output-dir",
    help="Directory to save GGUF files. Default is the original model directory",
    default=None,
)
ap.add_argument(
    '--image-mean',
    type=float,
    nargs='+',
    help='Mean of the images for normalization (overrides processor) ',
    default=None,
)
ap.add_argument(
    '--image-std',
    type=float,
    nargs='+',
    help='Standard deviation of the images for normalization (overrides processor)',
    default=None,
)
ap.add_argument(
    '--minicpmv_version',
    type=int,
    help='minicpmv_version: MiniCPM-V-2 use 1; MiniCPM-V-2.5 use 2; MiniCPM-V-2.6 use 3; MiniCPM-o-2.6 use 4; MiniCPM-V 4.0 use 5; MiniCPM-o-4.0 use 6; MiniCPM-o-4.5 use 100045',
    default=2,
)

args = ap.parse_args()
508
+
509
+
510
# A text-only export and a vision-only export are mutually exclusive.
if args.text_only and args.vision_only:
    # The message names the flags exactly as argparse defines them.
    print("--text-only and --vision-only arguments cannot be specified at the same time.")
    # SystemExit gives the same exit status as exit(1) without relying on the
    # interactive `exit` helper injected by the `site` module.
    raise SystemExit(1)

if args.use_f32:
    print("WARNING: Weights for the convolution op is always saved in f16, as the convolution op in GGML does not support 32-bit kernel weights yet.")

# Directory holding the source checkpoint; also the default output location.
dir_model = args.model_dir
519
+
520
# Load config.json (when present) so the real model dimensions can be used later.
config_path = os.path.join(dir_model, "config.json")
model_config = {}
if not os.path.isfile(config_path):
    print(f"Warning: config.json not found at {config_path}")
else:
    with open(config_path, "r", encoding="utf-8") as f:
        model_config = json.load(f)
    print(f"Loaded config from {config_path}")

# No projector given on the command line: fall back to <model_dir>/minicpmv.projector if it exists.
if args.minicpmv_projector is None:
    default_projector_path = os.path.join(dir_model, "minicpmv.projector")
    if os.path.isfile(default_projector_path):
        args.minicpmv_projector = default_projector_path
        print(f"Found default projector file: {default_projector_path}")

# No output directory given: write next to the model.
if args.output_dir is None:
    args.output_dir = dir_model
540
+
541
# A vocabulary is only loaded for a non-vision, non-openclip model that ships a vocab.json.
if (
    not args.clip_model_is_vision
    and os.path.exists(dir_model + "/vocab.json")
    and not args.clip_model_is_openclip
):
    with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
        vocab = json.load(f)
        tokens = list(vocab)
else:
    vocab = None
    tokens = None
548
+
549
# Output data type, used as an index into ftype_str:
#   0 -> float32
#   1 -> float16 (default)
ftype_str = ["f32", "f16"]

ftype = 0 if args.use_f32 else 1
559
+
560
+ # if args.clip_model_is_vision or args.clip_model_is_openclip:
561
+ # model = CLIPVisionModel.from_pretrained(dir_model)
562
+ # processor = None
563
+ # else:
564
+ # model = CLIPModel.from_pretrained(dir_model)
565
+ # processor = CLIPProcessor.from_pretrained(dir_model)
566
+
567
minicpmv_version = args.minicpmv_version

if not model_config:
    # No config.json: fall back to the per-version hardcoded dimensions.
    # Maps minicpmv_version -> (emb_dim, block_count).
    _fallback_dims = {
        1: (2304, 26),
        2: (4096, 27),
        3: (3584, 27),
        4: (3584, 27),
        5: (2560, 27),
        6: (4096, 27),
        100045: (4096, 27),
    }
    emb_dim, block_count = _fallback_dims.get(minicpmv_version, (4096, 26))

    default_vision_config = {
        "hidden_size": 1152,
        "image_size": 980,
        "intermediate_size": 4304,
        "model_type": "idefics2",
        "num_attention_heads": 16,
        "num_hidden_layers": 27,
        "patch_size": 14,
    }
else:
    # config.json is available: use its values instead of hardcoded ones.
    # The projector/resampler width comes from the main model's hidden_size.
    emb_dim = model_config.get("hidden_size", 1536)

    # The vision tower settings come from vision_config, each with a fallback.
    vision_config_dict = model_config.get("vision_config", {})
    _vision_fallbacks = {
        "hidden_size": 1152,
        "image_size": 980,
        "intermediate_size": 4304,
        "model_type": "siglip",
        "num_attention_heads": 16,
        "num_hidden_layers": 27,
        "patch_size": 14,
    }
    default_vision_config = {
        key: vision_config_dict.get(key, fallback)
        for key, fallback in _vision_fallbacks.items()
    }

    # block_count follows the vision model's layer count.
    block_count = vision_config_dict.get("num_hidden_layers", 27)

    print(f"Using config values: emb_dim={emb_dim}, block_count={block_count}")
    print(f"Vision config: {default_vision_config}")
626
+
627
# Build the vision tower that matches the checkpoint. Only one model is
# constructed: the SigLIP variants no longer create (and discard) a full
# Idefics2 transformer first.
_config_is_siglip = bool(model_config) and model_config.get("vision_config", {}).get("model_type") == "siglip"

if minicpmv_version in (3, 4) or _config_is_siglip:
    # SigLIP tower; the vision config dict is passed through unchanged.
    vision_config = SiglipVisionConfig(**default_vision_config)
    model = SiglipVisionTransformer(vision_config)
elif minicpmv_version in (5, 6, 100045):
    # SigLIP tower with the model type forced to the SigLIP vision model.
    default_vision_config["model_type"] = "siglip_vision_model"
    vision_config = SiglipVisionConfig(**default_vision_config)
    model = SiglipVisionTransformer(vision_config)
else:
    # Every other version uses the Idefics2 vision transformer.
    vision_config = Idefics2VisionConfig(**default_vision_config)
    model = Idefics2VisionTransformer(vision_config)

processor = None

# NOTE(review): torch.load unpickles the file, so only run this on a trusted
# checkpoint. map_location="cpu" lets a checkpoint saved on a GPU load on a
# machine without one.
model.load_state_dict(torch.load(os.path.join(dir_model, "minicpmv.clip"), map_location="cpu"))
654
+
655
# Decide which components end up in the output file and which prefix the file
# name gets.  The checks are ordered: text-only first, then a supplied
# projector, then vision-only; with none of them both encoders are kept.
if args.text_only:
    fname_middle = "text-"
    has_text_encoder, has_vision_encoder, has_minicpmv_projector = True, False, False
elif args.minicpmv_projector is not None:
    fname_middle = "mmproj-"
    has_text_encoder, has_vision_encoder, has_minicpmv_projector = False, True, True
elif args.vision_only:
    fname_middle = "vision-"
    has_text_encoder, has_vision_encoder, has_minicpmv_projector = False, True, False
else:
    fname_middle = ""
    has_text_encoder, has_vision_encoder, has_minicpmv_projector = True, True, False
672
+
673
# Make sure the destination directory exists, then open a GGUF writer for a
# file named "<prefix>model-<ftype>.gguf" inside it.
output_dir = args.output_dir
os.makedirs(output_dir, exist_ok=True)
# Directory base name with any "ggml_" marker removed (not referenced by the
# visible remainder of this script).
output_prefix = os.path.basename(output_dir).replace("ggml_", "")
gguf_file_name = f"{fname_middle}model-{ftype_str[ftype]}.gguf"
fname_out = os.path.join(output_dir, gguf_file_name)
fout = GGUFWriter(path=fname_out, arch="clip")

# Record which components the file contains, followed by the file type.
for component_key, component_present in (
    ("clip.has_text_encoder", has_text_encoder),
    ("clip.has_vision_encoder", has_vision_encoder),
    ("clip.has_minicpmv_projector", has_minicpmv_projector),
):
    fout.add_bool(component_key, component_present)
fout.add_file_type(ftype)

# Human-readable description; the projector case additionally records the
# projector type and the MiniCPM-V version.
writes_projector_metadata = has_minicpmv_projector and not args.text_only
if args.text_only:
    description = "text-only CLIP model"
elif writes_projector_metadata:
    description = "image encoder for MiniCPM-V"
elif args.vision_only:
    description = "vision-only CLIP model"
else:
    description = "two-tower CLIP model"
fout.add_description(description)
if writes_projector_metadata:
    # add projector type
    fout.add_string("clip.projector_type", "resampler")
    fout.add_int32("clip.minicpmv_version", minicpmv_version)
694
+
695
if has_vision_encoder:
    # vision_model hparams - use actual config values
    # Top-level config.json entries; a missing or empty config yields the
    # fallback value passed to each .get() call.
    top_level_cfg = model_config if model_config else {}
    vision_image_size = top_level_cfg.get("image_size", 448)
    vision_patch_size = default_vision_config.get("patch_size", 14)
    vision_hidden_size = default_vision_config.get("hidden_size", 1152)
    vision_intermediate_size = default_vision_config.get("intermediate_size", 4304)
    vision_attention_heads = default_vision_config.get("num_attention_heads", 16)

    fout.add_uint32("clip.vision.image_size", vision_image_size)
    fout.add_uint32("clip.vision.patch_size", vision_patch_size)
    fout.add_uint32(add_key_str(KEY_EMBEDDING_LENGTH, VISION), vision_hidden_size)
    fout.add_uint32(add_key_str(KEY_FEED_FORWARD_LENGTH, VISION), vision_intermediate_size)
    fout.add_uint32("clip.vision.projection_dim", 0)
    fout.add_uint32(add_key_str(KEY_ATTENTION_HEAD_COUNT, VISION), vision_attention_heads)
    fout.add_float32(add_key_str(KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6)
    fout.add_uint32(add_key_str(KEY_BLOCK_COUNT, VISION), block_count)

    # Add MiniCPM-V specific parameters
    query_num = top_level_cfg.get("query_num", 0)
    # Read from the config but not written to the file in the visible code.
    resampler_emb_dim = top_level_cfg.get("hidden_size", 0)
    fout.add_uint32("clip.minicpmv_query_num", query_num)

    # Image normalisation statistics.  Without a processor the command-line
    # values win and the module defaults fill the gaps; with a processor its
    # statistics are used unless the command line overrides the defaults.
    if processor is None:
        image_mean = default_image_mean if args.image_mean is None else args.image_mean
        image_std = default_image_std if args.image_std is None else args.image_std
    else:
        if args.image_mean is None or args.image_mean == default_image_mean:
            image_mean = processor.image_processor.image_mean
        else:
            image_mean = args.image_mean
        if args.image_std is None or args.image_std == default_image_std:
            image_std = processor.image_processor.image_std
        else:
            image_std = args.image_std
    fout.add_array("clip.vision.image_mean", image_mean)
    fout.add_array("clip.vision.image_std", image_std)

# The GELU flag is always written as true.
use_gelu = True
fout.add_bool("clip.use_gelu", use_gelu)
728
+
729
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    Encode positions as 1-D sine/cosine embeddings.

    embed_dim: output dimension for each position (must be even)
    pos: array of positions to be encoded; flattened to size (M,)
    out: (M, D), sine features in the first D/2 columns, cosine in the rest
    """
    assert embed_dim % 2 == 0
    half_dim = embed_dim // 2

    # Frequencies 1 / 10000^(i / (D/2)) for i = 0 .. D/2 - 1.
    exponents = np.arange(half_dim, dtype=np.float32)
    exponents /= embed_dim / 2.
    freqs = 1. / 10000 ** exponents  # (D/2,)

    # Every position multiplied by every frequency.
    angles = np.outer(pos.reshape(-1), freqs)  # (M, D/2)

    return np.concatenate([np.sin(angles), np.cos(angles)], axis=1)  # (M, D)
748
+
749
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    """
    Encode a two-plane coordinate grid as 2-D sine/cosine embeddings.

    embed_dim: output dimension for each position (must be even)
    grid: indexable holding two coordinate arrays, grid[0] and grid[1]
    out: (H*W, D); the first D/2 columns encode grid[0], the rest grid[1]
    """
    assert embed_dim % 2 == 0
    per_plane_dim = embed_dim // 2

    # Each coordinate plane gets half of the channels.
    plane_embeddings = [
        get_1d_sincos_pos_embed_from_grid(per_plane_dim, grid[plane])  # (H*W, D/2)
        for plane in (0, 1)
    ]
    return np.concatenate(plane_embeddings, axis=1)  # (H*W, D)
758
+
759
+
760
+ # https://github.com/facebookresearch/mae/blob/efb2a8062c206524e35e47d04501ed4f544c0ae8/util/pos_embed.py#L20
761
def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
    """
    Build a 2-D sine/cosine positional embedding table.

    grid_size: int (square grid) or an indexable (height, width) pair
    return:
    pos_embed: [grid_h*grid_w, embed_dim], or [1+grid_h*grid_w, embed_dim]
    with a leading all-zero row when cls_token is true
    """
    if isinstance(grid_size, int):
        rows = cols = grid_size
    else:
        rows, cols = grid_size[0], grid_size[1]

    row_coords = np.arange(rows, dtype=np.float32)
    col_coords = np.arange(cols, dtype=np.float32)
    # meshgrid receives the width axis first, so plane 0 of the stacked grid
    # holds the column coordinates and plane 1 the row coordinates.
    grid = np.stack(np.meshgrid(col_coords, row_coords), axis=0)
    grid = grid.reshape([2, 1, rows, cols])

    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
    if not cls_token:
        return pos_embed
    # Prepend a zero row for the class token.
    return np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
782
+
783
+ def _replace_name_resampler(s, v):
784
+ if re.match("resampler.pos_embed", s):
785
+ return {
786
+ s: v,
787
+ re.sub("pos_embed", "pos_embed_k", s): torch.from_numpy(get_2d_sincos_pos_embed(emb_dim, (70, 70))),
788
+ }
789
+ if re.match("resampler.proj", s):
790
+ return {
791
+ re.sub("proj", "pos_embed_k", s): torch.from_numpy(get_2d_sincos_pos_embed(emb_dim, (70, 70))),
792
+ re.sub("proj", "proj.weight", s): v.transpose(-1, -2).contiguous(),
793
+ }
794
+ if re.match("resampler.attn.in_proj_.*", s):
795
+ return {
796
+ re.sub("attn.in_proj_", "attn.q.", s): v.chunk(3, dim=0)[0],
797
+ re.sub("attn.in_proj_", "attn.k.", s): v.chunk(3, dim=0)[1],
798
+ re.sub("attn.in_proj_", "attn.v.", s): v.chunk(3, dim=0)[2],
799
+ }
800
+ return {s: v}
801
+
802
if has_minicpmv_projector:
    # The tensors are converted to NumPy arrays below, which requires them to
    # live on the CPU; map_location="cpu" also allows a projector saved from
    # an accelerator to be converted on a machine without one.
    # NOTE: torch.load unpickles the file - only convert projector files that
    # come from a trusted source.
    projector = torch.load(args.minicpmv_projector, map_location="cpu")

    # Rename/split the checkpoint entries into the names used in the output.
    new_state_dict = {}
    for k, v in projector.items():
        new_state_dict.update(_replace_name_resampler(k, v))
    projector = new_state_dict

    ftype_cur = 0
    for name, data in projector.items():
        name = get_tensor_name(name)
        data = data.squeeze().numpy()

        n_dims = len(data.shape)
        if ftype == 1:
            # f16 output: only 2-D ".weight" tensors are stored as float16,
            # everything else is stored as float32.
            if name[-7:] == ".weight" and n_dims == 2:
                print(" Converting to float16")
                data = data.astype(np.float16)
                ftype_cur = 1
            else:
                print(" Converting to float32")
                data = data.astype(np.float32)
                ftype_cur = 0
        else:
            # f32 output: convert only when the tensor is not float32 already.
            if data.dtype != np.float32:
                print(" Converting to float32")
                data = data.astype(np.float32)
                ftype_cur = 0

        fout.add_tensor(name, data)
        print(f"{name} - {ftype_str[ftype_cur]} - shape = {data.shape}")

    print("Projector tensors added\n")
835
+
836
+ def _replace_name(s, v):
837
+ s = "vision_model." + s
838
+ if re.match("vision_model.embeddings.position_embedding", s):
839
+ v = v.unsqueeze(0)
840
+ return {s: v}
841
+
842
+ return {s: v}
843
+
844
# Collect the vision model's tensors under their "vision_model."-prefixed
# names before writing them out.
state_dict = model.state_dict()
new_state_dict = {}
for k, v in state_dict.items():
    new_state_dict.update(_replace_name(k, v))
state_dict = new_state_dict

for name, data in state_dict.items():
    if should_skip_tensor(name, has_text_encoder, has_vision_encoder, has_minicpmv_projector):
        # we don't need this
        print(f"skipping parameter: {name}")
        continue

    name = get_tensor_name(name)
    data = data.squeeze().numpy()
    n_dims = data.ndim

    # ftype == 0 -> float32, ftype == 1 -> float16
    ftype_cur = 0
    if n_dims == 4:
        print(f"tensor {name} is always saved in f16")
        data = data.astype(np.float16)
        ftype_cur = 1
    elif ftype == 1 and n_dims == 2 and name.endswith(".weight"):
        # f16 output stores 2-D ".weight" tensors as float16.
        print(" Converting to float16")
        data = data.astype(np.float16)
        ftype_cur = 1
    elif ftype == 1 or data.dtype != np.float32:
        # Remaining tensors of an f16 output, and non-float32 tensors of an
        # f32 output, are stored as float32.
        print(" Converting to float32")
        data = data.astype(np.float32)

    print(f"{name} - {ftype_str[ftype_cur]} - shape = {data.shape}")
    fout.add_tensor(name, data)
885
+
886
+
887
# Write the file in order - header, key/value metadata, tensor data - and then
# close the writer.
for finalize_step in (
    fout.write_header_to_file,
    fout.write_kv_data_to_file,
    fout.write_tensors_to_file,
    fout.close,
):
    finalize_step()

print("Done. Output file: " + fname_out)