local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626):
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,724 @@
1
+ #pragma once
2
+
3
+ // GGML internal header
4
+
5
+ #include "ggml.h"
6
+ #include "gguf.h"
7
+
8
+ #include <assert.h>
9
+ #include <math.h>
10
+ #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
11
+ #include <stdbool.h>
12
+ #include <stdint.h>
13
+ #include <string.h>
14
+
15
+ #ifdef __ARM_FEATURE_SVE
16
+ #include <arm_sve.h>
17
+ #endif // __ARM_FEATURE_SVE
18
+
19
+ #if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__)
20
+ // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
21
+ //
22
+ // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
23
+ //
24
+ #include <arm_neon.h>
25
+ #endif
26
+
27
+ #ifdef __cplusplus
28
+ extern "C" {
29
+ #endif
30
+
31
+ void ggml_print_backtrace(void);
32
+
33
+ #ifndef MIN
34
+ # define MIN(a, b) ((a) < (b) ? (a) : (b))
35
+ #endif
36
+
37
+ #ifndef MAX
38
+ # define MAX(a, b) ((a) > (b) ? (a) : (b))
39
+ #endif
40
+
41
+ // required for mmap as gguf only guarantees 32-byte alignment
42
+ #define TENSOR_ALIGNMENT 32
43
+
44
+ // static_assert should be a #define, but if it's not,
45
+ // fall back to the _Static_assert C11 keyword.
46
+ // if C99 - static_assert is noop
47
+ // ref: https://stackoverflow.com/a/53923785/4039976
48
+ #ifndef __cplusplus
49
+ #ifndef static_assert
50
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
51
+ #define static_assert(cond, msg) _Static_assert(cond, msg)
52
+ #else
53
+ #define static_assert(cond, msg) struct global_scope_noop_trick
54
+ #endif
55
+ #endif
56
+ #endif
57
+
58
// Round n up to the nearest multiple of 32.
static inline int ggml_up32(int n) {
    const int mask = 31;
    return (n + mask) & ~mask;
}
61
+
62
+ //static inline int ggml_up64(int n) {
63
+ // return (n + 63) & ~63;
64
+ //}
65
+
66
// Round n up to the nearest multiple of m; m is asserted to be a power of 2.
static inline int ggml_up(int n, int m) {
    // a power of 2 shares no bits with its predecessor
    const int mask = m - 1;
    GGML_ASSERT((m & mask) == 0);
    return (n + mask) & ~mask;
}
71
+
72
+ // TODO: move to ggml.h? (won't be able to inline)
73
+ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
74
+ if (a->type != b->type) {
75
+ return false;
76
+ }
77
+ for (int i = 0; i < GGML_MAX_DIMS; i++) {
78
+ if (a->ne[i] != b->ne[i]) {
79
+ return false;
80
+ }
81
+ if (a->nb[i] != b->nb[i]) {
82
+ return false;
83
+ }
84
+ }
85
+ return true;
86
+ }
87
+
88
+ static bool ggml_op_is_empty(enum ggml_op op) {
89
+ switch (op) {
90
+ case GGML_OP_NONE:
91
+ case GGML_OP_RESHAPE:
92
+ case GGML_OP_TRANSPOSE:
93
+ case GGML_OP_VIEW:
94
+ case GGML_OP_PERMUTE:
95
+ return true;
96
+ default:
97
+ return false;
98
+ }
99
+ }
100
+
101
+ static inline bool ggml_impl_is_view(const struct ggml_tensor * t) {
102
+ return t->view_src != NULL;
103
+ }
104
+
105
+ static inline float ggml_compute_softplus_f32(float input) {
106
+ return (input > 20.0f) ? input : logf(1 + expf(input));
107
+ }
108
+ //
109
+ // logging
110
+ //
111
+
112
+ GGML_ATTRIBUTE_FORMAT(2, 3)
113
+ GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...);
114
+ GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data);
115
+
116
+ #define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__)
117
+ #define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
118
+ #define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
119
+ #define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
120
+ #define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
121
+ #define GGML_LOG_CONT(...) ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__)
122
+
123
+ #define GGML_DEBUG 0
124
+
125
+ #if (GGML_DEBUG >= 1)
126
+ #define GGML_PRINT_DEBUG(...) GGML_LOG_DEBUG(__VA_ARGS__)
127
+ #else
128
+ #define GGML_PRINT_DEBUG(...)
129
+ #endif
130
+
131
+ #if (GGML_DEBUG >= 5)
132
+ #define GGML_PRINT_DEBUG_5(...) GGML_LOG_DEBUG(__VA_ARGS__)
133
+ #else
134
+ #define GGML_PRINT_DEBUG_5(...)
135
+ #endif
136
+
137
+ #if (GGML_DEBUG >= 10)
138
+ #define GGML_PRINT_DEBUG_10(...) GGML_LOG_DEBUG(__VA_ARGS__)
139
+ #else
140
+ #define GGML_PRINT_DEBUG_10(...)
141
+ #endif
142
+
143
+ // tensor params
144
+
145
+ static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
146
+ GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
147
+ assert(params_size <= GGML_MAX_OP_PARAMS);
148
+ memcpy(tensor->op_params, params, params_size);
149
+ }
150
+
151
+ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
152
+ assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
153
+ return ((const int32_t *)(tensor->op_params))[i];
154
+ }
155
+
156
+ static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
157
+ assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
158
+ return ((const float *)(tensor->op_params))[i];
159
+ }
160
+
161
+ static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
162
+ assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
163
+ ((int32_t *)(tensor->op_params))[i] = value;
164
+ }
165
+
166
+ static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
167
+ assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
168
+ ((float *)(tensor->op_params))[i] = value;
169
+ }
170
+
171
+ struct ggml_map_custom1_op_params { // parameter bundle for a "custom1" map op; NOTE(review): presumably copied into a tensor's op_params - confirm in ggml.c
172
+ ggml_custom1_op_t fun; // function of type ggml_custom1_op_t (type comes from ggml.h)
173
+ int n_tasks; // NOTE(review): presumably the number of tasks the op may be split into - confirm against the compute code
174
+ void * userdata; // opaque pointer; NOTE(review): presumably handed back to `fun` - confirm
175
+ };
176
+
177
+ struct ggml_map_custom2_op_params { // parameter bundle for a "custom2" map op; NOTE(review): presumably copied into a tensor's op_params - confirm in ggml.c
178
+ ggml_custom2_op_t fun; // function of type ggml_custom2_op_t (type comes from ggml.h)
179
+ int n_tasks; // NOTE(review): presumably the number of tasks the op may be split into - confirm against the compute code
180
+ void * userdata; // opaque pointer; NOTE(review): presumably handed back to `fun` - confirm
181
+ };
182
+
183
+ struct ggml_map_custom3_op_params { // parameter bundle for a "custom3" map op; NOTE(review): presumably copied into a tensor's op_params - confirm in ggml.c
184
+ ggml_custom3_op_t fun; // function of type ggml_custom3_op_t (type comes from ggml.h)
185
+ int n_tasks; // NOTE(review): presumably the number of tasks the op may be split into - confirm against the compute code
186
+ void * userdata; // opaque pointer; NOTE(review): presumably handed back to `fun` - confirm
187
+ };
188
+
189
+ struct ggml_custom_op_params { // parameter bundle for a generic custom op; NOTE(review): presumably copied into a tensor's op_params - confirm in ggml.c
190
+ ggml_custom_op_t fun; // function of type ggml_custom_op_t (type comes from ggml.h)
191
+ int n_tasks; // NOTE(review): presumably the number of tasks the op may be split into - confirm against the compute code
192
+ void * userdata; // opaque pointer; NOTE(review): presumably handed back to `fun` - confirm
193
+ };
194
+
195
+ // bitset
196
+
197
+ typedef uint32_t ggml_bitset_t;
198
+
199
+ static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
200
+ #define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
201
+ #define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)
202
+
203
+ static size_t ggml_bitset_size(size_t n) {
204
+ return (n + BITSET_MASK) >> BITSET_SHR;
205
+ }
206
+
207
+ static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
208
+ return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK)));
209
+ }
210
+
211
+ static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
212
+ bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK));
213
+ }
214
+
215
+ static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
216
+ bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK));
217
+ }
218
+
219
+ // hash set
220
+
221
+ #define GGML_HASHSET_FULL ((size_t)-1)
222
+ #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2)
223
+
224
+ struct ggml_hash_set { // open-addressing set of tensor pointers (the inline helpers in this header probe it linearly)
225
+ size_t size; // number of slots; hashes and probe steps are taken modulo this value
226
+ ggml_bitset_t * used; // whether or not the keys are in use i.e. set
227
+ struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i)
228
+ };
229
+
230
+ struct ggml_hash_set ggml_hash_set_new(size_t size);
231
+ void ggml_hash_set_free(struct ggml_hash_set * hash_set);
232
+
233
+ // returns the minimum size for a hash set that can hold min_sz elements
234
+ size_t ggml_hash_size(size_t min_sz);
235
+
236
+ // remove all elements from the hash set
237
+ void ggml_hash_set_reset(struct ggml_hash_set * hash_set);
238
+
239
+ // returns true if key is in the hash set
240
+ static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key);
241
+
242
+ // returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted
243
+ static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key);
244
+
245
+ // returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full
246
+ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
247
+
248
+ // return index, asserts if table is full
249
+ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key);
250
+
251
+ // hash function for ggml_tensor
252
// Hashes a tensor by its address, dropping the low 4 bits
// (which are always zero due to alignment).
static inline size_t ggml_hash(const struct ggml_tensor * p) {
    const uintptr_t addr = (uintptr_t) p;
    return (size_t) addr >> 4;
}
256
+
257
+ static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key) {
258
+ size_t h = ggml_hash(key) % hash_set->size;
259
+
260
+ // linear probing
261
+ size_t i = h;
262
+ while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) {
263
+ i = (i + 1) % hash_set->size;
264
+ if (i == h) {
265
+ // visited all hash table entries -> not found
266
+ return GGML_HASHSET_FULL;
267
+ }
268
+ }
269
+ return i;
270
+ }
271
+
272
+ static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
273
+ size_t i = ggml_hash_find(hash_set, key);
274
+ return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i);
275
+ }
276
+
277
+ static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
278
+ size_t h = ggml_hash(key) % hash_set->size;
279
+
280
+ // linear probing
281
+ size_t i = h;
282
+ do {
283
+ if (!ggml_bitset_get(hash_set->used, i)) {
284
+ ggml_bitset_set(hash_set->used, i);
285
+ hash_set->keys[i] = key;
286
+ return i;
287
+ }
288
+ if (hash_set->keys[i] == key) {
289
+ return GGML_HASHSET_ALREADY_EXISTS;
290
+ }
291
+ i = (i + 1) % hash_set->size;
292
+ } while (i != h);
293
+
294
+ // visited all hash table entries -> not found
295
+ GGML_ABORT("fatal error");
296
+ }
297
+
298
+ static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) {
299
+ size_t h = ggml_hash(key) % hash_set->size;
300
+
301
+ // linear probing
302
+ size_t i = h;
303
+ do {
304
+ if (!ggml_bitset_get(hash_set->used, i)) {
305
+ ggml_bitset_set(hash_set->used, i);
306
+ hash_set->keys[i] = key;
307
+ return i;
308
+ }
309
+ if (hash_set->keys[i] == key) {
310
+ return i;
311
+ }
312
+ i = (i + 1) % hash_set->size;
313
+ } while (i != h);
314
+
315
+ // visited all hash table entries -> not found
316
+ GGML_ABORT("fatal error");
317
+ }
318
+
319
+ // computation graph
320
+
321
+ enum ggml_cgraph_eval_order { // type of the `order` field of struct ggml_cgraph
322
+ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
323
+ GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
324
+ GGML_CGRAPH_EVAL_ORDER_COUNT // equals the number of enumerators above it (2)
325
+ };
326
+
327
+ struct ggml_cgraph { // computation graph: node/leaf tensor arrays plus per-tensor bookkeeping
328
+ int size; // maximum number of nodes/leafs/grads/grad_accs
329
+ int n_nodes; // number of nodes currently in use
330
+ int n_leafs; // number of leafs currently in use
331
+
332
+ struct ggml_tensor ** nodes; // tensors with data that can change if the graph is evaluated
333
+ struct ggml_tensor ** grads; // the outputs of these tensors are the gradients of the nodes
334
+ struct ggml_tensor ** grad_accs; // accumulators for node gradients
335
+ struct ggml_tensor ** leafs; // tensors with constant data
336
+ int32_t * use_counts;// number of uses of each tensor, indexed by hash table slot
337
+
338
+ struct ggml_hash_set visited_hash_set; // hash set of tensors; its slot indices are the ones used to index use_counts
339
+
340
+ enum ggml_cgraph_eval_order order; // one of the ggml_cgraph_eval_order values
341
+ };
342
+
343
+ // returns a slice of cgraph with nodes [i0, i1)
344
+ // the slice does not have leafs or gradients
345
+ // if you need the gradients, get them from the original graph
346
+ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1);
347
+
348
+ // ggml-alloc.c: true if the operation can reuse memory from its sources
349
+ GGML_API bool ggml_op_can_inplace(enum ggml_op op);
350
+
351
+
352
+ // Memory allocation
353
+
354
+ GGML_API void * ggml_aligned_malloc(size_t size);
355
+ GGML_API void ggml_aligned_free(void * ptr, size_t size);
356
+
357
+ // FP16 <-> FP32
358
+ // ref: https://github.com/Maratyszcza/FP16
359
+
360
// Reinterprets a 32-bit pattern as a float (no numeric conversion).
static inline float fp32_from_bits(uint32_t w) {
    float value;
    memcpy(&value, &w, sizeof(value));
    return value;
}
368
+
369
// Returns the raw 32-bit pattern of a float (no numeric conversion).
static inline uint32_t fp32_to_bits(float f) {
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
377
+
378
+ static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
379
+ const uint32_t w = (uint32_t) h << 16;
380
+ const uint32_t sign = w & UINT32_C(0x80000000);
381
+ const uint32_t two_w = w + w;
382
+
383
+ const uint32_t exp_offset = UINT32_C(0xE0) << 23;
384
+ #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
385
+ const float exp_scale = 0x1.0p-112f;
386
+ #else
387
+ const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
388
+ #endif
389
+ const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
390
+
391
+ const uint32_t magic_mask = UINT32_C(126) << 23;
392
+ const float magic_bias = 0.5f;
393
+ const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
394
+
395
+ const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
396
+ const uint32_t result = sign |
397
+ (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
398
+ return fp32_from_bits(result);
399
+ }
400
+
401
+ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
402
+ #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L)
403
+ const float scale_to_inf = 0x1.0p+112f;
404
+ const float scale_to_zero = 0x1.0p-110f;
405
+ #else
406
+ const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
407
+ const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
408
+ #endif
409
+ float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
410
+
411
+ const uint32_t w = fp32_to_bits(f);
412
+ const uint32_t shl1_w = w + w;
413
+ const uint32_t sign = w & UINT32_C(0x80000000);
414
+ uint32_t bias = shl1_w & UINT32_C(0xFF000000);
415
+ if (bias < UINT32_C(0x71000000)) {
416
+ bias = UINT32_C(0x71000000);
417
+ }
418
+
419
+ base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
420
+ const uint32_t bits = fp32_to_bits(base);
421
+ const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
422
+ const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
423
+ const uint32_t nonsign = exp_bits + mantissa_bits;
424
+ return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
425
+ }
426
+
427
+ #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
428
+ #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
429
+
430
+ #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
431
+ #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
432
+
433
// Converts an E8M0 exponent byte to float: the result is 2^(x - 127).
// NaN (x == 0xFF) is intentionally not special-cased.
static inline float ggml_e8m0_to_fp32(uint8_t x) {
    // x == 0 : 2^(-127) is below the smallest normal float, so it is encoded
    //          as a denormal with mantissa 0x400000 (0.5 * 2^(-126)).
    // x != 0 : normalized float with exponent field x and zero mantissa.
    const uint32_t bits = (x == 0) ? (uint32_t) 0x00400000 : ((uint32_t) x << 23);

    // reinterpret the bit pattern as a float via memcpy
    float result;
    memcpy(&result, &bits, sizeof(float));
    return result;
}
468
+
469
+ // Equal to ggml_e8m0_to_fp32/2
470
+ // Useful with MXFP4 quantization since the E2M1 values are doubled
471
// Converts an E8M0 exponent byte to half of its value: the result is 2^(x - 128).
// NaNs are not handled here.
static inline float ggml_e8m0_to_fp32_half(uint8_t x) {
    // x < 2  : result is a denormal; 0x00200000 = 2^(-128), shifted left once for 2^(-127)
    // x >= 2 : normalized float with exponent field (x - 1)
    const uint32_t bits = (x < 2) ? (uint32_t) (0x00200000 << x)
                                  : ((uint32_t) (x - 1) << 23);

    float result;
    memcpy(&result, &bits, sizeof(float));
    return result;
}
490
+
491
+ #define GGML_E8M0_TO_FP32(x) ggml_e8m0_to_fp32(x)
492
+ #define GGML_E8M0_TO_FP32_HALF(x) ggml_e8m0_to_fp32_half(x)
493
+
494
+ /**
495
+ * Converts brain16 to float32.
496
+ *
497
+ * The bfloat16 floating point format has the following structure:
498
+ *
499
+ * ┌sign
500
+ * │
501
+ * │ ┌exponent
502
+ * │ │
503
+ * │ │ ┌mantissa
504
+ * │ │ │
505
+ * │┌──┴───┐┌─┴───┐
506
+ * 0b0000000000000000 brain16
507
+ *
508
+ * Since bf16 has the same number of exponent bits as a 32bit float,
509
+ * encoding and decoding numbers becomes relatively straightforward.
510
+ *
511
+ * ┌sign
512
+ * │
513
+ * │ ┌exponent
514
+ * │ │
515
+ * │ │ ┌mantissa
516
+ * │ │ │
517
+ * │┌──┴───┐┌─┴───────────────────┐
518
+ * 0b00000000000000000000000000000000 IEEE binary32
519
+ *
520
+ * For comparison, the standard fp16 format has fewer exponent bits.
521
+ *
522
+ * ┌sign
523
+ * │
524
+ * │ ┌exponent
525
+ * │ │
526
+ * │ │ ┌mantissa
527
+ * │ │ │
528
+ * │┌─┴─┐┌─┴──────┐
529
+ * 0b0000000000000000 IEEE binary16
530
+ *
531
+ * @see IEEE 754-2008
532
+ */
533
+ static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
534
+ union {
535
+ float f;
536
+ uint32_t i;
537
+ } u;
538
+ u.i = (uint32_t)h.bits << 16;
539
+ return u.f;
540
+ }
541
+
542
+ /**
543
+ * Converts float32 to brain16.
544
+ *
545
+ * This is binary identical with Google Brain float conversion.
546
+ * Floats shall round to nearest even, and NANs shall be quiet.
547
+ * Subnormals aren't flushed to zero, except perhaps when used.
548
+ * This code should vectorize nicely if using modern compilers.
549
+ */
550
+ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
551
+ ggml_bf16_t h;
552
+ union {
553
+ float f;
554
+ uint32_t i;
555
+ } u;
556
+ u.f = s;
557
+ if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
558
+ h.bits = (u.i >> 16) | 64; /* force to quiet */
559
+ return h;
560
+ }
561
+ h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
562
+ return h;
563
+ }
564
+
565
+ #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
566
+ #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
567
+
568
+ static inline int32_t ggml_node_get_use_count(const struct ggml_cgraph * cgraph, int node_idx) {
569
+ const struct ggml_tensor * node = cgraph->nodes[node_idx];
570
+
571
+ size_t hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
572
+ if (!ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) {
573
+ return 0;
574
+ }
575
+ return cgraph->use_counts[hash_pos];
576
+ }
577
+
578
+ // return true if the node's results are only used by N other nodes
579
+ // and can be fused into their calculations.
580
+ static inline bool ggml_node_has_n_uses(const struct ggml_cgraph * cgraph, int node_idx, int32_t n_uses) {
581
+ const struct ggml_tensor * node = cgraph->nodes[node_idx];
582
+
583
+ // check the use count against how many we're replacing
584
+ if (ggml_node_get_use_count(cgraph, node_idx) != n_uses) {
585
+ return false;
586
+ }
587
+
588
+ // if node is a view, some other node might be using the intermediate result
589
+ // via the view source.
590
+ if (node->view_src) {
591
+ return false;
592
+ }
593
+
594
+ // If the user requested output for the node, can't fuse
595
+ if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
596
+ return false;
597
+ }
598
+
599
+ return true;
600
+ }
601
+
602
+ // Returns true if nodes with indices { node_idxs } are the sequence of ggml_ops in ops[]
603
+ // and are fusable. Nodes are considered fusable according to this function if:
604
+ // - all nodes except the last have only one use and are not views/outputs (see ggml_node_has_n_uses).
605
+ // - all nodes except the last are a src of the following node.
606
+ // - all nodes are the same shape.
607
+ // TODO: Consider allowing GGML_OP_NONE nodes in between
608
+ static inline bool ggml_can_fuse_ext(const struct ggml_cgraph * cgraph, const int * node_idxs, const enum ggml_op * ops, int num_ops) {
609
+ for (int i = 0; i < num_ops; ++i) {
610
+ if (node_idxs[i] >= cgraph->n_nodes) {
611
+ return false;
612
+ }
613
+
614
+ struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
615
+ if (node->op != ops[i]) {
616
+ return false;
617
+ }
618
+ if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
619
+ return false;
620
+ }
621
+ if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idxs[i], 1)) {
622
+ return false;
623
+ }
624
+ if (i > 0) {
625
+ struct ggml_tensor * prev = cgraph->nodes[node_idxs[i - 1]];
626
+ if (node->src[0] != prev && node->src[1] != prev) {
627
+ return false;
628
+ }
629
+ if (!ggml_are_same_shape(node, prev)) {
630
+ return false;
631
+ }
632
+ }
633
+ }
634
+ return true;
635
+ }
636
+
637
+ // same as above, for sequential indices starting at node_idx
638
+ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) {
639
+ assert(num_ops < 32);
640
+
641
+ if (node_idx + num_ops > cgraph->n_nodes) {
642
+ return false;
643
+ }
644
+
645
+ int idxs[32];
646
+ for (int i = 0; i < num_ops; ++i) {
647
+ idxs[i] = node_idx + i;
648
+ }
649
+
650
+ return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
651
+ }
652
+
653
+ GGML_API bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
654
+ const int * node_idxs,
655
+ int count,
656
+ const enum ggml_op * ops,
657
+ const int * outputs,
658
+ int num_outputs);
659
+
660
+ // Returns true if the subgraph formed by {node_idxs} can be fused
661
+ // checks whether all nodes which are not part of outputs can be elided
662
+ // by checking if their num_uses are confined to the subgraph
663
+ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
664
+ int node_idx,
665
+ int count,
666
+ const enum ggml_op * ops,
667
+ const int * outputs,
668
+ int num_outputs) {
669
+ GGML_ASSERT(count < 32);
670
+ if (node_idx + count > cgraph->n_nodes) {
671
+ return false;
672
+ }
673
+
674
+ int idxs[32];
675
+
676
+ for (int i = 0; i < count; ++i) {
677
+ idxs[i] = node_idx + i;
678
+ }
679
+
680
+ return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
681
+ }
682
+
683
+ #ifdef __cplusplus
684
+ }
685
+ #endif
686
+
687
+ #ifdef __cplusplus
688
+ #include <array>
689
+ #include <initializer_list>
690
+ #include <vector>
691
+
692
+ // nicer C++ syntax for ggml_can_fuse
693
+ inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops) {
694
+ return ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
695
+ }
696
+
697
+ inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
698
+ int start_idx,
699
+ std::initializer_list<enum ggml_op> ops,
700
+ std::initializer_list<int> outputs = {}) {
701
+ return ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
702
+ }
703
+
704
+ // Return true if the edges in the graph match expectations.
705
+ inline bool ggml_check_edges(const struct ggml_cgraph * cgraph,
706
+ int start_idx,
707
+ std::initializer_list<std::array<int, 3>> edges) {
708
+ for (const auto & edge : edges) {
709
+ int dst_node = edge[0];
710
+ int src_idx = edge[1];
711
+ int src_node = edge[2];
712
+ if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) {
713
+ return false;
714
+ }
715
+ }
716
+ return true;
717
+ }
718
+
719
+ // expose GGUF internals for test code
720
+ GGML_API size_t gguf_type_size(enum gguf_type type);
721
+ GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
722
+ GGML_API struct gguf_context * gguf_init_from_buffer_impl(const void * data, size_t size, struct gguf_init_params params);
723
+ GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
724
+ #endif // __cplusplus