local-llm-rn 1.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (626):
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,701 @@
1
+ #include "ggml-backend.h"
2
+ #include "ggml-backend-impl.h"
3
+ #include "ggml-cpu.h"
4
+ #include "repack.h"
5
+ #include "traits.h"
6
+ #include "ggml-impl.h"
7
+ #include "amx/amx.h"
8
+
9
#include <cctype>
#include <new>
#include <string>
#include <vector>
12
+
13
+ #ifdef GGML_USE_CPU_HBM
14
+ # include "hbm.h"
15
+ #endif
16
+
17
+ #ifdef GGML_USE_CPU_KLEIDIAI
18
+ # include "kleidiai/kleidiai.h"
19
+ #endif
20
+
21
+ #ifdef GGML_USE_CPU_RISCV64_SPACEMIT
22
+ # include "spacemit/ime.h"
23
+ #endif
24
+
25
+ #if defined(_WIN32)
26
+ # define WIN32_LEAN_AND_MEAN
27
+ # ifndef NOMINMAX
28
+ # define NOMINMAX
29
+ # endif
30
+ # include <windows.h>
31
+ #else
32
+ # include <unistd.h>
33
+ #endif
34
+
35
+ #if defined(__APPLE__)
36
+ # include <sys/sysctl.h>
37
+ # include <sys/types.h>
38
+ #endif
39
+
40
+ // ggml-backend interface
41
+
42
+ std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
43
+ static std::vector<ggml_backend_buffer_type_t> bufts = []() {
44
+ std::vector<ggml_backend_buffer_type_t> bufts;
45
+
46
+ #if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
47
+ if (ggml_backend_amx_buffer_type()) {
48
+ bufts.push_back(ggml_backend_amx_buffer_type());
49
+ }
50
+ #endif
51
+
52
+ #ifdef GGML_USE_CPU_RISCV64_SPACEMIT
53
+ if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
54
+ bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
55
+ }
56
+ #endif
57
+
58
+ #ifdef GGML_USE_CPU_KLEIDIAI
59
+ if (ggml_backend_cpu_kleidiai_buffer_type()) {
60
+ bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
61
+ }
62
+ #endif
63
+
64
+ #ifdef GGML_USE_CPU_REPACK
65
+ if (ggml_backend_cpu_repack_buffer_type()) {
66
+ bufts.push_back(ggml_backend_cpu_repack_buffer_type());
67
+ }
68
+ #endif
69
+
70
+ return bufts;
71
+ }();
72
+
73
+ return bufts;
74
+ }
75
+
76
+ static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
77
+ static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
78
+ std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
79
+ bufts.push_back(nullptr);
80
+ return bufts;
81
+ }();
82
+
83
+ return extra_bufts.data();
84
+
85
+ GGML_UNUSED(device);
86
+ }
87
+
88
+ static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
89
+ for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
90
+ if (extra == buft) {
91
+ return true;
92
+ }
93
+ }
94
+ return false;
95
+ }
96
+
97
+ // CPU backend - backend (stream)
98
+
99
+ struct ggml_backend_cpu_context {
100
+ int n_threads;
101
+ ggml_threadpool_t threadpool;
102
+
103
+ uint8_t * work_data;
104
+ size_t work_size;
105
+
106
+ ggml_abort_callback abort_callback;
107
+ void * abort_callback_data;
108
+
109
+ bool use_ref; // use reference implementation
110
+ };
111
+
112
+ static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
113
+ return "CPU";
114
+
115
+ GGML_UNUSED(backend);
116
+ }
117
+
118
+ static void ggml_backend_cpu_free(ggml_backend_t backend) {
119
+ struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
120
+ delete[] cpu_ctx->work_data;
121
+ delete cpu_ctx;
122
+ delete backend;
123
+ }
124
+
125
+ struct ggml_backend_plan_cpu {
126
+ struct ggml_cplan cplan;
127
+ struct ggml_cgraph cgraph;
128
+ };
129
+
130
+ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, const struct ggml_cgraph * cgraph) {
131
+ struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
132
+
133
+ struct ggml_backend_plan_cpu * cpu_plan = new ggml_backend_plan_cpu;
134
+
135
+ cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
136
+ cpu_plan->cgraph = *cgraph; // FIXME: deep copy
137
+
138
+ if (cpu_plan->cplan.work_size > 0) {
139
+ cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
140
+ if (cpu_plan->cplan.work_data == NULL) {
141
+ delete cpu_plan;
142
+ return NULL;
143
+ }
144
+ }
145
+
146
+ cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
147
+ cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
148
+ cpu_plan->cplan.use_ref = cpu_ctx->use_ref;
149
+
150
+ return cpu_plan;
151
+ }
152
+
153
+ static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
154
+ struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
155
+
156
+ delete[] cpu_plan->cplan.work_data;
157
+ delete cpu_plan;
158
+
159
+ GGML_UNUSED(backend);
160
+ }
161
+
162
+ static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
163
+ struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
164
+
165
+ return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
166
+
167
+ GGML_UNUSED(backend);
168
+ }
169
+
170
+ static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
171
+ struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
172
+
173
+ struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
174
+
175
+ if (cpu_ctx->work_size < cplan.work_size) {
176
+ delete[] cpu_ctx->work_data;
177
+ cpu_ctx->work_data = new uint8_t[cplan.work_size];
178
+ if (cpu_ctx->work_data == NULL) {
179
+ cpu_ctx->work_size = 0;
180
+ return GGML_STATUS_ALLOC_FAILED;
181
+ }
182
+ cpu_ctx->work_size = cplan.work_size;
183
+ }
184
+ cplan.work_data = (uint8_t *)cpu_ctx->work_data;
185
+
186
+ cplan.abort_callback = cpu_ctx->abort_callback;
187
+ cplan.abort_callback_data = cpu_ctx->abort_callback_data;
188
+ cplan.use_ref = cpu_ctx->use_ref;
189
+
190
+ return ggml_graph_compute(cgraph, &cplan);
191
+ }
192
+
193
+ static const struct ggml_backend_i ggml_backend_cpu_i = {
194
+ /* .get_name = */ ggml_backend_cpu_get_name,
195
+ /* .free = */ ggml_backend_cpu_free,
196
+ /* .set_tensor_async = */ NULL,
197
+ /* .get_tensor_async = */ NULL,
198
+ /* .cpy_tensor_async = */ NULL,
199
+ /* .synchronize = */ NULL,
200
+ /* .graph_plan_create = */ ggml_backend_cpu_graph_plan_create,
201
+ /* .graph_plan_free = */ ggml_backend_cpu_graph_plan_free,
202
+ /* .graph_plan_update = */ NULL,
203
+ /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
204
+ /* .graph_compute = */ ggml_backend_cpu_graph_compute,
205
+ /* .event_record = */ NULL,
206
+ /* .event_wait = */ NULL,
207
+ /* .graph_optimize = */ NULL,
208
+ };
209
+
210
+ static ggml_guid_t ggml_backend_cpu_guid(void) {
211
+ static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
212
+ return &guid;
213
+ }
214
+
215
+ ggml_backend_t ggml_backend_cpu_init(void) {
216
+ // initialize CPU backend now to avoid slowing the first graph computation
217
+ ggml_cpu_init();
218
+
219
+ struct ggml_backend_cpu_context * ctx = new ggml_backend_cpu_context;
220
+ if (ctx == NULL) {
221
+ return NULL;
222
+ }
223
+
224
+ ctx->n_threads = GGML_DEFAULT_N_THREADS;
225
+ ctx->threadpool = NULL;
226
+ ctx->work_data = NULL;
227
+ ctx->work_size = 0;
228
+ ctx->abort_callback = NULL;
229
+ ctx->abort_callback_data = NULL;
230
+ ctx->use_ref = false;
231
+
232
+ ggml_backend_t cpu_backend = new ggml_backend {
233
+ /* .guid = */ ggml_backend_cpu_guid(),
234
+ /* .iface = */ ggml_backend_cpu_i,
235
+ /* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
236
+ /* .context = */ ctx,
237
+ };
238
+
239
+ if (cpu_backend == NULL) {
240
+ delete ctx;
241
+ return NULL;
242
+ }
243
+
244
+ return cpu_backend;
245
+ }
246
+
247
+ bool ggml_backend_is_cpu(ggml_backend_t backend) {
248
+ return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
249
+ }
250
+
251
+ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
252
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
253
+
254
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
255
+ ctx->n_threads = n_threads;
256
+ }
257
+
258
+ void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
259
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
260
+
261
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
262
+
263
+ if (ctx->threadpool && ctx->threadpool != threadpool) {
264
+ // already had a different threadpool, pause/suspend it before switching
265
+ ggml_threadpool_pause(ctx->threadpool);
266
+ }
267
+ ctx->threadpool = threadpool;
268
+ }
269
+
270
+ void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
271
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
272
+
273
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
274
+ ctx->abort_callback = abort_callback;
275
+ ctx->abort_callback_data = abort_callback_data;
276
+ }
277
+
278
+ void ggml_backend_cpu_set_use_ref(ggml_backend_t backend_cpu, bool use_ref) {
279
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
280
+
281
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
282
+ ctx->use_ref = use_ref;
283
+ }
284
+
285
+ // CPU backend - device
286
+
287
// Device-level state of the CPU backend: a human-readable CPU description.
//
// The constructor tries to read the CPU brand string from the operating system
// (sysctl on Apple, /proc/cpuinfo on Linux, the registry on Windows). Whenever the
// lookup fails or yields an empty string, `description` keeps its default "CPU".
struct ggml_backend_cpu_device_context {
    std::string description = "CPU";

    ggml_backend_cpu_device_context() {
#ifdef __APPLE__
        size_t len = 0;
        if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0) && len > 0) {
            // read into a scratch string so a failed second call cannot clobber the default
            std::string brand(len, '\0');
            if (!sysctlbyname("machdep.cpu.brand_string", &brand[0], &len, NULL, 0)) { // NOLINT
                // the buffer may contain a terminating NUL; keep only the text before it
                const size_t nul = brand.find('\0');
                if (nul != std::string::npos) {
                    brand.resize(nul);
                }
                if (!brand.empty()) {
                    description = brand;
                }
            }
        }
#elif defined(__linux__)
        FILE * f = fopen("/proc/cpuinfo", "r");
        if (f) {
            char buf[1024];
            while (fgets(buf, sizeof(buf), f)) {
                if (strncmp(buf, "model name", 10) != 0) {
                    continue;
                }
                char * p = strchr(buf, ':');
                if (!p) {
                    continue;
                }
                p++;
                // <cctype> functions require a value representable as unsigned char
                while (std::isspace((unsigned char) *p)) {
                    p++;
                }
                // trim trailing whitespace without ever indexing before p
                size_t n = strlen(p);
                while (n > 0 && std::isspace((unsigned char) p[n - 1])) {
                    n--;
                }
                if (n > 0) {
                    description.assign(p, n);
                }
                break;
            }
            fclose(f);
        }
#elif defined(_WIN32)
        HKEY hKey;
        if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                         TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
                         0,
                         KEY_READ,
                         &hKey) == ERROR_SUCCESS) {
            DWORD cpu_brand_size = 0;
            if (RegQueryValueExA(hKey,
                                 "ProcessorNameString",
                                 NULL,
                                 NULL,
                                 NULL,
                                 &cpu_brand_size) == ERROR_SUCCESS && cpu_brand_size > 0) {
                // read into a scratch string so a failed second call cannot clobber the default
                std::string brand(cpu_brand_size, '\0');
                if (RegQueryValueExA(hKey,
                                     "ProcessorNameString",
                                     NULL,
                                     NULL,
                                     (LPBYTE)&brand[0], // NOLINT
                                     &cpu_brand_size) == ERROR_SUCCESS) {
                    const size_t nul = brand.find('\0');
                    if (nul != std::string::npos) {
                        brand.resize(nul);
                    }
                    if (!brand.empty()) {
                        description = brand;
                    }
                }
            }
            RegCloseKey(hKey);
        }
#endif
    }
};
350
+
351
+ static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
352
+ return "CPU";
353
+
354
+ GGML_UNUSED(dev);
355
+ }
356
+
357
+ static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t dev) {
358
+ struct ggml_backend_cpu_device_context * ctx = (struct ggml_backend_cpu_device_context *)dev->context;
359
+
360
+ return ctx->description.c_str();
361
+ }
362
+
363
+ static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
364
+ #ifdef _WIN32
365
+ MEMORYSTATUSEX status;
366
+ status.dwLength = sizeof(status);
367
+ GlobalMemoryStatusEx(&status);
368
+ *total = status.ullTotalPhys;
369
+ *free = status.ullAvailPhys;
370
+ #else
371
+ long pages = sysconf(_SC_PHYS_PAGES);
372
+ long page_size = sysconf(_SC_PAGE_SIZE);
373
+ *total = pages * page_size;
374
+
375
+ // "free" system memory is ill-defined, for practical purposes assume that all of it is free:
376
+ *free = *total;
377
+ #endif // _WIN32
378
+
379
+ GGML_UNUSED(dev);
380
+ }
381
+
382
+ static enum ggml_backend_dev_type ggml_backend_cpu_device_get_type(ggml_backend_dev_t dev) {
383
+ return GGML_BACKEND_DEVICE_TYPE_CPU;
384
+
385
+ GGML_UNUSED(dev);
386
+ }
387
+
388
+ static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
389
+ props->name = ggml_backend_cpu_device_get_name(dev);
390
+ props->description = ggml_backend_cpu_device_get_description(dev);
391
+ props->type = ggml_backend_cpu_device_get_type(dev);
392
+ ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
393
+ props->caps = {
394
+ /* .async = */ false,
395
+ /* .host_buffer = */ false,
396
+ /* .buffer_from_host_ptr = */ true,
397
+ /* .events = */ false,
398
+ };
399
+ }
400
+
401
+ static ggml_backend_t ggml_backend_cpu_device_init_backend(ggml_backend_dev_t dev, const char * params) {
402
+ return ggml_backend_cpu_init();
403
+
404
+ GGML_UNUSED(dev);
405
+ GGML_UNUSED(params);
406
+ }
407
+
408
+ static ggml_backend_buffer_type_t ggml_backend_cpu_device_get_buffer_type(ggml_backend_dev_t dev) {
409
+ return ggml_backend_cpu_buffer_type();
410
+
411
+ GGML_UNUSED(dev);
412
+ }
413
+
414
+ static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
415
+ return ggml_backend_cpu_buffer_from_ptr(ptr, size);
416
+
417
+ GGML_UNUSED(dev);
418
+ GGML_UNUSED(max_tensor_size);
419
+ }
420
+
421
+ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
422
+ const struct ggml_tensor * src0 = op->src[0];
423
+ const struct ggml_tensor * src1 = op->src[1];
424
+
425
+ if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
426
+ return true;
427
+ }
428
+
429
+ // check extra buffer types
430
+ // note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
431
+ for (int i = 0; i < 4; i++) {
432
+ if (op->src[i] && op->src[i]->buffer &&
433
+ ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
434
+ auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
435
+ return buf_extra->supports_op(dev, op);
436
+ }
437
+ }
438
+
439
+ switch (op->op) {
440
+ case GGML_OP_CPY:
441
+ case GGML_OP_SET_ROWS:
442
+ return
443
+ op->type != GGML_TYPE_IQ3_XXS &&
444
+ op->type != GGML_TYPE_IQ3_S &&
445
+ op->type != GGML_TYPE_IQ2_XXS &&
446
+ op->type != GGML_TYPE_IQ2_XS &&
447
+ op->type != GGML_TYPE_IQ2_S &&
448
+ op->type != GGML_TYPE_IQ1_S &&
449
+ op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
450
+ case GGML_OP_MUL_MAT:
451
+ return src1->type == GGML_TYPE_F32 || src1->type == ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
452
+ case GGML_OP_SOFT_MAX_BACK: {
453
+ if (op->src[0]->type != GGML_TYPE_F32 || op->src[1]->type != GGML_TYPE_F32) {
454
+ return false;
455
+ }
456
+ float max_bias = 0.0f;
457
+
458
+ memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));
459
+
460
+ return max_bias == 0.0f;
461
+ }
462
+ case GGML_OP_IM2COL_BACK:
463
+ return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
464
+ case GGML_OP_GET_ROWS_BACK:
465
+ return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
466
+ case GGML_OP_OUT_PROD:
467
+ return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
468
+ src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
469
+ default:
470
+ return true;
471
+ }
472
+ }
473
+
474
+ static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
475
+ return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
476
+ GGML_UNUSED(dev);
477
+ }
478
+
479
+ static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
480
+ /* .get_name = */ ggml_backend_cpu_device_get_name,
481
+ /* .get_description = */ ggml_backend_cpu_device_get_description,
482
+ /* .get_memory = */ ggml_backend_cpu_device_get_memory,
483
+ /* .get_type = */ ggml_backend_cpu_device_get_type,
484
+ /* .get_props = */ ggml_backend_cpu_device_get_props,
485
+ /* .init_backend = */ ggml_backend_cpu_device_init_backend,
486
+ /* .get_buffer_type = */ ggml_backend_cpu_device_get_buffer_type,
487
+ /* .get_host_buffer_type = */ NULL,
488
+ /* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,
489
+ /* .supports_op = */ ggml_backend_cpu_device_supports_op,
490
+ /* .supports_buft = */ ggml_backend_cpu_device_supports_buft,
491
+ /* .offload_op = */ NULL,
492
+ /* .event_new = */ NULL,
493
+ /* .event_free = */ NULL,
494
+ /* .event_synchronize = */ NULL,
495
+ };
496
+
497
+ // CPU backend - backend (reg)
498
+
499
+ static const char * ggml_backend_cpu_reg_get_name(ggml_backend_reg_t reg) {
500
+ return "CPU";
501
+
502
+ GGML_UNUSED(reg);
503
+ }
504
+
505
+ static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) {
506
+ return 1;
507
+
508
+ GGML_UNUSED(reg);
509
+ }
510
+
511
+ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) {
512
+ GGML_ASSERT(index == 0);
513
+
514
+ static ggml_backend_cpu_device_context ctx;
515
+ static ggml_backend_device ggml_backend_cpu_device = {
516
+ /* .iface = */ ggml_backend_cpu_device_i,
517
+ /* .reg = */ reg,
518
+ /* .context = */ &ctx,
519
+ };
520
+
521
+ return &ggml_backend_cpu_device;
522
+ }
523
+
524
+ // This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
525
+ // and additionally to allow other backends to expose their own list of features that applications can query using the same API
526
+ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
527
+ static std::vector<ggml_backend_feature> features = []() {
528
+ ggml_cpu_init();
529
+
530
+ std::vector<ggml_backend_feature> features;
531
+ if (ggml_cpu_has_sse3()) {
532
+ features.push_back({ "SSE3", "1" });
533
+ }
534
+ if (ggml_cpu_has_ssse3()) {
535
+ features.push_back({ "SSSE3", "1" });
536
+ }
537
+ if (ggml_cpu_has_avx()) {
538
+ features.push_back({ "AVX", "1" });
539
+ }
540
+ if (ggml_cpu_has_avx_vnni()) {
541
+ features.push_back({ "AVX_VNNI", "1" });
542
+ }
543
+ if (ggml_cpu_has_avx2()) {
544
+ features.push_back({ "AVX2", "1" });
545
+ }
546
+ if (ggml_cpu_has_f16c()) {
547
+ features.push_back({ "F16C", "1" });
548
+ }
549
+ if (ggml_cpu_has_fma()) {
550
+ features.push_back({ "FMA", "1" });
551
+ }
552
+ if (ggml_cpu_has_bmi2()) {
553
+ features.push_back({ "BMI2", "1" });
554
+ }
555
+ if (ggml_cpu_has_avx512()) {
556
+ features.push_back({ "AVX512", "1" });
557
+ }
558
+ if (ggml_cpu_has_avx512_vbmi()) {
559
+ features.push_back({ "AVX512_VBMI", "1" });
560
+ }
561
+ if (ggml_cpu_has_avx512_vnni()) {
562
+ features.push_back({ "AVX512_VNNI", "1" });
563
+ }
564
+ if (ggml_cpu_has_avx512_bf16()) {
565
+ features.push_back({ "AVX512_BF16", "1" });
566
+ }
567
+ if (ggml_cpu_has_amx_int8()) {
568
+ features.push_back({ "AMX_INT8", "1" });
569
+ }
570
+ if (ggml_cpu_has_neon()) {
571
+ features.push_back({ "NEON", "1" });
572
+ }
573
+ if (ggml_cpu_has_arm_fma()) {
574
+ features.push_back({ "ARM_FMA", "1" });
575
+ }
576
+ if (ggml_cpu_has_fp16_va()) {
577
+ features.push_back({ "FP16_VA", "1" });
578
+ }
579
+ if (ggml_cpu_has_matmul_int8()) {
580
+ features.push_back({ "MATMUL_INT8", "1" });
581
+ }
582
+ if (ggml_cpu_has_sve()) {
583
+ features.push_back({ "SVE", "1" });
584
+ }
585
+ if (ggml_cpu_has_dotprod()) {
586
+ features.push_back({ "DOTPROD", "1" });
587
+ }
588
+ if (ggml_cpu_get_sve_cnt() > 0) {
589
+ static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
590
+ features.push_back({ "SVE_CNT", sve_cnt.c_str() });
591
+ }
592
+ if (ggml_cpu_has_sme()) {
593
+ features.push_back({ "SME", "1" });
594
+ }
595
+ if (ggml_cpu_has_riscv_v()) {
596
+ features.push_back({ "RISCV_V", "1" });
597
+ }
598
+ if (ggml_cpu_get_rvv_vlen() > 0) {
599
+ static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
600
+ features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
601
+ }
602
+ if (ggml_cpu_has_vsx()) {
603
+ features.push_back({ "VSX", "1" });
604
+ }
605
+ if (ggml_cpu_has_vxe()) {
606
+ features.push_back({ "VXE", "1" });
607
+ }
608
+ if (ggml_cpu_has_wasm_simd()) {
609
+ features.push_back({ "WASM_SIMD", "1" });
610
+ }
611
+ if (ggml_cpu_has_llamafile()) {
612
+ features.push_back({ "LLAMAFILE", "1" });
613
+ }
614
+ #ifdef GGML_USE_ACCELERATE
615
+ features.push_back({ "ACCELERATE", "1" });
616
+ #endif
617
+ #ifdef GGML_USE_CPU_HBM
618
+ features.push_back({ "CPU_HBM", "1" });
619
+ #endif
620
+ #ifdef GGML_USE_OPENMP
621
+ features.push_back({ "OPENMP", "1" });
622
+ #endif
623
+ #ifdef GGML_USE_CPU_KLEIDIAI
624
+ features.push_back({ "KLEIDIAI", "1" });
625
+ #endif
626
+ #ifdef GGML_USE_CPU_REPACK
627
+ features.push_back({ "REPACK", "1" });
628
+ #endif
629
+
630
+ features.push_back({ nullptr, nullptr });
631
+
632
+ return features;
633
+ }();
634
+
635
+ return features.data();
636
+
637
+ GGML_UNUSED(reg);
638
+ }
639
+
640
+ static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
641
+ if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
642
+ ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
643
+ return (void *)fct;
644
+ }
645
+ if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
646
+ ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
647
+ return (void *)fct;
648
+ }
649
+ if (strcmp(name, "ggml_backend_get_features") == 0) {
650
+ return (void *)ggml_backend_cpu_get_features;
651
+ }
652
+ if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
653
+ return (void *)ggml_backend_cpu_set_abort_callback;
654
+ }
655
+ if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
656
+ return (void *)ggml_numa_init;
657
+ }
658
+ if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
659
+ return (void *)ggml_is_numa;
660
+ }
661
+ if (strcmp(name, "ggml_backend_cpu_set_use_ref") == 0) {
662
+ return (void *)ggml_backend_cpu_set_use_ref;
663
+ }
664
+
665
+ // threadpool - TODO: move to ggml-base
666
+ if (strcmp(name, "ggml_threadpool_new") == 0) {
667
+ return (void *)ggml_threadpool_new;
668
+ }
669
+ if (strcmp(name, "ggml_threadpool_free") == 0) {
670
+ return (void *)ggml_threadpool_free;
671
+ }
672
+ if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
673
+ return (void *)ggml_backend_cpu_set_threadpool;
674
+ }
675
+
676
+ return NULL;
677
+
678
+ GGML_UNUSED(reg);
679
+ }
680
+
681
+ static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
682
+ /* .get_name = */ ggml_backend_cpu_reg_get_name,
683
+ /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
684
+ /* .get_device = */ ggml_backend_cpu_reg_get_device,
685
+ /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
686
+ };
687
+
688
+ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
689
+ // init CPU feature detection
690
+ ggml_cpu_init();
691
+
692
+ static struct ggml_backend_reg ggml_backend_cpu_reg = {
693
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
694
+ /* .iface = */ ggml_backend_cpu_reg_i,
695
+ /* .context = */ NULL,
696
+ };
697
+
698
+ return &ggml_backend_cpu_reg;
699
+ }
700
+
701
+ GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)