local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626) hide show
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,691 @@
1
# ggml_add_cpu_backend_features(<cpu_name> <arch> [<definition>...])
#
# Builds ggml-cpu/arch/<arch>/cpu-feats.cpp as the OBJECT library
# "<cpu_name>-feats" and links it privately into the target <cpu_name>.
#
#   cpu_name - name of an existing CPU backend target
#   arch     - sub-directory of ggml-cpu/arch/ that provides cpu-feats.cpp
#   ARGN     - extra PRIVATE compile definitions for the feature detection code
function(ggml_add_cpu_backend_features cpu_name arch)
    # The feature detection code is compiled as a separate target so that
    # it can be built without the architecture flags
    # Since multiple variants of the CPU backend may be included in the same
    # build, using set_source_files_properties() to set the arch flags is not possible
    set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
    add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
    target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . ../include)
    target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
    target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)

    # Disable LTO for the feature detection code to prevent cross-module optimization
    # from inlining architecture-specific instructions into the score function.
    # Without this, LTO can cause SIGILL when loading backends on older CPUs
    # (e.g., loading power10 backend on power9 crashes before feature check runs).
    #
    # INTERPROCEDURAL_OPTIMIZATION OFF covers IPO that CMake itself enables
    # (for any toolchain); the explicit -fno-lto covers LTO that was requested
    # through raw compiler flags. cl.exe does not understand -fno-lto, so the
    # flag is skipped when the C++ compiler ID is MSVC (clang-cl reports "Clang").
    set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES
        POSITION_INDEPENDENT_CODE    ON
        INTERPROCEDURAL_OPTIMIZATION OFF)
    target_compile_options(${GGML_CPU_FEATS_NAME} PRIVATE
        "$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fno-lto>")

    target_link_libraries(${cpu_name} PRIVATE ${GGML_CPU_FEATS_NAME})
endfunction()
19
+
20
# ggml_add_cpu_backend_variant_impl(<tag_name>)
#
# Declares one CPU backend library and configures it. A true <tag_name>
# yields the target "ggml-cpu-<tag_name>"; otherwise the target is "ggml-cpu".
function(ggml_add_cpu_backend_variant_impl tag_name)
    # Default target name, replaced by the tagged name when a tag is given.
    set(GGML_CPU_NAME ggml-cpu)
    if (tag_name)
        set(GGML_CPU_NAME ggml-cpu-${tag_name})
    endif()

    ggml_add_backend_library(${GGML_CPU_NAME})

    # Sources shared by every architecture (order kept as-is).
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/ggml-cpu.c         ggml-cpu/ggml-cpu.cpp
        ggml-cpu/repack.cpp         ggml-cpu/repack.h
        ggml-cpu/hbm.cpp            ggml-cpu/hbm.h
        ggml-cpu/quants.c           ggml-cpu/quants.h
        ggml-cpu/traits.cpp         ggml-cpu/traits.h
        ggml-cpu/amx/amx.cpp        ggml-cpu/amx/amx.h
        ggml-cpu/amx/mmq.cpp        ggml-cpu/amx/mmq.h
        ggml-cpu/ggml-cpu-impl.h
        ggml-cpu/common.h
        ggml-cpu/binary-ops.h       ggml-cpu/binary-ops.cpp
        ggml-cpu/unary-ops.h        ggml-cpu/unary-ops.cpp
        ggml-cpu/simd-mappings.h
        ggml-cpu/vec.h              ggml-cpu/vec.cpp
        ggml-cpu/ops.h              ggml-cpu/ops.cpp
    )

    # C11 / C++17, and private include roots for the backend's own headers.
    target_compile_features   (${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
    target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
59
+
60
if (APPLE AND GGML_ACCELERATE)
    # Optional Accelerate framework: warn (do not fail) when it cannot be located.
    find_library(ACCELERATE_FRAMEWORK Accelerate)
    if (NOT ACCELERATE_FRAMEWORK)
        message(WARNING "Accelerate framework not found")
    else()
        message(STATUS "Accelerate framework found")

        target_compile_definitions(${GGML_CPU_NAME} PRIVATE
            GGML_USE_ACCELERATE
            ACCELERATE_NEW_LAPACK
            ACCELERATE_LAPACK_ILP64)

        target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
    endif()
endif()
74
+
75
if (GGML_OPENMP)
    # OpenMP is optional; the outcome is recorded in the internal cache entry
    # GGML_OPENMP_ENABLED either way.
    find_package(OpenMP)
    if (NOT OpenMP_FOUND)
        set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
        message(WARNING "OpenMP not found")
    else()
        set(GGML_OPENMP_ENABLED "ON" CACHE INTERNAL "")
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)

        target_link_libraries(${GGML_CPU_NAME} PRIVATE
            OpenMP::OpenMP_C
            OpenMP::OpenMP_CXX)
    endif()
endif()
87
+
88
if (GGML_LLAMAFILE)
    # Add the llamafile sgemm sources and the matching compile definition.
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/llamafile/sgemm.cpp
        ggml-cpu/llamafile/sgemm.h)

    target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)
endif()
95
+
96
if (GGML_CPU_HBM)
    # memkind is mandatory once HBM support is requested.
    find_library(memkind memkind REQUIRED)

    message(STATUS "Using memkind for CPU HBM")

    target_link_libraries     (${GGML_CPU_NAME} PUBLIC  memkind)
    target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
endif()
105
+
106
if (GGML_SYSTEM_ARCH STREQUAL "ARM")
    message(STATUS "ARM detected")
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/arch/arm/quants.c
        ggml-cpu/arch/arm/repack.cpp
        )

    if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
        message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
    else()
        # Add -mfp16-format=ieee only when the compiler accepts it.
        check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
        if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
            list(APPEND ARCH_FLAGS -mfp16-format=ieee)
        endif()

        if (GGML_NATIVE)
            # -mcpu=native does not always enable all the features in some compilers,
            # so we check for them manually and enable them if available

            # Ask the C compiler (verbose preprocess of empty input) which
            # concrete -mcpu/-march value it derives from -mcpu=native.
            execute_process(
                COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
                INPUT_FILE "/dev/null"
                OUTPUT_QUIET
                ERROR_VARIABLE ARM_MCPU
                RESULT_VARIABLE ARM_MCPU_RESULT
            )
            if (NOT ARM_MCPU_RESULT)
                string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
                string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")

                # on some old GCC we need to read -march=
                if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
                    set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
                elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
                    set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
                endif()
            endif()

            if ("${ARM_NATIVE_FLAG}" STREQUAL "")
                set(ARM_NATIVE_FLAG -mcpu=native)
                message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
            else()
                message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
            endif()

            # check_arm_feature() uses both check_cxx_source_runs() and
            # check_cxx_source_compiles(); include both modules here so the
            # macro does not depend on some other code having loaded
            # CheckCXXSourceCompiles first.
            include(CheckCXXSourceRuns)
            include(CheckCXXSourceCompiles)

            # check_arm_feature(<tag> <feature> <code>)
            #   tag     - architecture extension suffix tried as "+<tag>"
            #   feature - suffix of the __ARM_FEATURE_<feature> macro
            #   code    - C++ program that must build and run with "+<tag>"
            # If the program runs, "+<tag>" is appended to ARM_NATIVE_FLAG_FIX.
            # Otherwise, if the compiler accepts "+no<tag>", that suffix is
            # appended instead and __ARM_FEATURE_<feature> is undefined via
            # ARCH_FLAGS. CMAKE_REQUIRED_FLAGS is restored on exit.
            macro(check_arm_feature tag feature code)
                set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
                set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
                check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
                if (GGML_MACHINE_SUPPORTS_${tag})
                    set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}")
                else()
                    set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
                    check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
                    if (GGML_MACHINE_SUPPORTS_no${tag})
                        set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}")
                        list(APPEND ARCH_FLAGS -U__ARM_FEATURE_${feature})
                    endif()
                endif()
                set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
            endmacro()

            check_arm_feature(dotprod DOTPROD     "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
            check_arm_feature(i8mm    MATMUL_INT8 "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
            check_arm_feature(sve     SVE         "#include <arm_sve.h>\nint main()  { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
            check_arm_feature(sme     SME         "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")

            list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
        else()
            if (GGML_CPU_ARM_ARCH)
                list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
            elseif(GGML_CPU_ALL_VARIANTS)
                # Begin with the lowest baseline
                set(ARM_MCPU "armv8-a")
                set(ARCH_TAGS "")
                set(ARCH_DEFINITIONS "")

                # When a feature is selected, bump the MCPU to the first
                # version that supported it
                if (GGML_INTERNAL_DOTPROD)
                    set(ARM_MCPU "armv8.2-a")
                    set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
                endif()
                if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
                    set(ARM_MCPU "armv8.2-a")
                    set(ARCH_TAGS "${ARCH_TAGS}+fp16")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
                endif()
                if (GGML_INTERNAL_SVE)
                    set(ARM_MCPU "armv8.2-a")
                    set(ARCH_TAGS "${ARCH_TAGS}+sve")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
                endif()
                if (GGML_INTERNAL_MATMUL_INT8)
                    set(ARM_MCPU "armv8.6-a")
                    set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
                endif()
                if (GGML_INTERNAL_SVE2)
                    set(ARM_MCPU "armv8.6-a")
                    set(ARCH_TAGS "${ARCH_TAGS}+sve2")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
                endif()
                if (GGML_INTERNAL_NOSVE)
                    set(ARCH_TAGS "${ARCH_TAGS}+nosve")
                endif()
                if (GGML_INTERNAL_SME)
                    set(ARM_MCPU "armv9.2-a")
                    set(ARCH_TAGS "${ARCH_TAGS}+sme")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
                endif()
                list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
                ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
            endif()
        endif()

        message(STATUS "Checking for ARM features using flags:")
        foreach(flag IN LISTS ARCH_FLAGS)
            message(STATUS "  ${flag}")
        endforeach()

        # Probe which __ARM_FEATURE_* macros the final flag set defines; each
        # result is stored in HAVE_<feature>.
        include(CheckCXXSourceCompiles)
        set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
        string(REPLACE ";" " " ARCH_FLAGS_STR "${ARCH_FLAGS}")
        set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS_STR}")
        foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
            set(ARM_FEATURE "HAVE_${feature}")
            check_cxx_source_compiles(
                "
                #if !defined(__ARM_FEATURE_${feature})
                # error \"Feature ${feature} is not defined\"
                #endif
                int main() { return 0; }
                "
                ${ARM_FEATURE}
            )
        endforeach()
        set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
    endif()
248
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
    message(STATUS "x86 detected")
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/arch/x86/quants.c
        ggml-cpu/arch/x86/repack.cpp
        )

    if (MSVC)
        # instruction set detection for MSVC only
        if (GGML_NATIVE)
            include(ggml-cpu/cmake/FindSIMD.cmake)
        endif ()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS /arch:AVX512)
            # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
            # MSVC has no compile-time flags enabling specific
            # AVX512 extensions, neither it defines the
            # macros corresponding to the extensions.
            # Do it manually.
            list(APPEND ARCH_DEFINITIONS GGML_AVX512)
            if (GGML_AVX512_VBMI)
                # GGML_AVX512_VBMI is added alongside the compiler-style macro,
                # like every other extension in this branch and like the
                # non-MSVC branch below, so the definitions forwarded to the
                # feature-detection target include the VBMI requirement.
                list(APPEND ARCH_DEFINITIONS __AVX512VBMI__ GGML_AVX512_VBMI)
                if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                    list(APPEND ARCH_FLAGS -mavx512vbmi)
                endif()
            endif()
            if (GGML_AVX512_VNNI)
                list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
                if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                    list(APPEND ARCH_FLAGS -mavx512vnni)
                endif()
            endif()
            if (GGML_AVX512_BF16)
                list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
                if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                    list(APPEND ARCH_FLAGS -mavx512bf16)
                endif()
            endif()
            if (GGML_AMX_TILE)
                list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
            endif()
            if (GGML_AMX_INT8)
                list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
            endif()
            if (GGML_AMX_BF16)
                list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
            endif()
        elseif (GGML_AVX2)
            list(APPEND ARCH_FLAGS /arch:AVX2)
            list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
        elseif (GGML_AVX)
            list(APPEND ARCH_FLAGS /arch:AVX)
            list(APPEND ARCH_DEFINITIONS GGML_AVX)
        elseif (GGML_SSE42)
            list(APPEND ARCH_FLAGS /arch:SSE4.2)
            list(APPEND ARCH_DEFINITIONS GGML_SSE42)
        endif()
        if (GGML_AVX_VNNI)
            list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
        endif()
        if (GGML_BMI2)
            # MSVC does not define macro __BMI2__
            list(APPEND ARCH_DEFINITIONS __BMI2__ GGML_BMI2)
        endif()
    else ()
        if (GGML_NATIVE)
            list(APPEND ARCH_FLAGS -march=native)
        else ()
            # One compiler flag (or flag group) plus one GGML_* definition per
            # enabled instruction-set option.
            if (GGML_SSE42)
                list(APPEND ARCH_FLAGS -msse4.2)
                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
            endif()
            if (GGML_F16C)
                list(APPEND ARCH_FLAGS -mf16c)
                list(APPEND ARCH_DEFINITIONS GGML_F16C)
            endif()
            if (GGML_FMA)
                list(APPEND ARCH_FLAGS -mfma)
                list(APPEND ARCH_DEFINITIONS GGML_FMA)
            endif()
            if (GGML_BMI2)
                list(APPEND ARCH_FLAGS -mbmi2)
                list(APPEND ARCH_DEFINITIONS GGML_BMI2)
            endif()
            if (GGML_AVX)
                list(APPEND ARCH_FLAGS -mavx)
                list(APPEND ARCH_DEFINITIONS GGML_AVX)
            endif()
            if (GGML_AVX2)
                list(APPEND ARCH_FLAGS -mavx2)
                list(APPEND ARCH_DEFINITIONS GGML_AVX2)
            endif()
            if (GGML_AVX_VNNI)
                list(APPEND ARCH_FLAGS -mavxvnni)
                list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
            endif()
            if (GGML_AVX512)
                list(APPEND ARCH_FLAGS -mavx512f)
                list(APPEND ARCH_FLAGS -mavx512cd)
                list(APPEND ARCH_FLAGS -mavx512vl)
                list(APPEND ARCH_FLAGS -mavx512dq)
                list(APPEND ARCH_FLAGS -mavx512bw)
                list(APPEND ARCH_DEFINITIONS GGML_AVX512)
            endif()
            if (GGML_AVX512_VBMI)
                list(APPEND ARCH_FLAGS -mavx512vbmi)
                list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
            endif()
            if (GGML_AVX512_VNNI)
                list(APPEND ARCH_FLAGS -mavx512vnni)
                list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
            endif()
            if (GGML_AVX512_BF16)
                list(APPEND ARCH_FLAGS -mavx512bf16)
                list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
            endif()
            if (GGML_AMX_TILE)
                list(APPEND ARCH_FLAGS -mamx-tile)
                list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
            endif()
            if (GGML_AMX_INT8)
                list(APPEND ARCH_FLAGS -mamx-int8)
                list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
            endif()
            if (GGML_AMX_BF16)
                list(APPEND ARCH_FLAGS -mamx-bf16)
                list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
            endif()
        endif()
    endif()

    if (GGML_BACKEND_DL)
        if (GGML_NATIVE)
            # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
            message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
        endif()
        ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
    endif()
386
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
    message(STATUS "PowerPC detected")
    list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
    if (GGML_NATIVE)
        # Read a textual CPU description: /proc/cpuinfo for "ppc64"
        # processors, the first "Implementation" line of prtconf for "powerpc".
        # CMAKE_SYSTEM_PROCESSOR is quoted so an empty value cannot produce a
        # malformed if() expression.
        if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64")
            file(READ "/proc/cpuinfo" POWER10_M)
        elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "powerpc")
            execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
        endif()

        # Take the first "POWER<n>" occurrence only, so EXTRACTED_NUMBER is a
        # single number rather than a ;-list with one entry per occurrence.
        string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
        string(REGEX MATCH "POWER *[0-9]+" MATCHED_STRING "${POWER10_M_UPPER}")
        string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")

        if (EXTRACTED_NUMBER GREATER_EQUAL 10)
            list(APPEND ARCH_FLAGS -mcpu=power10)
        elseif (EXTRACTED_NUMBER EQUAL 9)
            list(APPEND ARCH_FLAGS -mcpu=power9)
        elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64le")
            list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
        else()
            list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
        endif()
    elseif(GGML_CPU_ALL_VARIANTS)
        # Begin with the lowest baseline
        set(ARCH_DEFINITIONS "")

        # When a feature is selected, bump the MCPU to the first
        # version that supported it
        foreach(PVER RANGE 7 11)
            if(DEFINED GGML_INTERNAL_POWER${PVER})
                set(POWERPC_MCPU "power${PVER}")
                list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
            endif()
        endforeach()
        if (GGML_INTERNAL_VSX)
            list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
            list(APPEND ARCH_FLAGS -mvsx)
        endif()

        if (DEFINED POWERPC_MCPU)
            list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
        endif()
        ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
    else()
        # Explicit CPU type requested by the user, if any.
        if (GGML_CPU_POWERPC_CPUTYPE)
            list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
        endif()
    endif()
435
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
    message(STATUS "loongarch64 detected")
    list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)

    # Base -march first, then the optional vector extensions (LASX, then LSX).
    set(LOONGARCH_FLAGS -march=loongarch64)
    if (GGML_LASX)
        list(APPEND LOONGARCH_FLAGS -mlasx)
    endif()
    if (GGML_LSX)
        list(APPEND LOONGARCH_FLAGS -mlsx)
    endif()
    list(APPEND ARCH_FLAGS ${LOONGARCH_FLAGS})
446
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
    message(STATUS "riscv64 detected")
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/arch/riscv/quants.c
        ggml-cpu/arch/riscv/repack.cpp)

    # Optional SpacemiT sources and definitions.
    if (GGML_CPU_RISCV64_SPACEMIT)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT ${RISCV64_SPACEMIT_IME_SPEC})
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/spacemit/ime.cpp
            ggml-cpu/spacemit/ime.h
            ggml-cpu/spacemit/ime1_kernels.cpp
            ggml-cpu/spacemit/ime_kernels.h)
    endif()

    if (GGML_CPU_ALL_VARIANTS)
        # Begin with the lowest baseline
        set(ARCH_DEFINITIONS "")

        if (GGML_INTERNAL_RVV)
            message(STATUS "RVV enabled")
            list(APPEND ARCH_DEFINITIONS GGML_USE_RVV)
            list(APPEND ARCH_FLAGS -march=rv64gc_v -mabi=lp64d)
        endif()

        ggml_add_cpu_backend_features(${GGML_CPU_NAME} riscv ${ARCH_DEFINITIONS})
    else()
        # Single build: assemble the -march string from the enabled extensions.
        set(RISCV_MARCH "rv64gc")
        if (GGML_RV_ZFH)
            set(RISCV_MARCH "${RISCV_MARCH}_zfh")
        endif()

        # xtheadvector takes precedence over the standard vector extension.
        if (GGML_XTHEADVECTOR)
            set(RISCV_MARCH "${RISCV_MARCH}_xtheadvector")
        elseif (GGML_RVV)
            set(RISCV_MARCH "${RISCV_MARCH}_v")
            if (GGML_RV_ZVFH)
                set(RISCV_MARCH "${RISCV_MARCH}_zvfh")
            endif()
            if (GGML_RV_ZVFBFWMA)
                set(RISCV_MARCH "${RISCV_MARCH}_zvfbfwma")
            endif()
        endif()
        if (GGML_RV_ZICBOP)
            set(RISCV_MARCH "${RISCV_MARCH}_zicbop")
        endif()
        if (GGML_RV_ZIHINTPAUSE)
            set(RISCV_MARCH "${RISCV_MARCH}_zihintpause")
        endif()
        list(APPEND ARCH_FLAGS "-march=${RISCV_MARCH}" -mabi=lp64d)
    endif()
497
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
    message(STATUS "s390x detected")
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/arch/s390/quants.c)

    # for native compilation
    if (GGML_NATIVE)
        # check machine level to determine target
        file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
        # Isolate the first "machine = <digits>" field and keep only its
        # digits, so S390X_M holds just the machine number (empty when the
        # field is missing) instead of the whole cpuinfo text.
        string(REGEX MATCH "machine[ \t\r\n]*=[ \t\r\n]*[0-9]+" S390X_MACHINE_FIELD "${CPUINFO_CONTENTS}")
        string(REGEX REPLACE "[^0-9]" "" S390X_M "${S390X_MACHINE_FIELD}")

        # TODO: Separation to determine activation of VX/VXE/VXE2
        if ("${S390X_M}" MATCHES "8561|8562")
            message(STATUS "z15 target")
            list(APPEND ARCH_FLAGS -march=z15)
        elseif ("${S390X_M}" MATCHES "3931")
            message(STATUS "z16 target")
            list(APPEND ARCH_FLAGS -march=z16)
        elseif ("${S390X_M}" MATCHES "9175|9176")
            # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
            #       binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
            message(STATUS "z17 target")
            list(APPEND ARCH_FLAGS -march=arch15)
        else()
            message(STATUS "Unknown target")
            message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
            list(APPEND ARCH_FLAGS -march=native -mtune=native)
        endif()
    # for cross-compilation
    elseif(GGML_CPU_ALL_VARIANTS)
        # range through IBM z15 to z17
        # NOTE: update when a new hardware level is released
        foreach (ZHW RANGE 15 17)
            if(DEFINED GGML_INTERNAL_Z${ZHW})
                message(STATUS "z${ZHW} cross-compile target")
                list(APPEND ARCH_FLAGS -march=z${ZHW})
            endif()
        endforeach()
    endif()

    if (GGML_VXE OR GGML_INTERNAL_VXE2)
        message(STATUS "VXE2 enabled")
        list(APPEND ARCH_FLAGS -mvx -mzvector)
        list(APPEND ARCH_DEFINITIONS GGML_USE_VXE2)
    endif()

    if (GGML_INTERNAL_NNPA)
        message(STATUS "NNPA enabled")
        list(APPEND ARCH_DEFINITIONS GGML_USE_NNPA)
    endif()

    ggml_add_cpu_backend_features(${GGML_CPU_NAME} s390 ${ARCH_DEFINITIONS})
549
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
    # WebAssembly: only the wasm quantization source is added.
    message(STATUS "Wasm detected")
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/arch/wasm/quants.c)
else()
    # No architecture branch matched: build with GGML_CPU_GENERIC defined.
    message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
    list(APPEND ARCH_FLAGS
        -DGGML_CPU_GENERIC)
endif()

# Optional compile definition for the repack code path.
if (GGML_CPU_REPACK)
    target_compile_definitions(${GGML_CPU_NAME}
        PRIVATE GGML_USE_CPU_REPACK)
endif()
560
+
561
if (GGML_CPU_KLEIDIAI)
    message(STATUS "Using KleidiAI optimized kernels if applicable")

    # Disable the KleidiAI tests
    set(KLEIDIAI_BUILD_TESTS OFF)

    # Fetch KleidiAI sources:
    include(FetchContent)
    set(KLEIDIAI_COMMIT_TAG "v1.16.0")
    set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
    set(KLEIDIAI_ARCHIVE_MD5 "0a9e9008adb6031f9e8cf70dff4a3321")

    if (POLICY CMP0135)
        cmake_policy(SET CMP0135 NEW)
    endif()

    # TODO: Use FetchContent_MakeAvailable with EXCLUDE_FROM_ALL after bumping minimum CMake version to 3.28+
    # Using FetchContent_Populate instead to avoid EXCLUDE_FROM_ALL which requires CMake 3.28
    FetchContent_Declare(KleidiAI_Download
        URL ${KLEIDIAI_DOWNLOAD_URL}
        DOWNLOAD_EXTRACT_TIMESTAMP NEW
        URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5})

    FetchContent_GetProperties(KleidiAI_Download
        SOURCE_DIR KLEIDIAI_SRC
        POPULATED KLEIDIAI_POPULATED)

    if (NOT KLEIDIAI_POPULATED)
        FetchContent_Populate(KleidiAI_Download)
        FetchContent_GetProperties(KleidiAI_Download SOURCE_DIR KLEIDIAI_SRC)
    endif()

    # NOTE(review): this definition and the include_directories() call below
    # are directory-scoped rather than attached to ${GGML_CPU_NAME}; confirm
    # whether other targets rely on them before narrowing the scope.
    add_compile_definitions(GGML_USE_CPU_KLEIDIAI)

    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/kleidiai/kleidiai.cpp
        ggml-cpu/kleidiai/kernels.cpp
        ggml-cpu/kleidiai/kleidiai.h
        ggml-cpu/kleidiai/kernels.h
        )

    # Shorthands for the KleidiAI kernel directories used below.
    set(KAI_MATMUL_DIR ${KLEIDIAI_SRC}/kai/ukernels/matmul)
    set(KAI_QSI4_DIR   ${KAI_MATMUL_DIR}/matmul_clamp_f32_qsi8d32p_qsi4c32p)
    set(KAI_QSI8_DIR   ${KAI_MATMUL_DIR}/matmul_clamp_f32_qai8dxp_qsi8cxp)
    set(KAI_BF16_DIR   ${KAI_MATMUL_DIR}/matmul_clamp_fp32_bf16p_bf16p)
    set(KAI_PACK_DIR   ${KAI_MATMUL_DIR}/pack)

    # KleidiAI
    include_directories(
        ${KLEIDIAI_SRC}/
        ${KLEIDIAI_SRC}/kai/
        ${KLEIDIAI_SRC}/kai/ukernels/
        ${KAI_MATMUL_DIR}/
        ${KAI_QSI4_DIR}/
        ${KAI_QSI8_DIR}/
        ${KAI_BF16_DIR}/
        ${KAI_PACK_DIR}/)

    # Flags used to decide which kernels to build: ARCH_FLAGS when set,
    # otherwise the first -march=... found in CMAKE_C_FLAGS.
    set(KAI_ARCH_FLAGS "${ARCH_FLAGS}")
    if (NOT KAI_ARCH_FLAGS)
        string(REGEX MATCH "-march=[^ ]+" KAI_ARCH_FLAGS "${CMAKE_C_FLAGS}")
    endif()
    # string(FIND) yields -1 when the substring is absent.
    string(FIND "${KAI_ARCH_FLAGS}" "+dotprod" KAI_DOTPROD_POS)
    string(FIND "${KAI_ARCH_FLAGS}" "+i8mm"    KAI_I8MM_POS)
    string(FIND "${KAI_ARCH_FLAGS}" "+sme"     KAI_SME_POS)
    string(FIND "${KAI_ARCH_FLAGS}" "+sve"     KAI_SVE_POS)

    set(PRIVATE_ARCH_FLAGS ${KAI_ARCH_FLAGS})

    # Packing routines that are always built.
    list(APPEND GGML_KLEIDIAI_SOURCES
        ${KAI_PACK_DIR}/kai_lhs_quant_pack_qsi8d32p_f32.c
        ${KAI_PACK_DIR}/kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.c
        ${KAI_PACK_DIR}/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
        ${KAI_PACK_DIR}/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
        ${KAI_PACK_DIR}/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
        ${KAI_PACK_DIR}/kai_lhs_quant_pack_qai8dxp_f32.c
        ${KAI_PACK_DIR}/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c)

    # "+dotprod" present in the flags
    if (NOT KAI_DOTPROD_POS EQUAL -1)
        list(APPEND GGML_KLEIDIAI_SOURCES
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c)
    endif()

    # "+i8mm" present in the flags
    if (NOT KAI_I8MM_POS EQUAL -1)
        list(APPEND GGML_KLEIDIAI_SOURCES
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c)
    endif()

    # "+sme" present in the flags: add the SME kernels and extend the
    # per-source flags.
    if (NOT KAI_SME_POS EQUAL -1)
        list(APPEND GGML_KLEIDIAI_SOURCES
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.c
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_asm.S
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.c
            ${KAI_QSI8_DIR}/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_asm.S
            ${KAI_BF16_DIR}/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
            ${KAI_BF16_DIR}/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
            ${KAI_PACK_DIR}/kai_lhs_pack_bf16p2vlx2_f32_sme.c
            ${KAI_PACK_DIR}/kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.c
            ${KLEIDIAI_SRC}/kai/kai_common_sme_asm.S)
        set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
    endif()

    # "+sve" present in the flags
    if (NOT KAI_SVE_POS EQUAL -1)
        list(APPEND GGML_KLEIDIAI_SOURCES
            ${KLEIDIAI_SRC}/kai/kai_common_sve_asm.S
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod_asm.S
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod.c
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm_asm.S
            ${KAI_QSI4_DIR}/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm.c)
    endif()

    # The KleidiAI sources get their own per-source compile options and are
    # then added to the backend's source list.
    set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
    list(APPEND GGML_CPU_SOURCES ${GGML_KLEIDIAI_SOURCES})
endif()
677
+
678
# Attach the collected sources, flags and definitions to the backend target.
message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
target_sources            (${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
target_compile_options    (${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})

# Emscripten builds set COMPILE_FLAGS to -msimd128.
if (EMSCRIPTEN)
    set_property(TARGET ${GGML_CPU_NAME} PROPERTY COMPILE_FLAGS "-msimd128")
endif()

# The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
    target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
endif()
endfunction()