local-llm-rn 1.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (626):
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,1150 @@
1
+ #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
2
+ #pragma clang diagnostic ignored "-Wunused-function"
3
+
4
+ #include <HAP_farf.h>
5
+ #include <HAP_perf.h>
6
+ #include <AEEStdErr.h>
7
+ #include <dspqueue.h>
8
+ #include <HAP_compute_res.h>
9
+ #include <HAP_etm_config.h>
10
+ #include <HAP_mem.h>
11
+ #include <HAP_power.h>
12
+ #include <HAP_ps.h>
13
+ #include <qurt.h>
14
+ #include <qurt_thread.h>
15
+ #include <remote.h>
16
+ #include <string.h>
17
+
18
+ #include "hex-dma.h"
19
+ #include "hex-utils.h"
20
+
21
+ #define GGML_COMMON_DECL_C
22
+ #include "ggml-common.h"
23
+ #include "htp-ctx.h"
24
+ #include "htp-msg.h"
25
+ #include "htp-ops.h"
26
+ #include "worker-pool.h"
27
+
28
+ AEEResult htp_iface_open(const char * uri, remote_handle64 * handle) {
29
+ struct htp_context * ctx;
30
+ int err = 0;
31
+
32
+ ctx = calloc(1, sizeof(*ctx));
33
+ if (ctx == NULL) {
34
+ return AEE_ENOMEMORY;
35
+ }
36
+
37
+ // Use the context structure as a handle
38
+ *handle = (remote_handle64) ctx;
39
+
40
+ // Enable FARF logs
41
+ HAP_setFARFRuntimeLoggingParams(0xffff, NULL, 0);
42
+
43
+ // Set client class
44
+ {
45
+ HAP_power_request_t request;
46
+ memset(&request, 0, sizeof(HAP_power_request_t));
47
+ request.type = HAP_power_set_apptype;
48
+ request.apptype = HAP_POWER_COMPUTE_CLIENT_CLASS;
49
+
50
+ if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
51
+ return err;
52
+ }
53
+ }
54
+
55
+ {
56
+ HAP_power_request_t request;
57
+ memset(&request, 0, sizeof(request));
58
+
59
+ request.type = HAP_power_set_DCVS_v3;
60
+ request.dcvs_v3.set_dcvs_enable = TRUE;
61
+ request.dcvs_v3.dcvs_enable = TRUE;
62
+ request.dcvs_v3.dcvs_option = HAP_DCVS_V2_PERFORMANCE_MODE;
63
+ request.dcvs_v3.set_bus_params = TRUE;
64
+ request.dcvs_v3.bus_params.min_corner = HAP_DCVS_VCORNER_MAX;
65
+ request.dcvs_v3.bus_params.max_corner = HAP_DCVS_VCORNER_MAX;
66
+ request.dcvs_v3.bus_params.target_corner = HAP_DCVS_VCORNER_MAX;
67
+ request.dcvs_v3.set_core_params = TRUE;
68
+ request.dcvs_v3.core_params.min_corner = HAP_DCVS_VCORNER_MAX;
69
+ request.dcvs_v3.core_params.max_corner = HAP_DCVS_VCORNER_MAX;
70
+ request.dcvs_v3.core_params.target_corner = HAP_DCVS_VCORNER_MAX;
71
+ request.dcvs_v3.set_sleep_disable = TRUE;
72
+ request.dcvs_v3.sleep_disable = TRUE;
73
+ if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
74
+ return err;
75
+ }
76
+
77
+ memset(&request, 0, sizeof(request));
78
+ request.type = HAP_power_set_HVX;
79
+ request.hvx.power_up = TRUE;
80
+ if ((err = HAP_power_set((void *) ctx, &request)) != 0) {
81
+ return err;
82
+ }
83
+ }
84
+
85
+ {
86
+ // Power on HMX
87
+ HAP_power_request_t request;
88
+ memset(&request, 0, sizeof(HAP_power_request_t));
89
+ request.type = HAP_power_set_HMX;
90
+ request.hmx.power_up = TRUE;
91
+ FARF(ALWAYS, "Powering HMX on\n");
92
+ err = HAP_power_set((void *) &ctx, &request);
93
+ if (err != AEE_SUCCESS) {
94
+ FARF(ERROR, "Error powering on HMX.");
95
+ return err;
96
+ }
97
+ }
98
+
99
+ return AEE_SUCCESS;
100
+ }
101
+
102
+ AEEResult htp_iface_close(remote_handle64 handle) {
103
+ struct htp_context * ctx = (struct htp_context *) handle;
104
+
105
+ if (!ctx) {
106
+ return AEE_EBADPARM;
107
+ }
108
+
109
+ if (ctx->queue) {
110
+ FARF(ERROR, "Closing handle with queue still open");
111
+ return AEE_EITEMBUSY;
112
+ }
113
+
114
+ free(ctx);
115
+ return AEE_SUCCESS;
116
+ }
117
+
118
+ AEEResult htp_iface_enable_etm(remote_handle64 handle) {
119
+ int err = HAP_user_etm_enable();
120
+ if (err) {
121
+ if (err == AEE_EVERSIONNOTSUPPORT) {
122
+ FARF(ERROR, "API HAP_user_etm_enable is not supported\n");
123
+ } else {
124
+ FARF(ERROR, "Error executing HAP_user_etm_enable with error code : 0x%x\n", err);
125
+ }
126
+ }
127
+ return err;
128
+ }
129
+
130
+ AEEResult htp_iface_disable_etm(remote_handle64 handle) {
131
+ int err = HAP_user_etm_disable();
132
+ if (err) {
133
+ if (err == AEE_EVERSIONNOTSUPPORT) {
134
+ FARF(ERROR, "API HAP_user_etm_disable is not supported\n");
135
+ } else {
136
+ FARF(ERROR, "Error executing HAP_user_etm_disable with error code : 0x%x\n", err);
137
+ }
138
+ }
139
+ return err;
140
+ }
141
+
142
+ static int vtcm_acquire(struct htp_context * ctx) {
143
+ int err;
144
+ if (!ctx->vtcm_valid) {
145
+ // Temporarily bump thread priority to make sure it's higher than other sessions.
146
+ // This way the resource manager will notify the other thread to release VTCM.
147
+ // Note that we need to reaquire VTCM at normal priority for this to work next time.
148
+ qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio - 10);
149
+ err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
150
+ if (err != 0) {
151
+ FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
152
+ abort();
153
+ }
154
+ HAP_compute_res_release_cached(ctx->vtcm_rctx);
155
+ qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio);
156
+
157
+ err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000);
158
+ if (err != 0) {
159
+ FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err);
160
+ abort();
161
+ }
162
+ ctx->vtcm_valid = true;
163
+ }
164
+
165
+ ctx->vtcm_inuse = true;
166
+ return 0;
167
+ }
168
+
169
+ static int vtcm_release(struct htp_context * ctx) {
170
+ ctx->vtcm_inuse = false;
171
+
172
+ if (ctx->vtcm_valid && ctx->vtcm_needs_release) {
173
+ ctx->vtcm_valid = false;
174
+ ctx->vtcm_needs_release = false;
175
+ HAP_compute_res_release_cached(ctx->vtcm_rctx);
176
+ }
177
+
178
+ return 0;
179
+ }
180
+
181
+ static int vtcm_release_callback(unsigned int rctx, void * state) {
182
+ struct htp_context * ctx = (struct htp_context *) state;
183
+
184
+ if (!ctx || ctx->vtcm_rctx != rctx) {
185
+ return AEE_EBADPARM;
186
+ }
187
+
188
+ // If VTCM is not inuse (not processing Ops) release it right here
189
+ // otherwise we'll release it once we're done with the current Op.
190
+
191
+ if (ctx->vtcm_inuse) {
192
+ ctx->vtcm_needs_release = true;
193
+ return 0;
194
+ }
195
+
196
+ ctx->vtcm_valid = false;
197
+ HAP_compute_res_release_cached(ctx->vtcm_rctx);
198
+
199
+ return 0;
200
+ }
201
+
202
+ static int vtcm_alloc(struct htp_context * ctx) {
203
+ unsigned int vtcm_size = 8 * 1024 * 1024; // 8MB default
204
+ HAP_compute_res_query_VTCM(0, &vtcm_size, NULL, NULL, NULL);
205
+
206
+ compute_res_attr_t attr;
207
+ HAP_compute_res_attr_init(&attr);
208
+ HAP_compute_res_attr_set_serialize(&attr, 0);
209
+ HAP_compute_res_attr_set_cache_mode(&attr, 1);
210
+ HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, 0, vtcm_size);
211
+ HAP_compute_res_attr_set_release_callback(&attr, vtcm_release_callback, (void *) ctx);
212
+ HAP_compute_res_attr_set_hmx_param(&attr, 1);
213
+
214
+ // Allocate VTCM for scratch pads
215
+ uint32_t rctx = HAP_compute_res_acquire(&attr, 1000000 /* timeout */);
216
+ if (!rctx) {
217
+ FARF(ERROR, "failed to allocate %zu bytes VTCM\n", ctx->vtcm_size);
218
+ return AEE_ENOMEMORY;
219
+ }
220
+
221
+ void * vtcm_ptr;
222
+ if (HAP_compute_res_attr_get_vtcm_ptr_v2(&attr, &vtcm_ptr, &vtcm_size) != 0) {
223
+ HAP_compute_res_release(rctx);
224
+ FARF(ERROR, "failed to allocate %zu bytes VTCM (new)\n", ctx->vtcm_size);
225
+ return AEE_ENOMEMORY;
226
+ }
227
+
228
+ ctx->vtcm_base = (uint8_t *) vtcm_ptr;
229
+ ctx->vtcm_size = vtcm_size;
230
+ ctx->vtcm_rctx = rctx;
231
+ ctx->vtcm_valid = false;
232
+ ctx->vtcm_inuse = false;
233
+ ctx->vtcm_needs_release = false;
234
+
235
+ return 0;
236
+ }
237
+
238
+ static void vtcm_free(struct htp_context * ctx) {
239
+ if (ctx->vtcm_rctx) {
240
+ HAP_compute_res_release(ctx->vtcm_rctx);
241
+ ctx->vtcm_base = 0;
242
+ ctx->vtcm_rctx = 0;
243
+ }
244
+ }
245
+
246
+ static void htp_packet_callback(dspqueue_t queue, int error, void * context);
247
+ static void htp_error_callback(dspqueue_t queue, int error, void * context);
248
+
249
+ AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx) {
250
+ struct htp_context * ctx = (struct htp_context *) handle;
251
+
252
+ if (!ctx) {
253
+ return AEE_EBADPARM;
254
+ }
255
+
256
+ if (ctx->queue) {
257
+ FARF(ERROR, "Queue already open");
258
+ return AEE_EITEMBUSY;
259
+ }
260
+
261
+ // Import queue created on the CPU
262
+ int err = dspqueue_import(dsp_queue_id, // Queue ID from dspqueue_export
263
+ htp_packet_callback, // Packet callback
264
+ htp_error_callback, // Error callback; no errors expected on the DSP
265
+ (void *) ctx, // Callback context
266
+ &ctx->queue);
267
+
268
+ if (err) {
269
+ FARF(ERROR, "Queue import failed with 0x%08x", (unsigned) err);
270
+ return err;
271
+ }
272
+
273
+ ctx->thread_id = qurt_thread_get_id();
274
+ ctx->thread_prio = qurt_thread_get_priority(ctx->thread_id);
275
+
276
+ // allocate VTCM
277
+ err = vtcm_alloc(ctx);
278
+ if (err != AEE_SUCCESS) {
279
+ FARF(ERROR, "Unable to allocate VTCM");
280
+ return AEE_ENOMEMORY;
281
+ }
282
+
283
+ qurt_sysenv_max_hthreads_t hw_threads;
284
+ qurt_sysenv_get_max_hw_threads(&hw_threads);
285
+ uint32_t hw_nhvx = (qurt_hvx_get_units() >> 8) & 0xFF;
286
+
287
+ if (n_hvx == 0) {
288
+ n_hvx = hw_nhvx;
289
+ }
290
+ if (n_hvx > hw_threads.max_hthreads) {
291
+ n_hvx = hw_threads.max_hthreads;
292
+ }
293
+ if (n_hvx > HTP_MAX_NTHREADS) {
294
+ n_hvx = HTP_MAX_NTHREADS;
295
+ }
296
+
297
+ ctx->n_threads = n_hvx;
298
+ for (int i = 0; i < ctx->n_threads; i++) {
299
+ // see discussion https://github.com/ggml-org/llama.cpp/pull/18151#discussion_r2632388541
300
+ ctx->dma[i] = dma_queue_create(64);
301
+ }
302
+
303
+ // init worker pool
304
+ err = worker_pool_init(&ctx->worker_pool, n_hvx);
305
+ if (err != AEE_SUCCESS) {
306
+ FARF(ERROR, "Unable to create worker pool");
307
+ return err;
308
+ }
309
+
310
+ FARF(HIGH, "session %u started: n-hvx %u vtcm-size %zu vtcm-rctx %u n-threads %u thread-id %d thread-prio %d \n",
311
+ sess_id, hw_nhvx, ctx->vtcm_size, ctx->vtcm_rctx, ctx->n_threads, ctx->thread_id, ctx->thread_prio);
312
+
313
+ return AEE_SUCCESS;
314
+ }
315
+
316
+ AEEResult htp_iface_stop(remote_handle64 handle) {
317
+ struct htp_context * ctx = (struct htp_context *) handle;
318
+ if (!ctx) {
319
+ return AEE_EBADPARM;
320
+ }
321
+
322
+ if (!ctx->queue) {
323
+ FARF(ERROR, "Queue not open");
324
+ return AEE_EBADSTATE;
325
+ }
326
+
327
+ // Close queue. dspqueue_close() will also wait for callbacks to finish.
328
+ int err = dspqueue_close(ctx->queue);
329
+ ctx->queue = NULL;
330
+ if (err != 0) {
331
+ FARF(ERROR, "Queue close failed with 0x%08x", (unsigned) err);
332
+ return err;
333
+ }
334
+
335
+ if (ctx->worker_pool) {
336
+ // Release worker pool
337
+ worker_pool_release(&ctx->worker_pool);
338
+ }
339
+
340
+ for (int i = 0; i < ctx->n_threads; i++) {
341
+ dma_queue_delete(ctx->dma[i]);
342
+ }
343
+
344
+ vtcm_free(ctx);
345
+
346
+ return AEE_SUCCESS;
347
+ }
348
+
349
+ static void htp_error_callback(dspqueue_t queue, int error, void * context) {
350
+ // No errors expected on the DSP.
351
+ FARF(ERROR, "Error callback: 0x%08x", (unsigned) error);
352
+ }
353
+
354
/*
 * Per-request profiling sample.
 * profile_start() stores the raw counter readings; profile_stop() replaces
 * them with the elapsed amounts.
 */
struct profile_data {
    uint64_t usecs;   // qtimer count at start, elapsed microseconds after stop
    uint64_t cycles;  // cycle counter at start, elapsed cycles after stop
    uint64_t pkts;    // packet counter at start, elapsed packets after stop
};
359
+
360
+ static inline void profile_start(struct profile_data * d) {
361
+ d->usecs = HAP_perf_get_qtimer_count();
362
+ d->cycles = hex_get_cycles();
363
+ d->pkts = hex_get_pktcnt();
364
+ }
365
+
366
+ static inline void profile_stop(struct profile_data * d) {
367
+ d->usecs = HAP_perf_qtimer_count_to_us(HAP_perf_get_qtimer_count() - d->usecs);
368
+ d->cycles = hex_get_cycles() - d->cycles;
369
+ d->pkts = hex_get_pktcnt() - d->pkts;
370
+ }
371
+
372
+ static int send_htp_rsp(struct htp_context * c,
373
+ uint32_t op,
374
+ uint32_t status,
375
+ struct dspqueue_buffer * bufs,
376
+ size_t n_bufs,
377
+ struct profile_data * prof) {
378
+ // Prep response struct
379
+ struct htp_general_rsp rsp;
380
+ rsp.op = op;
381
+ rsp.status = status;
382
+ rsp.prof_usecs = prof->usecs;
383
+ rsp.prof_cycles = prof->cycles;
384
+ rsp.prof_pkts = prof->pkts;
385
+
386
+ int err = dspqueue_write(c->queue,
387
+ 0, // Flags
388
+ n_bufs,
389
+ bufs, // Buffer references
390
+ sizeof(rsp),
391
+ (const uint8_t *) &rsp, // Message
392
+ DSPQUEUE_TIMEOUT_NONE);
393
+
394
+ if (err != 0) {
395
+ FARF(ERROR, "dspqueue_write failed: 0x%08x", (unsigned) err);
396
+ }
397
+
398
+ return err;
399
+ }
400
+
401
+ static void proc_matmul_req(struct htp_context * ctx,
402
+ struct htp_general_req * req,
403
+ struct dspqueue_buffer * bufs,
404
+ size_t n_bufs) {
405
+ struct dspqueue_buffer rsp_bufs[1];
406
+
407
+ // We had written to the output buffer, we'd also need to flush it
408
+ rsp_bufs[0].fd = bufs[2].fd;
409
+ rsp_bufs[0].ptr = bufs[2].ptr;
410
+ rsp_bufs[0].size = bufs[2].size;
411
+ rsp_bufs[0].offset = bufs[2].offset;
412
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
413
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
414
+
415
+ // Setup Op context
416
+ struct htp_ops_context octx = { 0 };
417
+ octx.ctx = ctx;
418
+ octx.src0 = req->src0;
419
+ octx.src1 = req->src1;
420
+ octx.dst = req->dst;
421
+ octx.flags = req->flags;
422
+ octx.op = req->op;
423
+
424
+ // Update data pointers
425
+ octx.src0.data = (uint32_t) bufs[0].ptr;
426
+ octx.src1.data = (uint32_t) bufs[1].ptr;
427
+ octx.dst.data = (uint32_t) bufs[2].ptr;
428
+ octx.n_threads = ctx->n_threads;
429
+
430
+ struct profile_data prof;
431
+ profile_start(&prof);
432
+
433
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
434
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
435
+ rsp_status = op_matmul(&octx);
436
+ vtcm_release(ctx);
437
+ }
438
+
439
+ profile_stop(&prof);
440
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
441
+ }
442
+
443
+ static void proc_argsort_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
444
+ struct dspqueue_buffer rsp_bufs[1];
445
+
446
+ // We had written to the output buffer, we'd also need to flush it
447
+ rsp_bufs[0].fd = bufs[1].fd;
448
+ rsp_bufs[0].ptr = bufs[1].ptr;
449
+ rsp_bufs[0].offset = bufs[1].offset;
450
+ rsp_bufs[0].size = bufs[1].size;
451
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
452
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
453
+
454
+ // Setup Op context
455
+ struct htp_ops_context octx = { 0 };
456
+ octx.ctx = ctx;
457
+ octx.src0 = req->src0;
458
+ octx.dst = req->dst;
459
+ octx.flags = req->flags;
460
+ octx.op = req->op;
461
+
462
+ memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
463
+
464
+ // Update data pointers
465
+ octx.src0.data = (uint32_t) bufs[0].ptr;
466
+ octx.dst.data = (uint32_t) bufs[1].ptr;
467
+ octx.n_threads = ctx->n_threads;
468
+
469
+ struct profile_data prof;
470
+ profile_start(&prof);
471
+
472
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
473
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
474
+ rsp_status = op_argsort(&octx);
475
+ vtcm_release(ctx);
476
+ }
477
+
478
+ profile_stop(&prof);
479
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
480
+ }
481
+
482
+ static void proc_cpy_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
483
+ struct dspqueue_buffer rsp_bufs[1];
484
+
485
+ // We had written to the output buffer, we'd also need to flush it
486
+ rsp_bufs[0].fd = bufs[1].fd;
487
+ rsp_bufs[0].ptr = bufs[1].ptr;
488
+ rsp_bufs[0].offset = bufs[1].offset;
489
+ rsp_bufs[0].size = bufs[1].size;
490
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
491
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
492
+
493
+ // Setup Op context
494
+ struct htp_ops_context octx = { 0 };
495
+ octx.ctx = ctx;
496
+ octx.src0 = req->src0;
497
+ octx.dst = req->dst;
498
+ octx.flags = req->flags;
499
+ octx.op = req->op;
500
+
501
+ // Update data pointers
502
+ octx.src0.data = (uint32_t) bufs[0].ptr;
503
+ octx.dst.data = (uint32_t) bufs[1].ptr;
504
+ octx.n_threads = ctx->n_threads;
505
+
506
+ struct profile_data prof;
507
+ profile_start(&prof);
508
+
509
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
510
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
511
+ rsp_status = op_cpy(&octx);
512
+ vtcm_release(ctx);
513
+ }
514
+
515
+ profile_stop(&prof);
516
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
517
+ }
518
+
519
+ static void proc_get_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
520
+ struct dspqueue_buffer rsp_bufs[1];
521
+
522
+ // We had written to the output buffer, we'd also need to flush it
523
+ rsp_bufs[0].fd = bufs[2].fd;
524
+ rsp_bufs[0].ptr = bufs[2].ptr;
525
+ rsp_bufs[0].offset = bufs[2].offset;
526
+ rsp_bufs[0].size = bufs[2].size;
527
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
528
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
529
+
530
+ // Setup Op context
531
+ struct htp_ops_context octx = { 0 };
532
+ octx.ctx = ctx;
533
+ octx.src0 = req->src0;
534
+ octx.src1 = req->src1;
535
+ octx.dst = req->dst;
536
+ octx.flags = req->flags;
537
+ octx.op = req->op;
538
+
539
+ // Update data pointers
540
+ octx.src0.data = (uint32_t) bufs[0].ptr;
541
+ octx.src1.data = (uint32_t) bufs[1].ptr;
542
+ octx.dst.data = (uint32_t) bufs[2].ptr;
543
+ octx.n_threads = ctx->n_threads;
544
+
545
+ struct profile_data prof;
546
+ profile_start(&prof);
547
+
548
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
549
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
550
+ rsp_status = op_get_rows(&octx);
551
+ vtcm_release(ctx);
552
+ }
553
+
554
+ profile_stop(&prof);
555
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
556
+ }
557
+
558
+ static void proc_matmul_id_req(struct htp_context * ctx,
559
+ struct htp_general_req * req,
560
+ struct dspqueue_buffer * bufs,
561
+ size_t n_bufs) {
562
+ struct dspqueue_buffer rsp_bufs[1];
563
+
564
+ // We had written to the output buffer, we'd also need to flush it
565
+ rsp_bufs[0].fd = bufs[3].fd;
566
+ rsp_bufs[0].ptr = bufs[3].ptr;
567
+ rsp_bufs[0].size = bufs[3].size;
568
+ rsp_bufs[0].offset = bufs[3].offset;
569
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
570
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
571
+
572
+ // Setup Op context
573
+ struct htp_ops_context octx = { 0 };
574
+ octx.ctx = ctx;
575
+ octx.src0 = req->src0;
576
+ octx.src1 = req->src1;
577
+ octx.src2 = req->src2;
578
+ octx.dst = req->dst;
579
+ octx.flags = req->flags;
580
+ octx.op = req->op;
581
+
582
+ // Update data pointers
583
+ octx.src0.data = (uint32_t) bufs[0].ptr;
584
+ octx.src1.data = (uint32_t) bufs[1].ptr;
585
+ octx.src2.data = (uint32_t) bufs[2].ptr;
586
+ octx.dst.data = (uint32_t) bufs[3].ptr;
587
+ octx.n_threads = ctx->n_threads;
588
+
589
+ struct profile_data prof;
590
+ profile_start(&prof);
591
+
592
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
593
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
594
+ rsp_status = op_matmul_id(&octx);
595
+ vtcm_release(ctx);
596
+ }
597
+
598
+ profile_stop(&prof);
599
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
600
+ }
601
+
602
+ static void proc_binary_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
603
+ struct dspqueue_buffer rsp_bufs[1];
604
+
605
+ // We had written to the output buffer, we'd also need to flush it
606
+ rsp_bufs[0].fd = bufs[2].fd;
607
+ rsp_bufs[0].ptr = bufs[2].ptr;
608
+ rsp_bufs[0].offset = bufs[2].offset;
609
+ rsp_bufs[0].size = bufs[2].size;
610
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
611
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
612
+
613
+ // Setup Op context
614
+ struct htp_ops_context octx = { 0 };
615
+ octx.ctx = ctx;
616
+ octx.src0 = req->src0;
617
+ octx.src1 = req->src1;
618
+ octx.dst = req->dst;
619
+ octx.flags = req->flags;
620
+ octx.op = req->op;
621
+
622
+ // Update data pointers
623
+ octx.src0.data = (uint32_t) bufs[0].ptr;
624
+ octx.src1.data = (uint32_t) bufs[1].ptr;
625
+ octx.dst.data = (uint32_t) bufs[2].ptr;
626
+ octx.n_threads = ctx->n_threads;
627
+
628
+ struct profile_data prof;
629
+ profile_start(&prof);
630
+
631
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
632
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
633
+ rsp_status = op_binary(&octx);
634
+ vtcm_release(ctx);
635
+ }
636
+
637
+ profile_stop(&prof);
638
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
639
+ }
640
+
641
+ static void proc_add_id_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
642
+ struct dspqueue_buffer rsp_bufs[1];
643
+
644
+ // We had written to the output buffer, we'd also need to flush it
645
+ rsp_bufs[0].fd = bufs[3].fd;
646
+ rsp_bufs[0].ptr = bufs[3].ptr;
647
+ rsp_bufs[0].offset = bufs[3].offset;
648
+ rsp_bufs[0].size = bufs[3].size;
649
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
650
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
651
+
652
+ // Setup Op context
653
+ struct htp_ops_context octx = { 0 };
654
+ octx.ctx = ctx;
655
+ octx.src0 = req->src0;
656
+ octx.src1 = req->src1;
657
+ octx.src2 = req->src2;
658
+ octx.dst = req->dst;
659
+ octx.flags = req->flags;
660
+ octx.op = req->op;
661
+
662
+ // Update data pointers
663
+ octx.src0.data = (uint32_t) bufs[0].ptr;
664
+ octx.src1.data = (uint32_t) bufs[1].ptr;
665
+ octx.src2.data = (uint32_t) bufs[2].ptr;
666
+ octx.dst.data = (uint32_t) bufs[3].ptr;
667
+ octx.n_threads = ctx->n_threads;
668
+
669
+ struct profile_data prof;
670
+ profile_start(&prof);
671
+
672
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
673
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
674
+ rsp_status = op_binary(&octx);
675
+ vtcm_release(ctx);
676
+ }
677
+
678
+ profile_stop(&prof);
679
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
680
+ }
681
+
682
+ static void proc_unary_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
683
+ struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
684
+
685
+ // We had written to the output buffer, we'd also need to flush it
686
+ rsp_bufs[0].fd = bufs[1].fd;
687
+ rsp_bufs[0].ptr = bufs[1].ptr;
688
+ rsp_bufs[0].offset = bufs[1].offset;
689
+ rsp_bufs[0].size = bufs[1].size;
690
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
691
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
692
+
693
+ // Setup Op context
694
+ struct htp_ops_context octx = { 0 };
695
+ octx.ctx = ctx;
696
+ octx.src0 = req->src0;
697
+ octx.dst = req->dst;
698
+ octx.flags = req->flags;
699
+ octx.op = req->op;
700
+
701
+ memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
702
+
703
+ // Update data pointers
704
+ octx.src0.data = (uint32_t) bufs[0].ptr;
705
+ octx.dst.data = (uint32_t) bufs[1].ptr;
706
+ octx.n_threads = ctx->n_threads;
707
+
708
+ struct profile_data prof;
709
+ profile_start(&prof);
710
+
711
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
712
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
713
+ rsp_status = op_unary(&octx);
714
+ vtcm_release(ctx);
715
+ }
716
+
717
+ profile_stop(&prof);
718
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
719
+ }
720
+
721
+ static void proc_sum_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
722
+ struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
723
+
724
+ // We had written to the output buffer, we'd also need to flush it
725
+ rsp_bufs[0].fd = bufs[1].fd;
726
+ rsp_bufs[0].ptr = bufs[1].ptr;
727
+ rsp_bufs[0].offset = bufs[1].offset;
728
+ rsp_bufs[0].size = bufs[1].size;
729
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
730
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
731
+
732
+ // Setup Op context
733
+ struct htp_ops_context octx = { 0 };
734
+ octx.ctx = ctx;
735
+ octx.src0 = req->src0;
736
+ octx.dst = req->dst;
737
+ octx.flags = req->flags;
738
+ octx.op = req->op;
739
+
740
+ memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
741
+
742
+ // Update data pointers
743
+ octx.src0.data = (uint32_t) bufs[0].ptr;
744
+ octx.dst.data = (uint32_t) bufs[1].ptr;
745
+ octx.n_threads = ctx->n_threads;
746
+
747
+ struct profile_data prof;
748
+ profile_start(&prof);
749
+
750
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
751
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
752
+ rsp_status = op_sum_rows(&octx);
753
+ vtcm_release(ctx);
754
+ }
755
+
756
+ profile_stop(&prof);
757
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
758
+ }
759
+
760
+ static void proc_activations_req(struct htp_context * ctx,
761
+ struct htp_general_req * req,
762
+ struct dspqueue_buffer * bufs,
763
+ uint32_t n_bufs) {
764
+ struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
765
+
766
+ int write_idx = (n_bufs == 3) ? 2 : 1;
767
+
768
+ // We had written to the output buffer, we'd also need to flush it
769
+ rsp_bufs[0].fd = bufs[write_idx].fd;
770
+ rsp_bufs[0].ptr = bufs[write_idx].ptr;
771
+ rsp_bufs[0].offset = bufs[write_idx].offset;
772
+ rsp_bufs[0].size = bufs[write_idx].size;
773
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
774
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
775
+
776
+ // Setup Op context
777
+ struct htp_ops_context octx = { 0 };
778
+ octx.ctx = ctx;
779
+ octx.src0 = req->src0;
780
+ if (3 == n_bufs) {
781
+ octx.src1 = req->src1;
782
+ }
783
+ octx.dst = req->dst;
784
+ octx.flags = req->flags;
785
+ octx.op = req->op;
786
+
787
+ memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
788
+
789
+ // Update data pointers
790
+ octx.src0.data = (uint32_t) bufs[0].ptr;
791
+ if (3 == n_bufs) {
792
+ octx.src1.data = (uint32_t) bufs[1].ptr;
793
+ octx.dst.data = (uint32_t) bufs[2].ptr;
794
+ } else {
795
+ octx.dst.data = (uint32_t) bufs[1].ptr;
796
+ }
797
+ octx.n_threads = ctx->n_threads;
798
+
799
+ struct profile_data prof;
800
+ profile_start(&prof);
801
+
802
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
803
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
804
+ if (octx.op == HTP_OP_SOFTMAX) {
805
+ rsp_status = op_softmax(&octx);
806
+ } else {
807
+ rsp_status = op_activations(&octx);
808
+ }
809
+ vtcm_release(ctx);
810
+ }
811
+
812
+ profile_stop(&prof);
813
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
814
+ }
815
+
816
+ static void proc_rope_req(struct htp_context * ctx,
817
+ struct htp_general_req * req,
818
+ struct dspqueue_buffer * bufs,
819
+ uint32_t n_bufs) {
820
+ struct dspqueue_buffer rsp_bufs[HTP_MAX_PACKET_BUFFERS];
821
+
822
+ int write_idx = n_bufs - 1;
823
+
824
+ // We had written to the output buffer, we'd also need to flush it
825
+ rsp_bufs[0].fd = bufs[write_idx].fd;
826
+ rsp_bufs[0].ptr = bufs[write_idx].ptr;
827
+ rsp_bufs[0].offset = bufs[write_idx].offset;
828
+ rsp_bufs[0].size = bufs[write_idx].size;
829
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
830
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
831
+
832
+ // Setup Op context
833
+ struct htp_ops_context octx = { 0 };
834
+ octx.ctx = ctx;
835
+ octx.src0 = req->src0;
836
+ octx.src1 = req->src1;
837
+ if (4 == n_bufs) {
838
+ octx.src2 = req->src2;
839
+ }
840
+ octx.dst = req->dst;
841
+ octx.flags = req->flags;
842
+ octx.op = req->op;
843
+
844
+ memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
845
+
846
+ // Update data pointers
847
+ octx.src0.data = (uint32_t) bufs[0].ptr;
848
+ octx.src1.data = (uint32_t) bufs[1].ptr;
849
+ if (4 == n_bufs) {
850
+ octx.src2.data = (uint32_t) bufs[2].ptr;
851
+ octx.dst.data = (uint32_t) bufs[3].ptr;
852
+ } else {
853
+ octx.dst.data = (uint32_t) bufs[2].ptr;
854
+ }
855
+ octx.n_threads = ctx->n_threads;
856
+
857
+ struct profile_data prof;
858
+ profile_start(&prof);
859
+
860
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
861
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
862
+ rsp_status = op_rope(&octx);
863
+ vtcm_release(ctx);
864
+ }
865
+
866
+ profile_stop(&prof);
867
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
868
+ }
869
+
870
+ static void proc_set_rows_req(struct htp_context * ctx, struct htp_general_req * req, struct dspqueue_buffer * bufs) {
871
+ struct dspqueue_buffer rsp_bufs[1];
872
+
873
+ // We had written to the output buffer, we'd also need to flush it
874
+ rsp_bufs[0].fd = bufs[2].fd;
875
+ rsp_bufs[0].ptr = bufs[2].ptr;
876
+ rsp_bufs[0].offset = bufs[2].offset;
877
+ rsp_bufs[0].size = bufs[2].size;
878
+ rsp_bufs[0].flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
879
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
880
+
881
+ // Setup Op context
882
+ struct htp_ops_context octx = { 0 };
883
+ octx.ctx = ctx;
884
+ octx.src0 = req->src0;
885
+ octx.src1 = req->src1;
886
+ octx.dst = req->dst;
887
+ octx.flags = req->flags;
888
+ octx.op = req->op;
889
+
890
+ // Update data pointers
891
+ octx.src0.data = (uint32_t) bufs[0].ptr;
892
+ octx.src1.data = (uint32_t) bufs[1].ptr;
893
+ octx.dst.data = (uint32_t) bufs[2].ptr;
894
+ octx.n_threads = ctx->n_threads;
895
+
896
+ struct profile_data prof;
897
+ profile_start(&prof);
898
+
899
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
900
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
901
+ rsp_status = op_set_rows(&octx);
902
+ vtcm_release(ctx);
903
+ }
904
+
905
+ profile_stop(&prof);
906
+ send_htp_rsp(ctx, req->op, rsp_status, rsp_bufs, 1, &prof);
907
+ }
908
+
909
+ static void proc_flash_attn_ext_req(struct htp_context * ctx,
910
+ struct htp_general_req * req,
911
+ struct dspqueue_buffer * bufs,
912
+ uint32_t n_bufs) {
913
+ // Setup Op context
914
+ struct htp_ops_context octx;
915
+ memset(&octx, 0, sizeof(octx));
916
+
917
+ octx.ctx = ctx;
918
+ octx.n_threads = ctx->n_threads;
919
+
920
+ octx.src0 = req->src0;
921
+ octx.src1 = req->src1;
922
+ octx.src2 = req->src2;
923
+ octx.src3 = req->src3;
924
+ octx.src4 = req->src4;
925
+ octx.dst = req->dst;
926
+ octx.flags = req->flags;
927
+ octx.op = req->op;
928
+
929
+ memcpy(octx.op_params, req->op_params, sizeof(octx.op_params));
930
+
931
+ // Update data pointers
932
+ octx.src0.data = (uint32_t) bufs[0].ptr;
933
+ octx.src1.data = (uint32_t) bufs[1].ptr;
934
+ octx.src2.data = (uint32_t) bufs[2].ptr;
935
+
936
+ int last_buf = 3;
937
+
938
+ if (octx.src3.ne[0]) {
939
+ octx.src3.data = (uint32_t) bufs[last_buf++].ptr; // mask is valid
940
+ }
941
+
942
+ if (octx.src4.ne[0]) {
943
+ octx.src4.data = (uint32_t) bufs[last_buf++].ptr; // sinks is valid
944
+ }
945
+
946
+ octx.dst.data = (uint32_t) bufs[last_buf].ptr;
947
+
948
+ struct profile_data prof;
949
+ profile_start(&prof);
950
+
951
+ uint32_t rsp_status = HTP_STATUS_INTERNAL_ERR;
952
+ if (vtcm_acquire(ctx) == AEE_SUCCESS) {
953
+ rsp_status = op_flash_attn_ext(&octx);
954
+ vtcm_release(ctx);
955
+ }
956
+
957
+ profile_stop(&prof);
958
+
959
+ struct dspqueue_buffer rsp_buf = bufs[last_buf];
960
+ rsp_buf.flags = (DSPQUEUE_BUFFER_FLAG_FLUSH_SENDER | // Flush HTP
961
+ DSPQUEUE_BUFFER_FLAG_INVALIDATE_RECIPIENT); // Invalidate CPU
962
+
963
+ send_htp_rsp(ctx, req->op, rsp_status, &bufs[last_buf], 1, &prof);
964
+ }
965
+
966
+ static void htp_packet_callback(dspqueue_t queue, int error, void * context) {
967
+ struct htp_context * ctx = (struct htp_context *) context;
968
+
969
+ // Repeatedly read packets from the queue until it's empty. We don't
970
+ // necessarily get a separate callback for each packet, and new packets
971
+ // may arrive while we're processing the previous one. This ensures we
972
+ // keep the DSP busy as much as possible and avoid waiting for the CPU.
973
+
974
+ while (1) {
975
+ struct htp_general_req req;
976
+ uint32_t req_size;
977
+
978
+ struct dspqueue_buffer bufs[HTP_MAX_PACKET_BUFFERS];
979
+ uint32_t n_bufs;
980
+ uint32_t flags;
981
+
982
+ // Read packet from queue
983
+ int err = dspqueue_read_noblock(queue, &flags,
984
+ HTP_MAX_PACKET_BUFFERS, // Maximum number of buffer references
985
+ &n_bufs, // Number of buffer references
986
+ bufs, // Buffer references
987
+ sizeof(req), // Max message length
988
+ &req_size, // Message length
989
+ (uint8_t *) &req); // Message
990
+
991
+ if (err == AEE_EWOULDBLOCK) {
992
+ // Consumed all packets available for now
993
+ return;
994
+ }
995
+
996
+ if (err != 0) {
997
+ FARF(ERROR, "dspqueue_read_noblock failed: 0x%08x", (unsigned) err);
998
+ return;
999
+ }
1000
+
1001
+ if (req_size != sizeof(req)) {
1002
+ FARF(ERROR, "Invalid request size");
1003
+ continue;
1004
+ }
1005
+
1006
+ if (req.flags & HTP_OPFLAGS_EARLY_WAKEUP) {
1007
+ // Host wants early notification
1008
+ dspqueue_write_early_wakeup_noblock(ctx->queue, 10, 0);
1009
+ }
1010
+
1011
+ // Process packet based on its message type
1012
+ switch (req.op) {
1013
+ case HTP_OP_MUL_MAT:
1014
+ if (n_bufs != 3) {
1015
+ FARF(ERROR, "Bad matmul-req buffer list");
1016
+ continue;
1017
+ }
1018
+ proc_matmul_req(ctx, &req, bufs, n_bufs);
1019
+ break;
1020
+
1021
+ case HTP_OP_MUL_MAT_ID:
1022
+ if (n_bufs != 4) {
1023
+ FARF(ERROR, "Bad matmul-id-req buffer list");
1024
+ continue;
1025
+ }
1026
+ proc_matmul_id_req(ctx, &req, bufs, n_bufs);
1027
+ break;
1028
+
1029
+ case HTP_OP_MUL:
1030
+ case HTP_OP_ADD:
1031
+ case HTP_OP_SUB:
1032
+ case HTP_OP_DIV:
1033
+ if (n_bufs != 3) {
1034
+ FARF(ERROR, "Bad binary-req buffer list");
1035
+ continue;
1036
+ }
1037
+ proc_binary_req(ctx, &req, bufs);
1038
+ break;
1039
+
1040
+ case HTP_OP_RMS_NORM:
1041
+ case HTP_OP_SCALE:
1042
+ if (n_bufs != 2) {
1043
+ FARF(ERROR, "Bad unary-req buffer list");
1044
+ continue;
1045
+ }
1046
+
1047
+ proc_unary_req(ctx, &req, bufs);
1048
+ break;
1049
+
1050
+ case HTP_OP_SQR:
1051
+ case HTP_OP_SQRT:
1052
+ if (n_bufs != 2) {
1053
+ FARF(ERROR, "Bad unary-req buffer list");
1054
+ continue;
1055
+ }
1056
+
1057
+ proc_unary_req(ctx, &req, bufs);
1058
+ break;
1059
+
1060
+ case HTP_OP_SUM_ROWS:
1061
+ if (n_bufs != 2) {
1062
+ FARF(ERROR, "Bad unary-req buffer list");
1063
+ continue;
1064
+ }
1065
+
1066
+ proc_sum_rows_req(ctx, &req, bufs);
1067
+ break;
1068
+
1069
+ case HTP_OP_UNARY_SILU:
1070
+ case HTP_OP_UNARY_GELU:
1071
+ if (n_bufs != 2) {
1072
+ FARF(ERROR, "Bad act-req buffer list");
1073
+ continue;
1074
+ }
1075
+ proc_activations_req(ctx, &req, bufs, n_bufs);
1076
+ break;
1077
+
1078
+ case HTP_OP_GLU_SWIGLU:
1079
+ case HTP_OP_GLU_SWIGLU_OAI:
1080
+ case HTP_OP_SOFTMAX:
1081
+ case HTP_OP_GLU_GEGLU:
1082
+ if ((n_bufs != 2) && (n_bufs != 3)) {
1083
+ FARF(ERROR, "Bad act-req buffer list");
1084
+ continue;
1085
+ }
1086
+ proc_activations_req(ctx, &req, bufs, n_bufs);
1087
+ break;
1088
+
1089
+ case HTP_OP_ADD_ID:
1090
+ if (n_bufs != 4) {
1091
+ FARF(ERROR, "Bad add-id-req buffer list");
1092
+ continue;
1093
+ }
1094
+ proc_add_id_req(ctx, &req, bufs);
1095
+ break;
1096
+
1097
+ case HTP_OP_ROPE:
1098
+ if ((n_bufs != 3) && (n_bufs != 4)) {
1099
+ FARF(ERROR, "Bad rope-req buffer list");
1100
+ continue;
1101
+ }
1102
+ proc_rope_req(ctx, &req, bufs, n_bufs);
1103
+ break;
1104
+
1105
+ case HTP_OP_FLASH_ATTN_EXT:
1106
+ if (!(n_bufs >= 4 && n_bufs <= 6)) {
1107
+ FARF(ERROR, "Bad flash-attn-ext-req buffer list");
1108
+ continue;
1109
+ }
1110
+ proc_flash_attn_ext_req(ctx, &req, bufs, n_bufs);
1111
+ break;
1112
+
1113
+ case HTP_OP_SET_ROWS:
1114
+ if (n_bufs != 3) {
1115
+ FARF(ERROR, "Bad set-rows-req buffer list");
1116
+ continue;
1117
+ }
1118
+ proc_set_rows_req(ctx, &req, bufs);
1119
+ break;
1120
+
1121
+ case HTP_OP_GET_ROWS:
1122
+ if (n_bufs != 3) {
1123
+ FARF(ERROR, "Bad get-rows-req buffer list");
1124
+ continue;
1125
+ }
1126
+ proc_get_rows_req(ctx, &req, bufs);
1127
+ break;
1128
+
1129
+ case HTP_OP_CPY:
1130
+ if (n_bufs != 2) {
1131
+ FARF(ERROR, "Bad cpy-req buffer list");
1132
+ continue;
1133
+ }
1134
+ proc_cpy_req(ctx, &req, bufs);
1135
+ break;
1136
+
1137
+ case HTP_OP_ARGSORT:
1138
+ if (n_bufs != 2) {
1139
+ FARF(ERROR, "Bad argsort-req buffer list");
1140
+ continue;
1141
+ }
1142
+ proc_argsort_req(ctx, &req, bufs);
1143
+ break;
1144
+
1145
+ default:
1146
+ FARF(ERROR, "Unknown Op %u", req.op);
1147
+ break;
1148
+ }
1149
+ }
1150
+ }