local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,384 @@
1
+ #pragma clang diagnostic ignored "-Wunused-variable"
2
+ #pragma clang diagnostic ignored "-Wunused-function"
3
+ #pragma clang diagnostic ignored "-Wunused-but-set-variable"
4
+
5
+ #include <HAP_farf.h>
6
+ #include <HAP_perf.h>
7
+
8
+ #include <math.h>
9
+ #include <string.h>
10
+
11
+ #include "hex-dma.h"
12
+ #include "hvx-utils.h"
13
+
14
+ #define GGML_COMMON_DECL_C
15
+ #include "ggml-common.h"
16
+ #include "htp-ctx.h"
17
+ #include "htp-msg.h"
18
+ #include "htp-ops.h"
19
+
20
// Per-invocation state handed to every unary worker job.
// It is filled once by the dispatcher and only read by the jobs.
struct htp_unary_context {
    struct htp_ops_context * octx;  // op context this invocation belongs to

    // Base addresses of the source and destination tensor data
    const uint8_t * data_src0;
    uint8_t *       data_dst;

    // Row sizes in bytes (taken from the tensors' nb[1])
    size_t src0_row_size;
    size_t dst_row_size;

    // Row sizes rounded up to a multiple of VLEN
    size_t src0_row_size_aligned;
    size_t dst_row_size_aligned;

    // Half of each per-thread scratchpad; one half per double-buffer slot
    size_t src0_spad_half_size;
    size_t dst_spad_half_size;

    uint32_t block;                  // number of rows that fit in one scratchpad half
    uint32_t src0_nrows;             // total number of source rows
    uint32_t src0_nrows_per_thread;  // number of rows assigned to each job
    uint32_t nc;                     // src0->ne[0]
};
41
+
42
// Declares local shorthands for the dimensions (ne*) and byte strides (nb*)
// of the tensors pointed to by `src` (two-digit suffix: ne00..ne03, nb00..nb03)
// and `dst` (one-digit suffix: ne0..ne3, nb0..nb3).
// Both `src` and `dst` must be in scope at the point of expansion.
#define htp_unary_preamble            \
    const uint32_t ne00 = src->ne[0]; \
    const uint32_t ne01 = src->ne[1]; \
    const uint32_t ne02 = src->ne[2]; \
    const uint32_t ne03 = src->ne[3]; \
                                      \
    const uint32_t ne0 = dst->ne[0];  \
    const uint32_t ne1 = dst->ne[1];  \
    const uint32_t ne2 = dst->ne[2];  \
    const uint32_t ne3 = dst->ne[3];  \
                                      \
    const uint32_t nb00 = src->nb[0]; \
    const uint32_t nb01 = src->nb[1]; \
    const uint32_t nb02 = src->nb[2]; \
    const uint32_t nb03 = src->nb[3]; \
                                      \
    const uint32_t nb0 = dst->nb[0];  \
    const uint32_t nb1 = dst->nb[1];  \
    const uint32_t nb2 = dst->nb[2];  \
    const uint32_t nb3 = dst->nb[3];
62
+
63
+ static void hvx_fast_rms_norm_f32(const uint8_t * restrict src,
64
+ uint8_t * restrict dst,
65
+ uint8_t * restrict pad,
66
+ const int num_elems,
67
+ float epsilon) {
68
+ const HVX_Vector * restrict v_src = (HVX_Vector *) src;
69
+ HVX_Vector * restrict v_dst = (HVX_Vector *) dst;
70
+
71
+ HVX_Vector sum_v = Q6_V_vsplat_R(0x00000000);
72
+ HVX_Vector epsilon_v = hvx_vec_splat_f32(epsilon);
73
+
74
+ int step_of_1 = num_elems >> 5;
75
+ #pragma unroll(4)
76
+ for (int i = 0; i < step_of_1; i++) {
77
+ HVX_Vector v1 = v_src[i];
78
+ HVX_Vector v2 = Q6_Vqf32_vmpy_VsfVsf(v1, v1);
79
+ sum_v = Q6_Vqf32_vadd_Vqf32Vqf32(sum_v, v2);
80
+ }
81
+
82
+ sum_v = hvx_vec_reduce_sum_f32(Q6_Vsf_equals_Vqf32(sum_v)); // replicated over all lanes
83
+
84
+ HVX_Vector t_v = hvx_vec_splat_f32((float) num_elems);
85
+ HVX_Vector denom_v = hvx_vec_inverse_f32(t_v);
86
+ HVX_Vector mean_v = Q6_Vqf32_vmpy_VsfVsf(sum_v, denom_v);
87
+ HVX_Vector mean_epsilon_v = Q6_Vqf32_vadd_Vqf32Vsf(mean_v, epsilon_v);
88
+
89
+ HVX_Vector scale_v = hvx_vec_rsqrt_f32(Q6_Vsf_equals_Vqf32(mean_epsilon_v));
90
+
91
+ #pragma unroll(4)
92
+ for (int i = 0; i < step_of_1; i++) {
93
+ HVX_Vector v1 = v_src[i];
94
+ HVX_Vector v2 = Q6_Vqf32_vmpy_VsfVsf(v1, scale_v);
95
+ v_dst[i] = Q6_Vsf_equals_Vqf32(v2);
96
+ }
97
+ }
98
+
99
// Applies hvx_scale_offset_f32_aa() to each of num_rows rows.
//
// src, dst  : first row of the input / output; consecutive rows are row_size bytes apart
// spad      : not used
// num_rows  : number of rows to process
// row_elems : number of floats per row
// row_size  : distance in bytes between consecutive rows (same for src and dst)
// op_params : op_params[0] and op_params[1] hold the bit patterns of the float
//             scale and bias
static void scale_f32(const float * restrict src,
                      float * restrict       dst,
                      uint8_t * restrict     spad,
                      const uint32_t         num_rows,
                      const uint32_t         row_elems,
                      const size_t           row_size,
                      int32_t *              op_params) {
    // Reinterpret the integer parameter slots as floats without aliasing violations
    float scale = 0.f;
    memcpy(&scale, &op_params[0], sizeof(float));

    float bias = 0.f;
    memcpy(&bias, &op_params[1], sizeof(float));

    const uint8_t * src_bytes = (const uint8_t *) src;
    uint8_t *       dst_bytes = (uint8_t *) dst;

    for (uint32_t row = 0; row < num_rows; row++) {
        const size_t offset = row * row_size;
        hvx_scale_offset_f32_aa(dst_bytes + offset, src_bytes + offset, row_elems, scale, bias);
    }
}
118
+
119
// Runs hvx_fast_rms_norm_f32() on each of num_rows rows.
//
// src, dst  : first row of the input / output; consecutive rows are row_size bytes apart
// spad      : forwarded to hvx_fast_rms_norm_f32() as its pad argument
// num_rows  : number of rows to process
// row_elems : number of floats per row
// row_size  : distance in bytes between consecutive rows (same for src and dst)
// op_params : op_params[0] holds the bit pattern of the float epsilon
static void rms_norm_f32(const float * restrict src,
                         float * restrict       dst,
                         uint8_t * restrict     spad,
                         const uint32_t         num_rows,
                         const uint32_t         row_elems,
                         const size_t           row_size,
                         int32_t *              op_params) {
    // Reinterpret the integer parameter slot as a float
    float epsilon = 0.f;
    memcpy(&epsilon, op_params, sizeof(float));

    const uint8_t * src_bytes = (const uint8_t *) src;
    uint8_t *       dst_bytes = (uint8_t *) dst;

    for (uint32_t row = 0; row < num_rows; row++) {
        const size_t offset = row * row_size;
        hvx_fast_rms_norm_f32(src_bytes + offset, dst_bytes + offset, spad, row_elems, epsilon);
    }
}
136
+
137
// Applies hvx_sqr_f32_aa() to each of num_rows rows.
//
// src, dst  : first row of the input / output; consecutive rows are row_size bytes apart
// spad      : not used
// num_rows  : number of rows to process
// row_elems : number of floats per row
// row_size  : distance in bytes between consecutive rows (same for src and dst)
// op_params : not used
static void sqr_f32(const float * restrict src,
                    float * restrict       dst,
                    uint8_t * restrict     spad,
                    const uint32_t         num_rows,
                    const uint32_t         row_elems,
                    const size_t           row_size,
                    int32_t *              op_params) {
    const uint8_t * src_bytes = (const uint8_t *) src;
    uint8_t *       dst_bytes = (uint8_t *) dst;

    for (uint32_t row = 0; row < num_rows; row++) {
        const size_t offset = row * row_size;
        hvx_sqr_f32_aa(dst_bytes + offset, src_bytes + offset, row_elems);
    }
}
152
+
153
// Applies hvx_sqrt_f32_aa() to each of num_rows rows.
//
// src, dst  : first row of the input / output; consecutive rows are row_size bytes apart
// spad      : not used
// num_rows  : number of rows to process
// row_elems : number of floats per row
// row_size  : distance in bytes between consecutive rows (same for src and dst)
// op_params : not used
static void sqrt_f32(const float * restrict src,
                     float * restrict       dst,
                     uint8_t * restrict     spad,
                     const uint32_t         num_rows,
                     const uint32_t         row_elems,
                     const size_t           row_size,
                     int32_t *              op_params) {
    const uint8_t * src_bytes = (const uint8_t *) src;
    uint8_t *       dst_bytes = (uint8_t *) dst;

    for (uint32_t row = 0; row < num_rows; row++) {
        const size_t offset = row * row_size;
        hvx_sqrt_f32_aa(dst_bytes + offset, src_bytes + offset, row_elems);
    }
}
168
+
169
+ static void unary_job_f32_per_thread(unsigned int nth, unsigned int ith, void * data) {
170
+ const struct htp_unary_context * uctx = (const struct htp_unary_context *) data;
171
+ struct htp_ops_context * octx = uctx->octx;
172
+ const struct htp_tensor * src = &octx->src0;
173
+ const struct htp_tensor * dst = &octx->dst;
174
+
175
+ htp_unary_preamble;
176
+
177
+ int htp_op = octx->op;
178
+ int32_t * op_params = octx->op_params;
179
+ uint32_t src0_nrows_per_thread = uctx->src0_nrows_per_thread;
180
+
181
+ const size_t src0_row_size = uctx->src0_row_size;
182
+ const size_t dst_row_size = uctx->dst_row_size;
183
+
184
+ const size_t src0_row_size_aligned = uctx->src0_row_size_aligned;
185
+ const size_t dst_row_size_aligned = uctx->dst_row_size_aligned;
186
+
187
+ const uint32_t src0_nrows = uctx->src0_nrows;
188
+ const uint32_t src0_start_row = src0_nrows_per_thread * ith;
189
+ const uint32_t src0_end_row = MIN(src0_start_row + src0_nrows_per_thread, src0_nrows);
190
+
191
+ // no work for this thread
192
+ if (src0_start_row >= src0_end_row) {
193
+ return;
194
+ }
195
+
196
+ uint64_t t1, t2;
197
+ t1 = HAP_perf_get_qtimer_count();
198
+
199
+ const uint8_t * restrict data_src = uctx->data_src0;
200
+ uint8_t * restrict data_dst = uctx->data_dst;
201
+
202
+ uint8_t * src0_spad_data = octx->src0_spad.data + (ith * octx->src0_spad.size_per_thread);
203
+ uint8_t * dst_spad_data = octx->dst_spad.data + (ith * octx->dst_spad.size_per_thread);
204
+
205
+ size_t src0_spad_half_size = uctx->src0_spad_half_size;
206
+ size_t dst_spad_half_size = uctx->dst_spad_half_size;
207
+
208
+ const int BLOCK = uctx->block;
209
+ if (BLOCK == 0) {
210
+ FARF(ERROR, "unary-f32 : current VTCM reservation %zu is too small for even 1 row per thread, needed at least %zu\n",
211
+ octx->src0_spad.size_per_thread, src0_row_size_aligned);
212
+ return;
213
+ }
214
+
215
+ dma_queue * dma_queue = octx->ctx->dma[ith];
216
+
217
+ for (uint32_t ir = src0_start_row, spad_idx = 0; ir < src0_end_row && spad_idx < 2; ir += BLOCK, spad_idx++) {
218
+ const uint32_t block_size = MIN(BLOCK, src0_end_row - ir);
219
+
220
+ // Dummy DMA transation for sequencing (interleaving dst,src,dst,...)
221
+ dma_queue_push_vtcm_to_ddr(dma_queue,
222
+ dma_make_ptr(data_dst, dst_spad_data + (spad_idx * dst_spad_half_size)),
223
+ dst_row_size, dst_row_size_aligned, 0);
224
+
225
+ dma_queue_push_ddr_to_vtcm(dma_queue,
226
+ dma_make_ptr(src0_spad_data + (spad_idx * src0_spad_half_size), data_src + (ir * src0_row_size)),
227
+ src0_row_size_aligned, src0_row_size, block_size);
228
+ }
229
+
230
+ for (uint32_t ir = src0_start_row; ir < src0_end_row; ir += BLOCK) {
231
+ const uint32_t block_size = MIN(BLOCK, src0_end_row - ir);
232
+
233
+ float * dst_spad = (float *) dma_queue_pop(dma_queue).src;
234
+ float * src0_spad = (float *) dma_queue_pop(dma_queue).dst;
235
+
236
+ // Process block in VTCM
237
+ switch (htp_op) {
238
+ case HTP_OP_RMS_NORM:
239
+ rms_norm_f32(src0_spad, dst_spad, NULL, block_size, ne0, src0_row_size_aligned, op_params);
240
+ break;
241
+ case HTP_OP_SCALE:
242
+ scale_f32(src0_spad, dst_spad, NULL, block_size, ne0, src0_row_size_aligned, op_params);
243
+ break;
244
+ case HTP_OP_SQR:
245
+ sqr_f32(src0_spad, dst_spad, NULL, block_size, ne0, src0_row_size_aligned, op_params);
246
+ break;
247
+ case HTP_OP_SQRT:
248
+ sqrt_f32(src0_spad, dst_spad, NULL, block_size, ne0, src0_row_size_aligned, op_params);
249
+ break;
250
+ default:
251
+ break;
252
+ }
253
+
254
+ dma_queue_push_vtcm_to_ddr(dma_queue,
255
+ dma_make_ptr(data_dst + (ir * dst_row_size), dst_spad),
256
+ dst_row_size, dst_row_size_aligned, block_size);
257
+
258
+ // prefetch N+2 loop iteration if any
259
+ const uint32_t pref_block = (ir + BLOCK * 2);
260
+ if (pref_block < src0_end_row) {
261
+ const uint32_t pref_block_size = MIN(BLOCK, src0_end_row - pref_block);
262
+ dma_queue_push_ddr_to_vtcm(dma_queue,
263
+ dma_make_ptr(src0_spad, data_src + (pref_block * src0_row_size)),
264
+ src0_row_size_aligned, src0_row_size, pref_block_size);
265
+ }
266
+ }
267
+
268
+ dma_queue_flush(dma_queue);
269
+
270
+ t2 = HAP_perf_get_qtimer_count();
271
+
272
+ FARF(HIGH, "unary-f32 %d/%d: %ux%ux%ux%u (%u:%u) -> %ux%ux%ux%u usec %u\n", ith, nth, src->ne[0],
273
+ src->ne[1], src->ne[2], src->ne[3], src0_start_row, src0_end_row, dst->ne[0], dst->ne[1], dst->ne[2],
274
+ dst->ne[3], (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1));
275
+ }
276
+
277
+ static int execute_op_unary_f32(struct htp_ops_context * octx) {
278
+ int err = HTP_STATUS_OK;
279
+
280
+ const struct htp_tensor * src0 = &octx->src0;
281
+ struct htp_tensor * dst = &octx->dst;
282
+
283
+ const char * op_type = NULL;
284
+
285
+ switch (octx->op) {
286
+ case HTP_OP_RMS_NORM:
287
+ op_type = "rmsnorm-f32";
288
+ break;
289
+ case HTP_OP_SCALE:
290
+ op_type = "scale-f32";
291
+ break;
292
+ case HTP_OP_SQR:
293
+ op_type = "sqr-f32";
294
+ break;
295
+ case HTP_OP_SQRT:
296
+ op_type = "sqrt-f32";
297
+ break;
298
+
299
+ default:
300
+ FARF(ERROR, "Unsupported unary Op %u\n", octx->op);
301
+ return HTP_STATUS_NO_SUPPORT;
302
+ }
303
+
304
+ const int n_threads = octx->n_threads;
305
+ const uint32_t src0_nrows = src0->ne[1] * src0->ne[2] * src0->ne[3];
306
+
307
+ const size_t src0_row_size = src0->nb[1];
308
+ const size_t dst_row_size = dst->nb[1];
309
+
310
+ const size_t src0_row_size_aligned = hex_round_up(src0_row_size, VLEN);
311
+ const size_t dst_row_size_aligned = hex_round_up(dst_row_size, VLEN);
312
+
313
+ // VTCM scratchpads for all tensors
314
+ // N rows per thread, padded to HVX vector size
315
+ // Double buffering requires 2x size per buffer
316
+
317
+ size_t spad_size_per_row = 2 * (src0_row_size_aligned + dst_row_size_aligned);
318
+ size_t vtcm_row_per_thread = (octx->ctx->vtcm_size)/ (n_threads * spad_size_per_row);
319
+
320
+ // Make sure the reserved vtcm size is sufficient
321
+ if (vtcm_row_per_thread == 0) {
322
+ FARF(ERROR, "unary-%s : current VTCM reservation %zu is too small, needed %zu\n", op_type, octx->ctx->vtcm_size,
323
+ spad_size_per_row * n_threads);
324
+ return HTP_STATUS_VTCM_TOO_SMALL;
325
+ }
326
+
327
+ octx->src0_spad.size_per_thread = src0_row_size_aligned * vtcm_row_per_thread * 2;
328
+ octx->dst_spad.size_per_thread = dst_row_size_aligned * vtcm_row_per_thread * 2;
329
+
330
+ octx->src0_spad.size = n_threads * octx->src0_spad.size_per_thread;
331
+ octx->dst_spad.size = n_threads * octx->dst_spad.size_per_thread;
332
+
333
+ octx->src0_spad.data = octx->ctx->vtcm_base;
334
+ octx->dst_spad.data = octx->src0_spad.data + octx->src0_spad.size;
335
+
336
+ FARF(HIGH, "%s: (%ux%ux%ux%u) -> (%ux%ux%ux%u) : src0-spad-size %u src1-spad-size %u dst-spad-size %u\n", op_type,
337
+ src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
338
+ octx->src0_spad.size, octx->src1_spad.size, octx->dst_spad.size);
339
+
340
+ if (!(octx->flags & HTP_OPFLAGS_SKIP_COMPUTE)) {
341
+ uint32_t n_jobs = MIN(n_threads, src0_nrows);
342
+
343
+ struct htp_unary_context uctx = {
344
+ .octx = octx,
345
+ .src0_nrows_per_thread = (src0_nrows + n_jobs - 1) / n_jobs,
346
+ .src0_nrows = src0_nrows,
347
+
348
+ .data_src0 = (const uint8_t *)src0->data,
349
+ .data_dst = (uint8_t *)dst->data,
350
+
351
+ .src0_row_size = src0_row_size,
352
+ .dst_row_size = dst_row_size,
353
+
354
+ .src0_row_size_aligned = src0_row_size_aligned,
355
+ .dst_row_size_aligned = dst_row_size_aligned,
356
+
357
+ .src0_spad_half_size = octx->src0_spad.size_per_thread / 2,
358
+ .dst_spad_half_size = octx->dst_spad.size_per_thread / 2,
359
+
360
+ .block = (octx->src0_spad.size_per_thread / 2) / src0_row_size_aligned,
361
+ .nc = src0->ne[0],
362
+ };
363
+
364
+ worker_pool_run_func(octx->ctx->worker_pool, unary_job_f32_per_thread, &uctx, n_jobs);
365
+ }
366
+
367
+ return err;
368
+ }
369
+
370
+ int op_unary(struct htp_ops_context * octx) {
371
+ int err = HTP_STATUS_OK;
372
+
373
+ switch (octx->src0.type) {
374
+ case HTP_TYPE_F32:
375
+ err = execute_op_unary_f32(octx);
376
+ break;
377
+
378
+ default:
379
+ err = HTP_STATUS_NO_SUPPORT;
380
+ break;
381
+ }
382
+
383
+ return err;
384
+ }
@@ -0,0 +1,293 @@
1
+ #include "worker-pool.h"
2
+
3
+ #include <qurt.h>
4
+ #include <stdatomic.h>
5
+ #include <stdint.h>
6
+ #include <stdio.h>
7
+ #include <stdlib.h>
8
+ #include <string.h>
9
+
10
+ #include "HAP_farf.h"
11
+
12
+ #define WORKER_THREAD_STACK_SZ (2 * 16384)
13
+ #define LOWEST_USABLE_QURT_PRIO (254)
14
+
15
+ struct worker_pool_s;
16
+
17
// Per-worker argument block: a pointer to this struct is what each worker
// thread receives as its entry-point argument.
typedef struct {
    struct worker_pool_s * pool;  // pool this worker belongs to
    unsigned int           id;    // index of the worker inside the pool
} worker_context_t;
22
+
23
+ // internal structure kept in thread-local storage per instance of worker pool
24
+ typedef struct worker_pool_s {
25
+ worker_pool_job_t job[MAX_NUM_WORKERS]; // list of job descriptors
26
+ qurt_thread_t thread[MAX_NUM_WORKERS]; // thread ID's of the workers
27
+ worker_context_t context[MAX_NUM_WORKERS]; // worker contexts
28
+ void * stack[MAX_NUM_WORKERS]; // thread stack pointers
29
+ unsigned int n_threads; // number of workers in this pool
30
+
31
+ atomic_uint seqn; // seqno used to detect new jobs
32
+ atomic_uint next_job; // next job index
33
+ atomic_uint n_pending; // number of pending jobs
34
+ atomic_uint n_jobs; // number of current jobs
35
+ atomic_bool killed; // threads need to exit
36
+ } worker_pool_t;
37
+
38
+ static void worker_pool_main(void * context) {
39
+ worker_context_t * me = (worker_context_t *) context;
40
+ worker_pool_t * pool = me->pool;
41
+
42
+ FARF(HIGH, "worker-pool: thread %u started", me->id);
43
+
44
+ unsigned int prev_seqn = 0;
45
+ while (!atomic_load(&pool->killed)) {
46
+ unsigned int seqn = atomic_load(&pool->seqn);
47
+ if (seqn == prev_seqn) {
48
+ // Nothing to do
49
+ qurt_futex_wait(&pool->seqn, prev_seqn);
50
+ continue;
51
+ }
52
+
53
+ // New job
54
+ prev_seqn = seqn;
55
+
56
+ unsigned int n = atomic_load(&pool->n_jobs);
57
+ unsigned int i = atomic_fetch_add(&pool->next_job, 1);
58
+ if (i >= n) {
59
+ // Spurios wakeup
60
+ continue;
61
+ }
62
+
63
+ pool->job[i].func(n, i, pool->job[i].data);
64
+
65
+ atomic_fetch_sub(&pool->n_pending, 1);
66
+ }
67
+
68
+ FARF(HIGH, "worker-pool: thread %u stopped", me->id);
69
+ }
70
+
71
+ AEEResult worker_pool_init_with_stack_size(worker_pool_context_t * context, uint32_t n_threads, uint32_t stack_size) {
72
+ int err = 0;
73
+
74
+ if (NULL == context) {
75
+ FARF(ERROR, "NULL context passed to worker_pool_init().");
76
+ return AEE_EBADPARM;
77
+ }
78
+
79
+ // Allocations
80
+ int size = (stack_size * n_threads) + (sizeof(worker_pool_t));
81
+
82
+ unsigned char * mem_blob = (unsigned char *) malloc(size);
83
+ if (!mem_blob) {
84
+ FARF(ERROR, "Could not allocate memory for worker pool!!");
85
+ return AEE_ENOMEMORY;
86
+ }
87
+
88
+ worker_pool_t * me = (worker_pool_t *) (mem_blob + stack_size * n_threads);
89
+
90
+ // name for the first worker, useful in debugging threads
91
+ char name[19];
92
+ snprintf(name, 12, "0x%8x:", (int) me);
93
+ strcat(name, "worker0");
94
+ me->n_threads = n_threads;
95
+
96
+ // initializations
97
+ for (unsigned int i = 0; i < me->n_threads; i++) {
98
+ me->stack[i] = NULL;
99
+ me->thread[i] = 0;
100
+
101
+ me->context[i].id = i;
102
+ me->context[i].pool = me;
103
+ }
104
+
105
+ // initialize job queue
106
+ me->n_pending = 0;
107
+ me->n_jobs = 0;
108
+ me->next_job = 0;
109
+ me->seqn = 0;
110
+ me->killed = 0;
111
+
112
+ // launch the workers
113
+ qurt_thread_attr_t attr;
114
+ qurt_thread_attr_init(&attr);
115
+
116
+ for (unsigned int i = 0; i < me->n_threads; i++) {
117
+ // set up stack
118
+ me->stack[i] = mem_blob;
119
+ mem_blob += stack_size;
120
+ qurt_thread_attr_set_stack_addr(&attr, me->stack[i]);
121
+ qurt_thread_attr_set_stack_size(&attr, stack_size);
122
+
123
+ // set up name
124
+ qurt_thread_attr_set_name(&attr, name);
125
+ name[17] = (name[17] + 1);
126
+ // name threads context:worker0, context:worker1, .. (recycle at 9, but num threads should be less than that anyway)
127
+ if (name[17] > '9') {
128
+ name[17] = '0';
129
+ }
130
+
131
+ // set up priority - by default, match the creating thread's prio
132
+ int prio = qurt_thread_get_priority(qurt_thread_get_id());
133
+
134
+ if (prio < 1) {
135
+ prio = 1;
136
+ }
137
+ if (prio > LOWEST_USABLE_QURT_PRIO) {
138
+ prio = LOWEST_USABLE_QURT_PRIO;
139
+ }
140
+
141
+ qurt_thread_attr_set_priority(&attr, prio);
142
+
143
+ // launch
144
+ err = qurt_thread_create(&me->thread[i], &attr, worker_pool_main, (void *) &me->context[i]);
145
+ if (err) {
146
+ FARF(ERROR, "Could not launch worker threads!");
147
+ worker_pool_release((worker_pool_context_t *) &me);
148
+ return AEE_EQURTTHREADCREATE;
149
+ }
150
+ }
151
+ *context = (worker_pool_context_t *) me;
152
+ return AEE_SUCCESS;
153
+ }
154
+
155
+ AEEResult worker_pool_init(worker_pool_context_t * context, uint32_t n_threads) {
156
+ return worker_pool_init_with_stack_size(context, n_threads, WORKER_THREAD_STACK_SZ);
157
+ }
158
+
159
+ // clean up worker pool
160
+ void worker_pool_release(worker_pool_context_t * context) {
161
+ worker_pool_t * me = (worker_pool_t *) *context;
162
+
163
+ // if no worker pool exists, return error.
164
+ if (NULL == me) {
165
+ return;
166
+ }
167
+
168
+ atomic_store(&me->killed, 1);
169
+ atomic_fetch_add(&me->seqn, 1);
170
+ qurt_futex_wake(&me->seqn, me->n_threads);
171
+
172
+ // de-initializations
173
+ for (unsigned int i = 0; i < me->n_threads; i++) {
174
+ if (me->thread[i]) {
175
+ int status;
176
+ (void) qurt_thread_join(me->thread[i], &status);
177
+ }
178
+ }
179
+
180
+ // free allocated memory (were allocated as a single buffer starting at stack[0])
181
+ if (me->stack[0]) {
182
+ free(me->stack[0]);
183
+ }
184
+
185
+ *context = NULL;
186
+ }
187
+
188
+ // run jobs
189
+ AEEResult worker_pool_run_jobs(worker_pool_context_t context, worker_pool_job_t * job, unsigned int n) {
190
+ worker_pool_t * me = (worker_pool_t *) context;
191
+ if (NULL == me) {
192
+ FARF(ERROR, "worker-pool: invalid context");
193
+ return AEE_EBADPARM;
194
+ }
195
+
196
+ if (n > me->n_threads) {
197
+ FARF(ERROR, "worker-pool: invalid number of jobs %u for n-threads %u", n, me->n_threads);
198
+ return AEE_EBADPARM;
199
+ }
200
+
201
+ memcpy(me->job, job, sizeof(worker_pool_job_t) * n);
202
+
203
+ if (n > 1) {
204
+ atomic_store(&me->next_job, 1);
205
+ atomic_store(&me->n_jobs, n);
206
+ atomic_store(&me->n_pending, n - 1);
207
+
208
+ // wake up workers
209
+ atomic_fetch_add(&me->seqn, 1);
210
+ qurt_futex_wake(&me->seqn, n - 1);
211
+ }
212
+
213
+ // main thread runs job #0
214
+ me->job[0].func(n, 0, me->job[0].data);
215
+
216
+ if (n > 1) {
217
+ while (atomic_load(&me->n_pending))
218
+ ;
219
+ }
220
+
221
+ return 0;
222
+ }
223
+
224
+ // run func
225
+ AEEResult worker_pool_run_func(worker_pool_context_t context, worker_callback_t func, void * data, unsigned int n) {
226
+ worker_pool_job_t job[n];
227
+
228
+ for (unsigned int i = 0; i < n; i++) {
229
+ job[i].func = func;
230
+ job[i].data = data;
231
+ }
232
+
233
+ return worker_pool_run_jobs(context, job, n);
234
+ }
235
+
236
+ AEEResult worker_pool_set_thread_priority(worker_pool_context_t context, unsigned int prio) {
237
+ worker_pool_t * me = (worker_pool_t *) context;
238
+
239
+ // if no worker pool exists, return error.
240
+ if (!me) {
241
+ return AEE_ENOMORE;
242
+ }
243
+
244
+ int result = AEE_SUCCESS;
245
+ if (prio < 1) {
246
+ prio = 1;
247
+ }
248
+ if (prio > LOWEST_USABLE_QURT_PRIO) {
249
+ prio = LOWEST_USABLE_QURT_PRIO;
250
+ }
251
+
252
+ for (unsigned int i = 0; i < me->n_threads; i++) {
253
+ int res = qurt_thread_set_priority(me->thread[i], (unsigned short) prio);
254
+ if (0 != res) {
255
+ result = AEE_EBADPARM;
256
+ FARF(ERROR, "QURT failed to set priority of thread %d, ERROR = %d", me->thread[i], res);
257
+ }
258
+ }
259
+
260
+ return result;
261
+ }
262
+
263
+ AEEResult worker_pool_retrieve_thread_id(worker_pool_context_t context, unsigned int * tids) {
264
+ worker_pool_t * me = (worker_pool_t *) context;
265
+ if (!me) {
266
+ FARF(ERROR, "worker-pool: invalid context");
267
+ return AEE_EBADPARM;
268
+ ;
269
+ }
270
+
271
+ for (int i = 0; i < me->n_threads; i++) {
272
+ tids[i] = me->thread[i];
273
+ }
274
+
275
+ return AEE_SUCCESS;
276
+ }
277
+
278
+ AEEResult worker_pool_get_thread_priority(worker_pool_context_t context, unsigned int * prio) {
279
+ worker_pool_t * me = (worker_pool_t *) context;
280
+ if (!me) {
281
+ FARF(ERROR, "worker-pool: invalid context");
282
+ return AEE_EBADPARM;
283
+ }
284
+
285
+ int priority = qurt_thread_get_priority(me->thread[0]);
286
+ if (priority > 0) {
287
+ *prio = priority;
288
+ return 0;
289
+ } else {
290
+ *prio = 0;
291
+ return AEE_EBADSTATE;
292
+ }
293
+ }