local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,192 @@
1
+ #include "virtgpu-forward-impl.h"
2
+ #include "virtgpu-shm.h"
3
+
4
+ int apir_device_get_count(virtgpu * gpu) {
5
+ apir_encoder * encoder;
6
+ apir_decoder * decoder;
7
+ ApirForwardReturnCode ret;
8
+
9
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT);
10
+ REMOTE_CALL(gpu, encoder, decoder, ret);
11
+
12
+ int32_t dev_count = -1;
13
+ apir_decode_int32_t(decoder, &dev_count);
14
+
15
+ remote_call_finish(gpu, encoder, decoder);
16
+
17
+ return dev_count;
18
+ }
19
+
20
+ char * apir_device_get_name(virtgpu * gpu) {
21
+ apir_encoder * encoder;
22
+ apir_decoder * decoder;
23
+ ApirForwardReturnCode ret;
24
+
25
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME);
26
+ REMOTE_CALL(gpu, encoder, decoder, ret);
27
+
28
+ const size_t string_size = apir_decode_array_size_unchecked(decoder);
29
+ char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
30
+ if (!string) {
31
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
32
+ return NULL;
33
+ }
34
+ apir_decode_char_array(decoder, string, string_size);
35
+
36
+ remote_call_finish(gpu, encoder, decoder);
37
+
38
+ return string;
39
+ }
40
+
41
+ char * apir_device_get_description(virtgpu * gpu) {
42
+ apir_encoder * encoder;
43
+ apir_decoder * decoder;
44
+ ApirForwardReturnCode ret;
45
+
46
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION);
47
+
48
+ REMOTE_CALL(gpu, encoder, decoder, ret);
49
+
50
+ const size_t string_size = apir_decode_array_size_unchecked(decoder);
51
+ char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
52
+ if (!string) {
53
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device description buffer\n", __func__);
54
+
55
+ return NULL;
56
+ }
57
+ apir_decode_char_array(decoder, string, string_size);
58
+
59
+ remote_call_finish(gpu, encoder, decoder);
60
+
61
+ return string;
62
+ }
63
+
64
+ uint32_t apir_device_get_type(virtgpu * gpu) {
65
+ static uint32_t dev_type = 255;
66
+ if (dev_type != 255) {
67
+ return dev_type;
68
+ }
69
+
70
+ apir_encoder * encoder;
71
+ apir_decoder * decoder;
72
+ ApirForwardReturnCode ret;
73
+
74
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_TYPE);
75
+
76
+ REMOTE_CALL(gpu, encoder, decoder, ret);
77
+
78
+ apir_decode_uint32_t(decoder, &dev_type);
79
+
80
+ remote_call_finish(gpu, encoder, decoder);
81
+
82
+ return dev_type;
83
+ }
84
+
85
+ void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total) {
86
+ static size_t dev_free = 0;
87
+ static size_t dev_total = 0;
88
+ apir_encoder * encoder;
89
+ apir_decoder * decoder;
90
+ ApirForwardReturnCode ret;
91
+
92
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY);
93
+
94
+ REMOTE_CALL(gpu, encoder, decoder, ret);
95
+
96
+ apir_decode_size_t(decoder, &dev_free);
97
+ apir_decode_size_t(decoder, &dev_total);
98
+
99
+ *free = dev_free;
100
+ *total = dev_total;
101
+
102
+ remote_call_finish(gpu, encoder, decoder);
103
+
104
+ return;
105
+ }
106
+
107
+ bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op) {
108
+ apir_encoder * encoder;
109
+ apir_decoder * decoder;
110
+ ApirForwardReturnCode ret;
111
+
112
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP);
113
+
114
+ apir_encode_ggml_tensor_inline(encoder, op);
115
+
116
+ REMOTE_CALL(gpu, encoder, decoder, ret);
117
+
118
+ bool supports_op;
119
+ apir_decode_bool_t(decoder, &supports_op);
120
+
121
+ remote_call_finish(gpu, encoder, decoder);
122
+
123
+ return supports_op;
124
+ }
125
+
126
+ apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu) {
127
+ apir_encoder * encoder;
128
+ apir_decoder * decoder;
129
+ ApirForwardReturnCode ret;
130
+
131
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE);
132
+
133
+ REMOTE_CALL(gpu, encoder, decoder, ret);
134
+
135
+ apir_buffer_type_host_handle_t buft_handle;
136
+ apir_decode_apir_buffer_type_host_handle_t(decoder, &buft_handle);
137
+
138
+ remote_call_finish(gpu, encoder, decoder);
139
+
140
+ return buft_handle;
141
+ }
142
+
143
+ void apir_device_get_props(virtgpu * gpu,
144
+ bool * async,
145
+ bool * host_buffer,
146
+ bool * buffer_from_host_ptr,
147
+ bool * events) {
148
+ apir_encoder * encoder;
149
+ apir_decoder * decoder;
150
+ ApirForwardReturnCode ret;
151
+
152
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_PROPS);
153
+
154
+ REMOTE_CALL(gpu, encoder, decoder, ret);
155
+
156
+ apir_decode_bool_t(decoder, async);
157
+ apir_decode_bool_t(decoder, host_buffer);
158
+ apir_decode_bool_t(decoder, buffer_from_host_ptr);
159
+ apir_decode_bool_t(decoder, events);
160
+
161
+ remote_call_finish(gpu, encoder, decoder);
162
+
163
+ return;
164
+ }
165
+
166
+ apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size) {
167
+ apir_encoder * encoder;
168
+ apir_decoder * decoder;
169
+ ApirForwardReturnCode ret;
170
+
171
+ apir_buffer_context_t buffer_context;
172
+
173
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR);
174
+
175
+ if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) {
176
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate %ldb of guest-host shared buffer", __func__, size);
177
+ }
178
+
179
+ apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id);
180
+
181
+ apir_encode_size_t(encoder, &size);
182
+ apir_encode_size_t(encoder, &max_tensor_size);
183
+
184
+ REMOTE_CALL(gpu, encoder, decoder, ret);
185
+
186
+ apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle);
187
+ buffer_context.buft_host_handle = apir_decode_apir_buffer_type_host_handle(decoder);
188
+
189
+ remote_call_finish(gpu, encoder, decoder);
190
+
191
+ return buffer_context;
192
+ }
@@ -0,0 +1,36 @@
1
+ #pragma once
2
+
3
+ // clang-format off
4
+ #include "virtgpu.h"
5
+ #include "ggml-remoting.h"
6
+ #include "backend/shared/apir_backend.h"
7
+ #include "backend/shared/apir_cs_ggml.h"
8
+ #include "ggml-backend-impl.h"
9
+ // clang-format on
10
+ /* REMOTE_CALL_PREPARE: declares REMOTE_CALL_PREPARE_forward_flag and REMOTE_CALL_PREPARE_command_name in the enclosing scope (so it can expand only once per scope), then stores the result of remote_call_prepare() in encoder_name and aborts via GGML_ABORT if it is null. */
11
+ #define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \
12
+ int32_t REMOTE_CALL_PREPARE_forward_flag = (int32_t) apir_command_type__; \
13
+ const char * REMOTE_CALL_PREPARE_command_name = apir_dispatch_command_name(apir_command_type__); \
14
+ do { \
15
+ encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, REMOTE_CALL_PREPARE_forward_flag); \
16
+ if (!encoder_name) { \
17
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to prepare the remote call encoder", __func__); \
18
+ } \
19
+ } while (0)
20
+ /* REMOTE_CALL: runs remote_call() and stores its decoder in decoder_name. Aborts via GGML_ABORT if no decoder is returned, if the return code is below APIR_FORWARD_BASE_INDEX, or if the code minus APIR_FORWARD_BASE_INDEX is nonzero. Reads REMOTE_CALL_PREPARE_command_name, so REMOTE_CALL_PREPARE must have been expanded in the same scope. */
21
+ #define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \
22
+ do { \
23
+ ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \
24
+ if (!decoder_name) { \
25
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to kick the remote call", __func__); \
26
+ } \
27
+ if (ret_name < APIR_FORWARD_BASE_INDEX) { \
28
+ GGML_ABORT(GGML_VIRTGPU "%s: failed to forward the API call: %s: code %d", __func__, \
29
+ apir_forward_error(ret_name), ret_name); \
30
+ } \
31
+ ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \
32
+ if (ret_name != 0) { \
33
+ GGML_ABORT(GGML_VIRTGPU "backend function '%s' failed (return code: %d)", \
34
+ REMOTE_CALL_PREPARE_command_name, ret_name); \
35
+ } \
36
+ } while (0)
@@ -0,0 +1,53 @@
1
+ #pragma once
2
+
3
+ /* device */
4
+ void apir_device_get_device_count(struct virtgpu * gpu);
5
+ int apir_device_get_count(struct virtgpu * gpu);
6
+ char * apir_device_get_name(struct virtgpu * gpu);
7
+ char * apir_device_get_description(struct virtgpu * gpu);
8
+ uint32_t apir_device_get_type(struct virtgpu * gpu);
9
+ void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total);
10
+ bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op);
11
+ apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu);
12
+ void apir_device_get_props(struct virtgpu * gpu,
13
+ bool * async,
14
+ bool * host_buffer,
15
+ bool * buffer_from_host_ptr,
16
+ bool * events);
17
+ apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size);
18
+
19
+ /* buffer-type */
20
+ char * apir_buffer_type_get_name(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
21
+ size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
22
+ size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, apir_buffer_type_host_handle_t host_handle);
23
+ /* apir_buffer_type_is_host is deprecated. */
24
+ apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu,
25
+ apir_buffer_type_host_handle_t host_handle,
26
+ size_t size);
27
+ size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu,
28
+ apir_buffer_type_host_handle_t host_handle,
29
+ const ggml_tensor * op);
30
+
31
+ /* buffer */
32
+ void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
33
+ void apir_buffer_set_tensor(struct virtgpu * gpu,
34
+ apir_buffer_context_t * buffer_context,
35
+ ggml_tensor * tensor,
36
+ const void * data,
37
+ size_t offset,
38
+ size_t size);
39
+ void apir_buffer_get_tensor(struct virtgpu * gpu,
40
+ apir_buffer_context_t * buffer_context,
41
+ const ggml_tensor * tensor,
42
+ void * data,
43
+ size_t offset,
44
+ size_t size);
45
+ bool apir_buffer_cpy_tensor(struct virtgpu * gpu,
46
+ apir_buffer_context_t * buffer_context,
47
+ const ggml_tensor * src,
48
+ const ggml_tensor * dst);
49
+ void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value);
50
+ void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context);
51
+
52
+ /* backend */
53
+ ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph);
@@ -0,0 +1,98 @@
1
+ #include "virtgpu-shm.h"
2
+
3
+ #include "virtgpu.h"
4
+
5
+ #include <assert.h>
6
+
7
+ static uint32_t virtgpu_ioctl_resource_create_blob(virtgpu * gpu,
8
+ uint32_t blob_mem,
9
+ uint32_t blob_flags,
10
+ size_t blob_size,
11
+ uint64_t blob_id,
12
+ uint32_t * res_id) {
13
+ #ifdef SIMULATE_BO_SIZE_FIX
14
+ blob_size = align64(blob_size, 4096);
15
+ #endif
16
+
17
+ drm_virtgpu_resource_create_blob args = {
18
+ .blob_mem = blob_mem,
19
+ .blob_flags = blob_flags,
20
+ .bo_handle = 0,
21
+ .res_handle = 0,
22
+ .size = blob_size,
23
+ .pad = 0,
24
+ .cmd_size = 0,
25
+ .cmd = 0,
26
+ .blob_id = blob_id,
27
+ };
28
+
29
+ if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args)) {
30
+ return 0;
31
+ }
32
+
33
+ *res_id = args.res_handle;
34
+ return args.bo_handle;
35
+ }
36
+
37
+ static void virtgpu_ioctl_gem_close(virtgpu * gpu, uint32_t gem_handle) {
38
+ drm_gem_close args = {
39
+ .handle = gem_handle,
40
+ .pad = 0,
41
+ };
42
+
43
+ const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
44
+ assert(!ret);
45
+ #ifdef NDEBUG
46
+ UNUSED(ret);
47
+ #endif
48
+ }
49
+
50
+ static void * virtgpu_ioctl_map(virtgpu * gpu, uint32_t gem_handle, size_t size) {
51
+ drm_virtgpu_map args = {
52
+ .offset = 0,
53
+ .handle = gem_handle,
54
+ .pad = 0,
55
+ };
56
+
57
+ if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args)) {
58
+ return NULL;
59
+ }
60
+
61
+ void * ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd, args.offset);
62
+ if (ptr == MAP_FAILED) {
63
+ return NULL;
64
+ }
65
+
66
+ return ptr;
67
+ }
68
+
69
+ void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem) {
70
+ munmap(shmem->mmap_ptr, shmem->mmap_size);
71
+ virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
72
+ }
73
+
74
+ int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem) {
75
+ size = align64(size, 16384);
76
+
77
+ uint32_t res_id;
78
+ uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(gpu, VIRTGPU_BLOB_MEM_HOST3D,
79
+ VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, &res_id);
80
+
81
+ if (!gem_handle) {
82
+ return 1;
83
+ }
84
+
85
+ void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
86
+ if (!ptr) {
87
+ virtgpu_ioctl_gem_close(gpu, gem_handle);
88
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: virtgpu_ioctl_map failed\n", __func__);
89
+ return 1;
90
+ }
91
+
92
+ shmem->res_id = res_id;
93
+ shmem->mmap_size = size;
94
+ shmem->mmap_ptr = ptr;
95
+ shmem->gem_handle = gem_handle;
96
+
97
+ return 0;
98
+ }
@@ -0,0 +1,23 @@
1
+ #pragma once
2
+
3
+ #include "virtgpu-utils.h"
4
+
5
+ #include <sys/mman.h>
6
+
7
+ #include <atomic>
8
+ #include <cassert>
9
+ #include <cstddef>
10
+ #include <cstdint>
11
+
12
+ struct virtgpu;
13
+
14
+ struct virtgpu_shmem { // descriptor of one mapped guest-host shared region, filled by virtgpu_shmem_create()
15
+ uint32_t res_id; // res_handle returned by the blob-creation ioctl
16
+ size_t mmap_size; // length of the mapping in bytes (requested size rounded up to 16384)
17
+ void * mmap_ptr; // address returned by mmap(); unmapped by virtgpu_shmem_destroy()
18
+
19
+ uint32_t gem_handle; // bo_handle returned by the blob-creation ioctl; closed by virtgpu_shmem_destroy()
20
+ };
21
+
22
+ int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem);
23
+ void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem);
@@ -0,0 +1,179 @@
1
+ #include "virtgpu-utils.h"
2
+
3
+ #include <malloc.h>
4
+ #include <stdlib.h>
5
+
6
+ #include <cstring>
7
+
/* Node allocations are aligned to NODE_ALLOC_ALIGN bytes so the low bits of a
 * node pointer are free to carry the node level (see NODE_LEVEL_MASK). */
#define NODE_ALLOC_ALIGN 64
#define NODE_PTR_MASK    (~((uintptr_t) NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK  ((uintptr_t) NODE_ALLOC_ALIGN - 1)
#define NULL_NODE        0

/* os_malloc_aligned() is a real function defined further down in this file.
 * It must not also exist as a function-like macro: the former alias to
 * _aligned_malloc() rewrote that definition's name during preprocessing. */
#define os_free_aligned(_ptr)          free(_ptr)
#define p_atomic_cmpxchg(v, old, _new) __sync_val_compare_and_swap((v), (old), (_new))
16
+
/**
 * Integer floor(log2(n)) for a 64-bit value: the position of the highest set
 * bit. Returns 0 for n == 0 and n == 1.
 */
static inline uint64_t util_logbase2_64(uint64_t n) {
#if defined(HAVE___BUILTIN_CLZLL)
    /* n | 1 keeps the builtin away from a zero argument. */
    return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
#else
    /* Binary search for the top bit: test the upper half of what remains,
     * halving the window each round (32, 16, 8, 4, 2, 1). */
    uint64_t result = 0ull;
    for (unsigned step = 32; step != 0; step >>= 1) {
        if ((n >> step) != 0) {
            n >>= step;
            result += step;
        }
    }
    return result;
#endif
}
48
+
49
+ void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size) {
50
+ memset(arr, 0, sizeof(*arr));
51
+ arr->elem_size = elem_size;
52
+ arr->node_size_log2 = util_logbase2_64(node_size);
53
+ assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
54
+ }
55
+
/**
 * Allocate `size` bytes with posix_memalign().
 *
 * The requested alignment is first rounded up to a multiple of
 * sizeof(void *).
 *
 * @return the allocation, or NULL if posix_memalign() reports an error.
 */
static inline void * os_malloc_aligned(size_t size, size_t alignment) {
    const size_t word    = sizeof(void *);
    const size_t rounded = (alignment + word - 1) & ~(word - 1);

    void * block;
    const int rc = posix_memalign(&block, rounded, size);
    return rc == 0 ? block : NULL;
}
64
+
65
+ static inline void * _util_sparse_array_node_data(uintptr_t handle) {
66
+ return (void *) (handle & NODE_PTR_MASK);
67
+ }
68
+
69
+ static inline unsigned _util_sparse_array_node_level(uintptr_t handle) {
70
+ return handle & NODE_LEVEL_MASK;
71
+ }
72
+
73
+ static inline void _util_sparse_array_node_finish(util_sparse_array * arr, uintptr_t node) {
74
+ if (_util_sparse_array_node_level(node) > 0) {
75
+ uintptr_t * children = (uintptr_t *) _util_sparse_array_node_data(node);
76
+ size_t node_size = 1ull << arr->node_size_log2;
77
+ for (size_t i = 0; i < node_size; i++) {
78
+ if (children[i]) {
79
+ _util_sparse_array_node_finish(arr, children[i]);
80
+ }
81
+ }
82
+ }
83
+
84
+ os_free_aligned(_util_sparse_array_node_data(node));
85
+ }
86
+
87
+ static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) {
88
+ assert(data != NULL);
89
+ assert(((uintptr_t) data & NODE_LEVEL_MASK) == 0);
90
+ assert((level & NODE_PTR_MASK) == 0);
91
+ return (uintptr_t) data | level;
92
+ }
93
+
94
+ inline uintptr_t _util_sparse_array_node_alloc(util_sparse_array * arr, unsigned level) {
95
+ size_t size;
96
+ if (level == 0) {
97
+ size = arr->elem_size << arr->node_size_log2;
98
+ } else {
99
+ size = sizeof(uintptr_t) << arr->node_size_log2;
100
+ }
101
+
102
+ void * data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
103
+ memset(data, 0, size);
104
+
105
+ return _util_sparse_array_node(data, level);
106
+ }
107
+
108
+ static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr, uintptr_t cmp_node, uintptr_t node) {
109
+ uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);
110
+
111
+ if (prev_node != cmp_node) {
112
+ /* We lost the race. Free this one and return the one that was already
113
+ * allocated.
114
+ */
115
+ os_free_aligned(_util_sparse_array_node_data(node));
116
+ return prev_node;
117
+ } else {
118
+ return node;
119
+ }
120
+ }
121
+
122
+ void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx) {
123
+ const unsigned node_size_log2 = arr->node_size_log2;
124
+ uintptr_t root = p_atomic_read(&arr->root);
125
+ if (unlikely(!root)) {
126
+ unsigned root_level = 0;
127
+ uint64_t idx_iter = idx >> node_size_log2;
128
+ while (idx_iter) {
129
+ idx_iter >>= node_size_log2;
130
+ root_level++;
131
+ }
132
+ uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
133
+ root = _util_sparse_array_set_or_free_node(&arr->root, NULL_NODE, new_root);
134
+ }
135
+
136
+ while (1) {
137
+ unsigned root_level = _util_sparse_array_node_level(root);
138
+ uint64_t root_idx = idx >> (root_level * node_size_log2);
139
+ if (likely(root_idx < (1ull << node_size_log2))) {
140
+ break;
141
+ }
142
+
143
+ /* In this case, we have a root but its level is low enough that the
144
+ * requested index is out-of-bounds.
145
+ */
146
+ uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);
147
+
148
+ uintptr_t * new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root);
149
+ new_root_children[0] = root;
150
+
151
+ /* We only add one at a time instead of the whole tree because it's
152
+ * easier to ensure correctness of both the tree building and the
153
+ * clean-up path. Because we're only adding one node we never have to
154
+ * worry about trying to free multiple things without freeing the old
155
+ * things.
156
+ */
157
+ root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
158
+ }
159
+
160
+ void * node_data = _util_sparse_array_node_data(root);
161
+ unsigned node_level = _util_sparse_array_node_level(root);
162
+ while (node_level > 0) {
163
+ uint64_t child_idx = (idx >> (node_level * node_size_log2)) & ((1ull << node_size_log2) - 1);
164
+
165
+ uintptr_t * children = (uintptr_t *) node_data;
166
+ uintptr_t child = p_atomic_read(&children[child_idx]);
167
+
168
+ if (unlikely(!child)) {
169
+ child = _util_sparse_array_node_alloc(arr, node_level - 1);
170
+ child = _util_sparse_array_set_or_free_node(&children[child_idx], NULL_NODE, child);
171
+ }
172
+
173
+ node_data = _util_sparse_array_node_data(child);
174
+ node_level = _util_sparse_array_node_level(child);
175
+ }
176
+
177
+ uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
178
+ return (void *) ((char *) node_data + (elem_idx * arr->elem_size));
179
+ }
@@ -0,0 +1,86 @@
1
+ #pragma once
2
+
3
+ #include <atomic>
4
+ #include <cassert>
5
+ #include <cerrno>
6
+ #include <cstdarg>
7
+ #include <cstddef>
8
+ #include <cstdint>
9
+ #include <cstdio>
10
+ #include <cstdlib>
11
+ #include <ctime>
12
+
13
+ #define unlikely(x) __builtin_expect(!!(x), 0)
14
+ #define likely(x) __builtin_expect(!!(x), 1)
15
+
16
+ #ifndef UNUSED
17
+ # define UNUSED(x) (void) (x)
18
+ #endif
19
+
20
+ /** Checks if a value is a power of two. Does not handle zero: IS_POT(0) evaluates to true. */
21
+ #define IS_POT(v) (((v) & ((v) - 1)) == 0)
22
+
23
+ /** Checks if a value is a nonzero power of two; zero yields false. */
24
+ #define IS_POT_NONZERO(v) ((v) != 0 && IS_POT(v))
25
+
26
+ /** Rounds x up to a multiple of pot_align, which must be a power of two. */
27
+ #define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
28
+
29
+ #define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE) /* atomic load with acquire ordering */
30
+
/** True when exactly one bit of v is set, i.e. v is a power of two; false for 0. */
static inline bool util_is_power_of_two_nonzero64(uint64_t v) {
    if (v == 0) {
        return false;
    }
    /* Clearing the lowest set bit leaves zero only for a single-bit value. */
    return (v & (v - 1)) == 0;
}
34
+
35
+ static inline uint64_t align64(uint64_t value, uint64_t alignment) {
36
+ assert(util_is_power_of_two_nonzero64(alignment));
37
+ return ALIGN_POT(value, alignment);
38
+ }
39
+
40
+ struct list_head { // link node holding pointers to two other list_head nodes
41
+ list_head * prev; // previous node
42
+ list_head * next; // next node
43
+ };
44
+
45
+ struct util_sparse_array { // state of a sparse array; set up by util_sparse_array_init()
46
+ size_t elem_size; // size in bytes of one stored element
47
+ unsigned node_size_log2; // log2 of the number of slots per node
48
+
49
+ uintptr_t root; // handle of the root node (node pointer with the level in its low bits); 0 while empty
50
+ };
51
+
52
+ void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx);
53
+ void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size);
54
+
/**
 * Sleep for `usecs` microseconds on CLOCK_MONOTONIC.
 *
 * The sleep is relative; when clock_nanosleep() returns EINTR it is restarted
 * with the remaining time that the call wrote back.
 */
inline void os_time_sleep(int64_t usecs) {
    timespec remaining;
    remaining.tv_sec  = usecs / 1000000;
    remaining.tv_nsec = (usecs % 1000000) * 1000;

    int rc;
    do {
        rc = clock_nanosleep(CLOCK_MONOTONIC, 0, &remaining, &remaining);
    } while (rc == EINTR);
}
62
+
63
+ struct timer_data { // accumulator used by start_timer() / stop_timer()
64
+ long long start; // CLOCK_MONOTONIC timestamp in ns, written by start_timer()
65
+ long long total; // sum of the durations (ns) added by stop_timer()
66
+ long long count; // number of stop_timer() calls recorded
67
+ };
68
+
69
+ static inline void start_timer(timer_data * timer) {
70
+ timespec ts;
71
+ clock_gettime(CLOCK_MONOTONIC, &ts);
72
+ timer->start = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
73
+ }
74
+
75
+ // returns the duration in ns
76
+ static inline long long stop_timer(timer_data * timer) {
77
+ timespec ts;
78
+ clock_gettime(CLOCK_MONOTONIC, &ts);
79
+ long long timer_end = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
80
+
81
+ long long duration = (timer_end - timer->start);
82
+ timer->total += duration;
83
+ timer->count += 1;
84
+
85
+ return duration;
86
+ }