local-llm-rn 1.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,333 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ # Generated by Claude AI
4
+
5
+ Script to completely regenerate the GGML remoting codebase from YAML configuration.
6
+
7
+ This script reads ggmlremoting_functions.yaml and regenerates all the header files and
8
+ implementation templates for the GGML remoting layer.
9
+
10
+ Usage:
11
+ python regenerate_remoting.py
12
+
13
+ The script will:
14
+ 1. Read ggmlremoting_functions.yaml configuration
15
+ 2. Generate updated header files
16
+ 3. Generate implementation templates in dedicated files
17
+ 4. Show a summary of what was generated
18
+ """
19
+
20
+ import yaml
21
+ from typing import Dict, List, Any
22
+ from pathlib import Path
23
+ import os
24
+ import subprocess
25
+ import shutil
26
+ import logging
27
+
28
# Newline kept in a named constant so f-string expressions can join lines
# without embedding a backslash escape inside the braces.
NL = "\n"
29
+
30
+
31
class RemotingCodebaseGenerator:
    """Generate the GGML remoting header files from a YAML description.

    The YAML document must provide three top-level keys:

    * ``functions``: mapping of group name -> ``{group_description, functions}``
      where the inner ``functions`` maps a function name to its (optional)
      metadata (``enabled``, ``deprecated``, ``frontend_return``,
      ``frontend_extra_params``).
    * ``naming_patterns``: prefixes / overrides used to derive identifiers.
    * ``config``: miscellaneous settings (only ``base_path`` is read here).
    """

    def __init__(self, yaml_path: str = "ggmlremoting_functions.yaml"):
        """Load the YAML configuration found at ``yaml_path``.

        Raises:
            FileNotFoundError: if ``yaml_path`` does not exist.
            KeyError: if one of the required top-level keys is missing.
        """
        self.yaml_path = yaml_path

        if not Path(yaml_path).exists():
            raise FileNotFoundError(f"Configuration file {yaml_path} not found")

        # Explicit encoding: do not depend on the platform's locale default.
        with open(yaml_path, 'r', encoding='utf-8') as f:
            self.config = yaml.safe_load(f)

        self.functions = self.config['functions']
        self.naming_patterns = self.config['naming_patterns']
        self.config_data = self.config['config']

        # Probe once; the result gates every later formatting attempt.
        self.clang_format_available = self._check_clang_format_available()

    def _check_clang_format_available(self) -> bool:
        """Return True when a ``clang-format`` executable is found on PATH."""
        return shutil.which("clang-format") is not None

    def _format_file_with_clang_format(self, file_path: Path) -> bool:
        """Run ``clang-format -i`` on ``file_path``.

        Returns True on success, False when clang-format is unavailable or
        fails. Failures are logged, never raised.
        """
        if not self.clang_format_available:
            return False

        try:
            subprocess.run(
                ["clang-format", "-i", str(file_path)],
                check=True,
                capture_output=True,
                text=True
            )
            return True
        except subprocess.CalledProcessError:
            logging.exception(f" ⚠️ clang-format failed for {file_path}")
            return False
        except Exception as e:
            logging.exception(f" ⚠️ Unexpected error formatting {file_path}: {e}")
            return False

    def generate_enum_name(self, group_name: str, function_name: str) -> str:
        """Build the command-type enum identifier: ``<enum_prefix><GROUP>_<FUNCTION>``."""
        prefix = self.naming_patterns['enum_prefix']
        return f"{prefix}{group_name.upper()}_{function_name.upper()}"

    def generate_backend_function_name(self, group_name: str, function_name: str) -> str:
        """Build the backend handler name.

        An entry keyed ``<group>_<function>`` in the optional
        ``backend_function_overrides`` mapping takes precedence over the
        default ``<backend_function_prefix><group>_<function>`` form.
        """
        function_key = f"{group_name}_{function_name}"
        overrides = self.naming_patterns.get('backend_function_overrides', {})

        if function_key in overrides:
            return overrides[function_key]

        prefix = self.naming_patterns['backend_function_prefix']
        return f"{prefix}{group_name}_{function_name}"

    def generate_frontend_function_name(self, group_name: str, function_name: str) -> str:
        """Build the frontend function name: ``<frontend_function_prefix><group>_<function>``."""
        prefix = self.naming_patterns['frontend_function_prefix']
        return f"{prefix}{group_name}_{function_name}"

    def get_enabled_functions(self) -> List[Dict[str, Any]]:
        """Return one metadata dict per enabled function, in YAML order.

        ``enum_value`` is a running counter over the *enabled* functions only,
        so disabling a function shifts the values of those that follow it.
        """
        functions = []
        enum_value = 0

        for group_name, group_data in self.functions.items():
            group_description = group_data['group_description']

            for function_name, func_metadata in group_data['functions'].items():
                # A function declared with only comments parses as None.
                if func_metadata is None:
                    func_metadata = {}

                # Functions are enabled by default unless explicitly disabled.
                if not func_metadata.get('enabled', True):
                    continue

                functions.append({
                    'group_name': group_name,
                    'function_name': function_name,
                    'enum_name': self.generate_enum_name(group_name, function_name),
                    'enum_value': enum_value,
                    'backend_function': self.generate_backend_function_name(group_name, function_name),
                    'frontend_function': self.generate_frontend_function_name(group_name, function_name),
                    'frontend_return': func_metadata.get('frontend_return', 'void'),
                    'frontend_extra_params': func_metadata.get('frontend_extra_params', []),
                    'group_description': group_description,
                    'deprecated': func_metadata.get('deprecated', False),
                })
                enum_value += 1

        return functions

    def generate_apir_backend_header(self) -> str:
        """Generate the content of the apir_backend header.

        The output holds the ``ApirBackendCommandType`` enum followed by the
        ``apir_dispatch_command_name()`` enum-to-string helper.
        """
        functions = self.get_enabled_functions()

        # Enum section, with a comment line at the start of every group.
        enum_lines = ["typedef enum ApirBackendCommandType {"]
        current_group = None

        for func in functions:
            if func['group_name'] != current_group:
                enum_lines.append("")
                enum_lines.append(f" /* {func['group_description']} */")
                current_group = func['group_name']

            enum_lines.append(f" {func['enum_name']} = {func['enum_value']},")

        # Sentinel: number of entries, i.e. last command index + 1.
        total_count = len(functions)
        enum_lines.append("\n // last command_type index + 1")
        enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},")
        enum_lines.append("} ApirBackendCommandType;")

        # Enum -> name mapping function.
        func_lines = []
        func_lines.append("static inline const char * apir_dispatch_command_name(ApirBackendCommandType type) {")
        func_lines.append(" switch (type) {")

        current_group = None
        for func in functions:
            if func['group_name'] != current_group:
                func_lines.append(f" /* {func['group_description']} */")
                current_group = func['group_name']

            # Reported name is "<group>_<function>", without any backend prefix.
            clean_name = f"{func['group_name']}_{func['function_name']}"
            func_lines.append(f" case {func['enum_name']}:")
            func_lines.append(f" return \"{clean_name}\";")

        func_lines.append("")
        func_lines.append(" default:")
        func_lines.append(" return \"unknown\";")
        func_lines.append(" }")
        func_lines.append("}")

        header_content = "\n".join(enum_lines) + "\n\n" + "\n".join(func_lines) + "\n"

        return header_content

    def generate_backend_dispatched_header(self) -> str:
        """Generate the content of the backend-dispatched header.

        The output holds one handler declaration per enabled function and the
        ``apir_backend_dispatch_table`` array listing the handlers in enum order.
        Deprecated functions keep their handler and are annotated as such.
        """
        functions = self.get_enabled_functions()

        # Function declarations.
        decl_lines = []
        current_group = None

        for func in functions:
            if func['group_name'] != current_group:
                decl_lines.append(f"\n/* {func['group_description']} */")
                current_group = func['group_name']

            signature = "uint32_t"
            params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx"
            if func['deprecated']:
                decl_lines.append(f"/* {func['enum_name']} is deprecated. Keeping the handler for backward compatibility. */")

            decl_lines.append(f"{signature} {func['backend_function']}({params});")

        # Dispatch table.
        table_lines = []
        current_group = None

        for func in functions:
            if func['group_name'] != current_group:
                table_lines.append(f"\n /* {func['group_description']} */")
                table_lines.append("")
                current_group = func['group_name']

            deprecated = " /* DEPRECATED */" if func['deprecated'] else ""
            table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']}{deprecated},")

        declarations = "\n".join(decl_lines)
        table_entries = "\n".join(table_lines)

        header_content = f'''\
#pragma once

{declarations}

extern "C" {{
static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{
{table_entries}
}};
}}
'''
        return header_content

    def generate_virtgpu_forward_header(self) -> str:
        """Generate the content of the virtgpu-forward.gen.h file.

        Each non-deprecated function gets a prototype whose first parameter is
        ``naming_patterns['frontend_base_param']`` followed by its
        ``frontend_extra_params``. Deprecated functions only get a comment.
        """
        functions = self.get_enabled_functions()

        decl_lines = []
        current_group = None

        for func in functions:
            if func['group_name'] != current_group:
                decl_lines.append("")
                decl_lines.append(f"/* {func['group_description']} */")
                current_group = func['group_name']

            if func['deprecated']:
                decl_lines.append(f"/* {func['frontend_function']} is deprecated. */")
                continue

            params = [self.naming_patterns['frontend_base_param']]
            params.extend(func['frontend_extra_params'])
            param_str = ', '.join(params)

            decl_lines.append(f"{func['frontend_return']} {func['frontend_function']}({param_str});")

        declarations = "\n".join(decl_lines)

        header_content = f'''\
#pragma once
{declarations}
'''
        return header_content

    def regenerate_codebase(self) -> None:
        """Write the three generated headers, format them, and log a summary.

        Output location depends on the current working directory: when it is a
        directory named exactly ``ggml-virtgpu`` the files are written relative
        to it; otherwise they go under ``<base_path>/ggml-virtgpu`` where
        ``base_path`` comes from the ``config`` section (default ``ggml/src``).
        """
        logging.info("🔄 Regenerating GGML Remoting Codebase...")
        logging.info("=" * 50)

        # Compare the directory *name*: a plain suffix test on the path string
        # would also match unrelated directories such as "not-ggml-virtgpu".
        current_dir = os.getcwd()
        is_frontend_dir = Path(current_dir).name == 'ggml-virtgpu'

        if is_frontend_dir:
            # Running from inside the ggml-virtgpu frontend directory.
            logging.info("📍 Detected frontend directory execution")
            frontend_base = Path(".")
        else:
            # Running from the project root (fallback to original behavior).
            logging.info("📍 Detected project root execution")
            base_path = self.config_data.get('base_path', 'ggml/src')
            frontend_base = Path(base_path) / "ggml-virtgpu"

        # Output path paired with the generator producing its content.
        backend_base = frontend_base / "backend"
        outputs = [
            (backend_base / "shared" / "apir_backend.gen.h", self.generate_apir_backend_header),
            (backend_base / "backend-dispatched.gen.h", self.generate_backend_dispatched_header),
            (frontend_base / "virtgpu-forward.gen.h", self.generate_virtgpu_forward_header),
        ]

        for out_path, _ in outputs:
            out_path.parent.mkdir(parents=True, exist_ok=True)

        logging.info("📁 Generating header files...")

        for out_path, generate in outputs:
            out_path.write_text(generate(), encoding='utf-8')
            logging.info(f" ✅ {out_path.resolve()}")

        # Format generated files with clang-format.
        generated_files = [out_path for out_path, _ in outputs]

        if not self.clang_format_available:
            logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted.\n"
                            " Install clang-format to enable automatic code formatting.")
        else:
            logging.info("\n🎨 Formatting files with clang-format...")
            for file_path in generated_files:
                if self._format_file_with_clang_format(file_path):
                    logging.info(f" ✅ Formatted {file_path.name}")
                else:
                    logging.warning(f" ❌ Failed to format {file_path.name}")

        # Summary.
        functions = self.get_enabled_functions()
        total_functions = len(functions)

        logging.info("\n📊 Generation Summary:")
        logging.info("=" * 50)
        logging.info(f" Total functions: {total_functions}")
        logging.info(f" Function groups: {len(self.functions)}")
        logging.info(" Header files: 3")
        logging.info(f" Working directory: {current_dir}")
321
+
322
+
323
def main():
    """Command-line entry point: regenerate all headers from the default YAML file.

    Any exception is logged with its traceback and turned into exit status 1.
    """
    # The root logger defaults to WARNING and this script never configured it,
    # so every logging.info() progress/summary line was silently dropped.
    # basicConfig() is a no-op if the embedding application already set up logging.
    logging.basicConfig(level=logging.INFO, format="%(message)s")

    try:
        generator = RemotingCodebaseGenerator()
        generator.regenerate_codebase()
    except Exception as e:
        logging.exception(f"❌ Error: {e}")
        # SystemExit instead of exit(): exit() is a helper injected by the
        # `site` module and is not guaranteed to exist (e.g. `python -S`).
        raise SystemExit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,15 @@
1
+ #include "backend/shared/apir_backend.h"
2
+ #include "ggml-alloc.h"
3
+ #include "ggml-impl.h"
4
+ #include "ggml.h"
5
+ #include "virtgpu-shm.h"
6
+ #include "virtgpu-utils.h"
7
+
8
+ struct apir_buffer_context_t {
9
+ apir_buffer_host_handle_t host_handle;
10
+
11
+ struct virtgpu_shmem shmem;
12
+ apir_buffer_type_host_handle_t buft_host_handle;
13
+ };
14
+
15
+ #include "virtgpu-forward.gen.h"
@@ -0,0 +1,58 @@
1
+ #include "virtgpu-forward-impl.h"
2
+
3
/**
 * Return the current wall-clock time in milliseconds since the Unix epoch.
 *
 * The previous implementation combined tv_sec and tv_nsec into a nanosecond
 * count, contradicting the `_ms` suffix; the value is now really milliseconds.
 *
 * CLOCK_REALTIME can jump when the system clock is adjusted; switch to
 * CLOCK_MONOTONIC if this is ever used to measure elapsed time.
 */
static long long current_time_ms() {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return (long long) ts.tv_sec * 1000LL + (long long) ts.tv_nsec / 1000000LL;
}
8
+
9
+ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) {
10
+ apir_encoder * encoder;
11
+ apir_decoder * decoder;
12
+ ApirForwardReturnCode ret;
13
+
14
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE);
15
+
16
+ std::vector<uint8_t> cgraph_data;
17
+ size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data);
18
+
19
+ virtgpu_shmem temp_shmem; // Local storage for large buffers
20
+ virtgpu_shmem * shmem = &temp_shmem;
21
+ bool using_shared_shmem = false;
22
+
23
+ if (cgraph_size <= gpu->data_shmem.mmap_size) {
24
+ // Lock mutex before using shared data_shmem buffer
25
+ if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
26
+ GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
27
+ }
28
+ using_shared_shmem = true;
29
+ shmem = &gpu->data_shmem;
30
+ } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) {
31
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
32
+ }
33
+
34
+ apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
35
+
36
+ apir_encode_size_t(encoder, &cgraph_size);
37
+
38
+ char * shmem_data = (char *) shmem->mmap_ptr;
39
+ apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size);
40
+
41
+ apir_encode_cgraph_data(&secondary_enc, cgraph_data);
42
+
43
+ REMOTE_CALL(gpu, encoder, decoder, ret);
44
+
45
+ ggml_status status = GGML_STATUS_ABORTED;
46
+ apir_decode_ggml_status(decoder, &status);
47
+
48
+ remote_call_finish(gpu, encoder, decoder);
49
+
50
+ // Unlock mutex before cleanup
51
+ if (using_shared_shmem) {
52
+ mtx_unlock(&gpu->data_shmem_mutex);
53
+ } else {
54
+ virtgpu_shmem_destroy(gpu, shmem);
55
+ }
56
+
57
+ return status;
58
+ }
@@ -0,0 +1,110 @@
1
+ #include "virtgpu-forward-impl.h"
2
+
3
+ char * apir_buffer_type_get_name(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
4
+ apir_encoder * encoder;
5
+ apir_decoder * decoder;
6
+ ApirForwardReturnCode ret;
7
+
8
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME);
9
+
10
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
11
+
12
+ REMOTE_CALL(gpu, encoder, decoder, ret);
13
+
14
+ const size_t string_size = apir_decode_array_size_unchecked(decoder);
15
+ char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size);
16
+ if (!string) {
17
+ GGML_LOG_ERROR(GGML_VIRTGPU "%s: Could not allocate the device name buffer\n", __func__);
18
+ apir_decoder_set_fatal(decoder);
19
+ }
20
+ apir_decode_char_array(decoder, string, string_size);
21
+
22
+ remote_call_finish(gpu, encoder, decoder);
23
+
24
+ return string;
25
+ }
26
+
27
+ size_t apir_buffer_type_get_alignment(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
28
+ apir_encoder * encoder;
29
+ apir_decoder * decoder;
30
+ ApirForwardReturnCode ret;
31
+
32
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT);
33
+
34
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
35
+
36
+ REMOTE_CALL(gpu, encoder, decoder, ret);
37
+
38
+ size_t alignment;
39
+ apir_decode_size_t(decoder, &alignment);
40
+
41
+ remote_call_finish(gpu, encoder, decoder);
42
+
43
+ return alignment;
44
+ }
45
+
46
+ size_t apir_buffer_type_get_max_size(virtgpu * gpu, apir_buffer_type_host_handle_t host_handle) {
47
+ apir_encoder * encoder;
48
+ apir_decoder * decoder;
49
+ ApirForwardReturnCode ret;
50
+
51
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE);
52
+
53
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
54
+
55
+ REMOTE_CALL(gpu, encoder, decoder, ret);
56
+
57
+ size_t max_size;
58
+ apir_decode_size_t(decoder, &max_size);
59
+
60
+ remote_call_finish(gpu, encoder, decoder);
61
+
62
+ return max_size;
63
+ }
64
+
65
+ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu,
66
+ apir_buffer_type_host_handle_t host_handle,
67
+ size_t size) {
68
+ apir_encoder * encoder;
69
+ apir_decoder * decoder;
70
+ ApirForwardReturnCode ret;
71
+
72
+ apir_buffer_context_t buffer_context;
73
+
74
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER);
75
+
76
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
77
+
78
+ apir_encode_size_t(encoder, &size);
79
+
80
+ REMOTE_CALL(gpu, encoder, decoder, ret);
81
+
82
+ apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle);
83
+
84
+ remote_call_finish(gpu, encoder, decoder);
85
+
86
+ return buffer_context;
87
+ }
88
+
89
+ size_t apir_buffer_type_get_alloc_size(virtgpu * gpu,
90
+ apir_buffer_type_host_handle_t host_handle,
91
+ const ggml_tensor * op) {
92
+ apir_encoder * encoder;
93
+ apir_decoder * decoder;
94
+ ApirForwardReturnCode ret;
95
+
96
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE);
97
+
98
+ apir_encode_apir_buffer_type_host_handle(encoder, host_handle);
99
+
100
+ apir_encode_ggml_tensor_inline(encoder, op);
101
+
102
+ REMOTE_CALL(gpu, encoder, decoder, ret);
103
+
104
+ size_t alloc_size;
105
+ apir_decode_size_t(decoder, &alloc_size);
106
+
107
+ remote_call_finish(gpu, encoder, decoder);
108
+
109
+ return alloc_size;
110
+ }
@@ -0,0 +1,173 @@
1
+ #include "virtgpu-forward-impl.h"
2
+
3
+ void * apir_buffer_get_base(virtgpu * gpu, apir_buffer_context_t * buffer_context) {
4
+ apir_encoder * encoder;
5
+ apir_decoder * decoder;
6
+ ApirForwardReturnCode ret;
7
+
8
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_BASE);
9
+
10
+ apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
11
+
12
+ REMOTE_CALL(gpu, encoder, decoder, ret);
13
+
14
+ uintptr_t base;
15
+ apir_decode_uintptr_t(decoder, &base);
16
+
17
+ remote_call_finish(gpu, encoder, decoder);
18
+
19
+ return (void *) base;
20
+ }
21
+
22
+ void apir_buffer_set_tensor(virtgpu * gpu,
23
+ apir_buffer_context_t * buffer_context,
24
+ ggml_tensor * tensor,
25
+ const void * data,
26
+ size_t offset,
27
+ size_t size) {
28
+ apir_encoder * encoder;
29
+ apir_decoder * decoder;
30
+ ApirForwardReturnCode ret;
31
+
32
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_SET_TENSOR);
33
+
34
+ apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
35
+ apir_encode_ggml_tensor(encoder, tensor);
36
+
37
+ virtgpu_shmem temp_shmem; // Local storage for large buffers
38
+ virtgpu_shmem * shmem = &temp_shmem;
39
+ bool using_shared_shmem = false;
40
+
41
+ if (size <= gpu->data_shmem.mmap_size) {
42
+ // Lock mutex before using shared data_shmem buffer
43
+ if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
44
+ GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
45
+ }
46
+ using_shared_shmem = true;
47
+ shmem = &gpu->data_shmem;
48
+
49
+ } else if (virtgpu_shmem_create(gpu, size, shmem)) {
50
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
51
+ }
52
+
53
+ memcpy(shmem->mmap_ptr, data, size);
54
+ apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
55
+
56
+ apir_encode_size_t(encoder, &offset);
57
+ apir_encode_size_t(encoder, &size);
58
+
59
+ REMOTE_CALL(gpu, encoder, decoder, ret);
60
+
61
+ remote_call_finish(gpu, encoder, decoder);
62
+
63
+ // Unlock mutex before cleanup
64
+ if (using_shared_shmem) {
65
+ mtx_unlock(&gpu->data_shmem_mutex);
66
+ } else {
67
+ virtgpu_shmem_destroy(gpu, shmem);
68
+ }
69
+
70
+ return;
71
+ }
72
+
73
+ void apir_buffer_get_tensor(virtgpu * gpu,
74
+ apir_buffer_context_t * buffer_context,
75
+ const ggml_tensor * tensor,
76
+ void * data,
77
+ size_t offset,
78
+ size_t size) {
79
+ apir_encoder * encoder;
80
+ apir_decoder * decoder;
81
+ ApirForwardReturnCode ret;
82
+
83
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_TENSOR);
84
+
85
+ apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
86
+ apir_encode_ggml_tensor(encoder, tensor);
87
+
88
+ virtgpu_shmem temp_shmem; // Local storage for large buffers
89
+ virtgpu_shmem * shmem = &temp_shmem;
90
+ bool using_shared_shmem = false;
91
+
92
+ if (size <= gpu->data_shmem.mmap_size) {
93
+ // Lock mutex before using shared data_shmem buffer
94
+ if (mtx_lock(&gpu->data_shmem_mutex) != thrd_success) {
95
+ GGML_ABORT(GGML_VIRTGPU "%s: Failed to lock data_shmem mutex", __func__);
96
+ }
97
+ using_shared_shmem = true;
98
+ shmem = &gpu->data_shmem;
99
+
100
+ } else if (virtgpu_shmem_create(gpu, size, shmem)) {
101
+ GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the guest-host shared buffer", __func__);
102
+ }
103
+
104
+ apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id);
105
+ apir_encode_size_t(encoder, &offset);
106
+ apir_encode_size_t(encoder, &size);
107
+
108
+ REMOTE_CALL(gpu, encoder, decoder, ret);
109
+
110
+ memcpy(data, shmem->mmap_ptr, size);
111
+
112
+ remote_call_finish(gpu, encoder, decoder);
113
+
114
+ // Unlock mutex before cleanup
115
+ if (using_shared_shmem) {
116
+ mtx_unlock(&gpu->data_shmem_mutex);
117
+ } else {
118
+ virtgpu_shmem_destroy(gpu, shmem);
119
+ }
120
+ }
121
+
122
+ bool apir_buffer_cpy_tensor(virtgpu * gpu,
123
+ apir_buffer_context_t * buffer_context,
124
+ const ggml_tensor * src,
125
+ const ggml_tensor * dst) {
126
+ apir_encoder * encoder;
127
+ apir_decoder * decoder;
128
+ ApirForwardReturnCode ret;
129
+
130
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR);
131
+
132
+ apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
133
+ apir_encode_ggml_tensor(encoder, src);
134
+ apir_encode_ggml_tensor(encoder, dst);
135
+
136
+ REMOTE_CALL(gpu, encoder, decoder, ret);
137
+
138
+ bool ret_val;
139
+ apir_decode_bool_t(decoder, &ret_val);
140
+
141
+ remote_call_finish(gpu, encoder, decoder);
142
+
143
+ return ret_val;
144
+ }
145
+
146
+ void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value) {
147
+ apir_encoder * encoder;
148
+ apir_decoder * decoder;
149
+ ApirForwardReturnCode ret;
150
+
151
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CLEAR);
152
+
153
+ apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
154
+ apir_encode_uint8_t(encoder, &value);
155
+
156
+ REMOTE_CALL(gpu, encoder, decoder, ret);
157
+
158
+ remote_call_finish(gpu, encoder, decoder);
159
+ }
160
+
161
+ void apir_buffer_free_buffer(virtgpu * gpu, apir_buffer_context_t * buffer_context) {
162
+ apir_encoder * encoder;
163
+ apir_decoder * decoder;
164
+ ApirForwardReturnCode ret;
165
+
166
+ REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER);
167
+
168
+ apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle);
169
+
170
+ REMOTE_CALL(gpu, encoder, decoder, ret);
171
+
172
+ remote_call_finish(gpu, encoder, decoder);
173
+ }