local-llm-rn 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/CMakeLists.txt +285 -0
- package/cpp/common/CMakeLists.txt +149 -0
- package/cpp/common/arg.cpp +3799 -0
- package/cpp/common/arg.h +131 -0
- package/cpp/common/base64.hpp +392 -0
- package/cpp/common/build-info.cpp.in +4 -0
- package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
- package/cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/common/chat-parser.cpp +1649 -0
- package/cpp/common/chat-parser.h +133 -0
- package/cpp/common/chat-peg-parser.cpp +124 -0
- package/cpp/common/chat-peg-parser.h +105 -0
- package/cpp/common/chat.cpp +3355 -0
- package/cpp/common/chat.h +252 -0
- package/cpp/common/common.cpp +1824 -0
- package/cpp/common/common.h +930 -0
- package/cpp/common/console.cpp +1137 -0
- package/cpp/common/console.h +41 -0
- package/cpp/common/debug.cpp +167 -0
- package/cpp/common/debug.h +43 -0
- package/cpp/common/download.cpp +792 -0
- package/cpp/common/download.h +84 -0
- package/cpp/common/http.h +84 -0
- package/cpp/common/jinja/README.md +88 -0
- package/cpp/common/jinja/caps.cpp +285 -0
- package/cpp/common/jinja/caps.h +30 -0
- package/cpp/common/jinja/lexer.cpp +341 -0
- package/cpp/common/jinja/lexer.h +157 -0
- package/cpp/common/jinja/parser.cpp +591 -0
- package/cpp/common/jinja/parser.h +21 -0
- package/cpp/common/jinja/runtime.cpp +867 -0
- package/cpp/common/jinja/runtime.h +638 -0
- package/cpp/common/jinja/string.cpp +213 -0
- package/cpp/common/jinja/string.h +61 -0
- package/cpp/common/jinja/utils.h +149 -0
- package/cpp/common/jinja/value.cpp +1393 -0
- package/cpp/common/jinja/value.h +756 -0
- package/cpp/common/json-partial.cpp +324 -0
- package/cpp/common/json-partial.h +39 -0
- package/cpp/common/json-schema-to-grammar.cpp +1153 -0
- package/cpp/common/json-schema-to-grammar.h +43 -0
- package/cpp/common/llguidance.cpp +258 -0
- package/cpp/common/log.cpp +446 -0
- package/cpp/common/log.h +119 -0
- package/cpp/common/ngram-cache.cpp +285 -0
- package/cpp/common/ngram-cache.h +101 -0
- package/cpp/common/ngram-map.cpp +530 -0
- package/cpp/common/ngram-map.h +115 -0
- package/cpp/common/ngram-mod.cpp +60 -0
- package/cpp/common/ngram-mod.h +38 -0
- package/cpp/common/peg-parser.cpp +1712 -0
- package/cpp/common/peg-parser.h +459 -0
- package/cpp/common/preset.cpp +483 -0
- package/cpp/common/preset.h +83 -0
- package/cpp/common/regex-partial.cpp +204 -0
- package/cpp/common/regex-partial.h +56 -0
- package/cpp/common/sampling.cpp +745 -0
- package/cpp/common/sampling.h +119 -0
- package/cpp/common/speculative.cpp +1074 -0
- package/cpp/common/speculative.h +41 -0
- package/cpp/common/unicode.cpp +64 -0
- package/cpp/common/unicode.h +22 -0
- package/cpp/ggml/CMakeLists.txt +494 -0
- package/cpp/ggml/cmake/GitVars.cmake +22 -0
- package/cpp/ggml/cmake/common.cmake +50 -0
- package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
- package/cpp/ggml/include/ggml-alloc.h +85 -0
- package/cpp/ggml/include/ggml-backend.h +373 -0
- package/cpp/ggml/include/ggml-blas.h +25 -0
- package/cpp/ggml/include/ggml-cann.h +123 -0
- package/cpp/ggml/include/ggml-cpp.h +39 -0
- package/cpp/ggml/include/ggml-cpu.h +151 -0
- package/cpp/ggml/include/ggml-cuda.h +47 -0
- package/cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/ggml/include/ggml-metal.h +61 -0
- package/cpp/ggml/include/ggml-opencl.h +26 -0
- package/cpp/ggml/include/ggml-opt.h +256 -0
- package/cpp/ggml/include/ggml-rpc.h +30 -0
- package/cpp/ggml/include/ggml-sycl.h +49 -0
- package/cpp/ggml/include/ggml-virtgpu.h +14 -0
- package/cpp/ggml/include/ggml-vulkan.h +29 -0
- package/cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/ggml/include/ggml-zdnn.h +17 -0
- package/cpp/ggml/include/ggml-zendnn.h +22 -0
- package/cpp/ggml/include/ggml.h +2753 -0
- package/cpp/ggml/include/gguf.h +204 -0
- package/cpp/ggml/src/CMakeLists.txt +492 -0
- package/cpp/ggml/src/ggml-alloc.c +1244 -0
- package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
- package/cpp/ggml/src/ggml-backend-dl.h +45 -0
- package/cpp/ggml/src/ggml-backend-impl.h +255 -0
- package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
- package/cpp/ggml/src/ggml-backend.cpp +2270 -0
- package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
- package/cpp/ggml/src/ggml-common.h +1878 -0
- package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
- package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- package/cpp/ggml/src/ggml-cpu/common.h +95 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
- package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
- package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
- package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
- package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
- package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
- package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
- package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
- package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
- package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
- package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
- package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
- package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
- package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
- package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
- package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
- package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
- package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
- package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
- package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
- package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
- package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
- package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
- package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
- package/cpp/ggml/src/ggml-impl.h +724 -0
- package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
- package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
- package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
- package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
- package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
- package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
- package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
- package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
- package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
- package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- package/cpp/ggml/src/ggml-opt.cpp +1093 -0
- package/cpp/ggml/src/ggml-quants.c +5325 -0
- package/cpp/ggml/src/ggml-quants.h +106 -0
- package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
- package/cpp/ggml/src/ggml-threading.cpp +12 -0
- package/cpp/ggml/src/ggml-threading.h +14 -0
- package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
- package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- package/cpp/ggml/src/ggml.c +7669 -0
- package/cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/ggml/src/gguf.cpp +1699 -0
- package/cpp/include/llama-cpp.h +32 -0
- package/cpp/include/llama.h +1568 -0
- package/cpp/mtmd/CMakeLists.txt +98 -0
- package/cpp/mtmd/README.md +63 -0
- package/cpp/mtmd/clip-graph.h +117 -0
- package/cpp/mtmd/clip-impl.h +586 -0
- package/cpp/mtmd/clip-model.h +390 -0
- package/cpp/mtmd/clip.cpp +4154 -0
- package/cpp/mtmd/clip.h +121 -0
- package/cpp/mtmd/deprecation-warning.cpp +22 -0
- package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
- package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
- package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
- package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
- package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
- package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
- package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
- package/cpp/mtmd/models/cogvlm.cpp +98 -0
- package/cpp/mtmd/models/conformer.cpp +216 -0
- package/cpp/mtmd/models/glm4v.cpp +122 -0
- package/cpp/mtmd/models/internvl.cpp +69 -0
- package/cpp/mtmd/models/kimik25.cpp +101 -0
- package/cpp/mtmd/models/kimivl.cpp +63 -0
- package/cpp/mtmd/models/llama4.cpp +96 -0
- package/cpp/mtmd/models/llava.cpp +374 -0
- package/cpp/mtmd/models/minicpmv.cpp +114 -0
- package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
- package/cpp/mtmd/models/models.h +128 -0
- package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
- package/cpp/mtmd/models/paddleocr.cpp +52 -0
- package/cpp/mtmd/models/pixtral.cpp +86 -0
- package/cpp/mtmd/models/qwen2vl.cpp +183 -0
- package/cpp/mtmd/models/qwen3vl.cpp +193 -0
- package/cpp/mtmd/models/siglip.cpp +86 -0
- package/cpp/mtmd/models/whisper-enc.cpp +115 -0
- package/cpp/mtmd/models/youtuvl.cpp +179 -0
- package/cpp/mtmd/mtmd-audio.cpp +730 -0
- package/cpp/mtmd/mtmd-audio.h +113 -0
- package/cpp/mtmd/mtmd-cli.cpp +437 -0
- package/cpp/mtmd/mtmd-helper.cpp +521 -0
- package/cpp/mtmd/mtmd-helper.h +96 -0
- package/cpp/mtmd/mtmd.cpp +1156 -0
- package/cpp/mtmd/mtmd.h +319 -0
- package/cpp/mtmd/requirements.txt +5 -0
- package/cpp/mtmd/test-1.jpeg +0 -0
- package/cpp/mtmd/test-2.mp3 +0 -0
- package/cpp/mtmd/tests.sh +192 -0
- package/cpp/src/CMakeLists.txt +169 -0
- package/cpp/src/llama-adapter.cpp +488 -0
- package/cpp/src/llama-adapter.h +89 -0
- package/cpp/src/llama-arch.cpp +2855 -0
- package/cpp/src/llama-arch.h +619 -0
- package/cpp/src/llama-batch.cpp +917 -0
- package/cpp/src/llama-batch.h +173 -0
- package/cpp/src/llama-chat.cpp +896 -0
- package/cpp/src/llama-chat.h +71 -0
- package/cpp/src/llama-context.cpp +3512 -0
- package/cpp/src/llama-context.h +359 -0
- package/cpp/src/llama-cparams.cpp +5 -0
- package/cpp/src/llama-cparams.h +44 -0
- package/cpp/src/llama-grammar.cpp +1464 -0
- package/cpp/src/llama-grammar.h +194 -0
- package/cpp/src/llama-graph.cpp +2685 -0
- package/cpp/src/llama-graph.h +1026 -0
- package/cpp/src/llama-hparams.cpp +234 -0
- package/cpp/src/llama-hparams.h +339 -0
- package/cpp/src/llama-impl.cpp +171 -0
- package/cpp/src/llama-impl.h +73 -0
- package/cpp/src/llama-io.cpp +15 -0
- package/cpp/src/llama-io.h +35 -0
- package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
- package/cpp/src/llama-kv-cache-iswa.h +137 -0
- package/cpp/src/llama-kv-cache.cpp +2271 -0
- package/cpp/src/llama-kv-cache.h +388 -0
- package/cpp/src/llama-kv-cells.h +533 -0
- package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
- package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
- package/cpp/src/llama-memory-hybrid.cpp +268 -0
- package/cpp/src/llama-memory-hybrid.h +139 -0
- package/cpp/src/llama-memory-recurrent.cpp +1165 -0
- package/cpp/src/llama-memory-recurrent.h +182 -0
- package/cpp/src/llama-memory.cpp +59 -0
- package/cpp/src/llama-memory.h +122 -0
- package/cpp/src/llama-mmap.cpp +785 -0
- package/cpp/src/llama-mmap.h +92 -0
- package/cpp/src/llama-model-loader.cpp +1414 -0
- package/cpp/src/llama-model-loader.h +203 -0
- package/cpp/src/llama-model-saver.cpp +286 -0
- package/cpp/src/llama-model-saver.h +37 -0
- package/cpp/src/llama-model.cpp +9253 -0
- package/cpp/src/llama-model.h +576 -0
- package/cpp/src/llama-quant.cpp +1119 -0
- package/cpp/src/llama-quant.h +1 -0
- package/cpp/src/llama-sampler.cpp +3885 -0
- package/cpp/src/llama-sampler.h +42 -0
- package/cpp/src/llama-vocab.cpp +3970 -0
- package/cpp/src/llama-vocab.h +187 -0
- package/cpp/src/llama.cpp +1313 -0
- package/cpp/src/models/afmoe.cpp +191 -0
- package/cpp/src/models/apertus.cpp +125 -0
- package/cpp/src/models/arcee.cpp +135 -0
- package/cpp/src/models/arctic.cpp +138 -0
- package/cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/src/models/baichuan.cpp +122 -0
- package/cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/src/models/bert.cpp +178 -0
- package/cpp/src/models/bitnet.cpp +160 -0
- package/cpp/src/models/bloom.cpp +101 -0
- package/cpp/src/models/chameleon.cpp +178 -0
- package/cpp/src/models/chatglm.cpp +132 -0
- package/cpp/src/models/codeshell.cpp +111 -0
- package/cpp/src/models/cogvlm.cpp +102 -0
- package/cpp/src/models/cohere2-iswa.cpp +134 -0
- package/cpp/src/models/command-r.cpp +122 -0
- package/cpp/src/models/dbrx.cpp +123 -0
- package/cpp/src/models/deci.cpp +135 -0
- package/cpp/src/models/deepseek.cpp +144 -0
- package/cpp/src/models/deepseek2.cpp +262 -0
- package/cpp/src/models/delta-net-base.cpp +376 -0
- package/cpp/src/models/dots1.cpp +134 -0
- package/cpp/src/models/dream.cpp +105 -0
- package/cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/src/models/eurobert.cpp +97 -0
- package/cpp/src/models/exaone-moe.cpp +146 -0
- package/cpp/src/models/exaone.cpp +114 -0
- package/cpp/src/models/exaone4.cpp +123 -0
- package/cpp/src/models/falcon-h1.cpp +111 -0
- package/cpp/src/models/falcon.cpp +120 -0
- package/cpp/src/models/gemma-embedding.cpp +116 -0
- package/cpp/src/models/gemma.cpp +112 -0
- package/cpp/src/models/gemma2-iswa.cpp +128 -0
- package/cpp/src/models/gemma3.cpp +155 -0
- package/cpp/src/models/gemma3n-iswa.cpp +384 -0
- package/cpp/src/models/glm4-moe.cpp +170 -0
- package/cpp/src/models/glm4.cpp +157 -0
- package/cpp/src/models/gpt2.cpp +105 -0
- package/cpp/src/models/gptneox.cpp +144 -0
- package/cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/src/models/granite.cpp +211 -0
- package/cpp/src/models/grok.cpp +159 -0
- package/cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/src/models/internlm2.cpp +120 -0
- package/cpp/src/models/jais.cpp +86 -0
- package/cpp/src/models/jais2.cpp +123 -0
- package/cpp/src/models/jamba.cpp +106 -0
- package/cpp/src/models/kimi-linear.cpp +392 -0
- package/cpp/src/models/lfm2.cpp +190 -0
- package/cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/src/models/llada.cpp +99 -0
- package/cpp/src/models/llama-iswa.cpp +178 -0
- package/cpp/src/models/llama.cpp +168 -0
- package/cpp/src/models/maincoder.cpp +117 -0
- package/cpp/src/models/mamba-base.cpp +285 -0
- package/cpp/src/models/mamba.cpp +54 -0
- package/cpp/src/models/mimo2-iswa.cpp +123 -0
- package/cpp/src/models/minicpm3.cpp +200 -0
- package/cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/src/models/mistral3.cpp +160 -0
- package/cpp/src/models/models.h +684 -0
- package/cpp/src/models/modern-bert.cpp +109 -0
- package/cpp/src/models/mpt.cpp +126 -0
- package/cpp/src/models/nemotron-h.cpp +148 -0
- package/cpp/src/models/nemotron.cpp +122 -0
- package/cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/src/models/olmo.cpp +121 -0
- package/cpp/src/models/olmo2.cpp +150 -0
- package/cpp/src/models/olmoe.cpp +124 -0
- package/cpp/src/models/openai-moe-iswa.cpp +127 -0
- package/cpp/src/models/openelm.cpp +124 -0
- package/cpp/src/models/orion.cpp +123 -0
- package/cpp/src/models/paddleocr.cpp +122 -0
- package/cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/src/models/phi2.cpp +121 -0
- package/cpp/src/models/phi3.cpp +152 -0
- package/cpp/src/models/plamo.cpp +110 -0
- package/cpp/src/models/plamo2.cpp +318 -0
- package/cpp/src/models/plamo3.cpp +128 -0
- package/cpp/src/models/plm.cpp +169 -0
- package/cpp/src/models/qwen.cpp +108 -0
- package/cpp/src/models/qwen2.cpp +126 -0
- package/cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/src/models/qwen3.cpp +117 -0
- package/cpp/src/models/qwen35.cpp +386 -0
- package/cpp/src/models/qwen35moe.cpp +420 -0
- package/cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/src/models/qwen3next.cpp +525 -0
- package/cpp/src/models/qwen3vl-moe.cpp +140 -0
- package/cpp/src/models/qwen3vl.cpp +132 -0
- package/cpp/src/models/refact.cpp +94 -0
- package/cpp/src/models/rnd1.cpp +126 -0
- package/cpp/src/models/rwkv6-base.cpp +164 -0
- package/cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/src/models/rwkv7-base.cpp +137 -0
- package/cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/src/models/smallthinker.cpp +126 -0
- package/cpp/src/models/smollm3.cpp +128 -0
- package/cpp/src/models/stablelm.cpp +146 -0
- package/cpp/src/models/starcoder.cpp +100 -0
- package/cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/src/models/step35-iswa.cpp +168 -0
- package/cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/src/models/xverse.cpp +108 -0
- package/cpp/src/unicode-data.cpp +7034 -0
- package/cpp/src/unicode-data.h +20 -0
- package/cpp/src/unicode.cpp +1103 -0
- package/cpp/src/unicode.h +111 -0
- package/cpp/vendor/nlohmann/json.hpp +25526 -0
- package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/vendor/stb/stb_image.h +7988 -0
- package/ios/LocalLLM-Bridging-Header.h +2 -0
- package/ios/LocalLLM.h +5 -0
- package/ios/LocalLLM.mm +1267 -0
- package/local-llm-rn.podspec +60 -0
- package/package.json +35 -0
- package/src/NativeLocalLLM.ts +73 -0
- package/src/device.ts +50 -0
- package/src/download-adapter.ts +17 -0
- package/src/index.ts +21 -0
- package/src/native-bridge.ts +142 -0
- package/src/rn-downloader.ts +37 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
#include "apir_cs.h"
|
|
2
|
+
#include "apir_cs_rpc.h"
|
|
3
|
+
#include "ggml-impl.h"
|
|
4
|
+
|
|
5
|
+
// ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer);
|
|
6
|
+
|
|
7
|
+
static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle);
|
|
8
|
+
|
|
9
|
+
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec);
|
|
10
|
+
|
|
11
|
+
/* apir_rpc_tensor */
|
|
12
|
+
|
|
13
|
+
// Append one apir_rpc_tensor record to the encoder stream as raw bytes.
// The same byte count is passed as both size arguments of apir_encode().
static inline void apir_encode_rcp_tensor(apir_encoder * enc, const apir_rpc_tensor * apir_rpc_tensor) {
    const size_t record_bytes = sizeof(*apir_rpc_tensor);

    apir_encode(enc, record_bytes, apir_rpc_tensor, record_bytes);
}
|
|
17
|
+
|
|
18
|
+
// Reserve sizeof(apir_rpc_tensor) bytes of the decoder stream and return them
// in place (no copy) as an apir_rpc_tensor pointer.
// The result is whatever apir_decoder_use_inplace() yields, cast to the record type.
static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(apir_decoder * dec) {
    return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(apir_rpc_tensor));
}
|
|
23
|
+
|
|
24
|
+
// Reserve room for `n_tensors` consecutive apir_rpc_tensor records in the
// decoder stream and return a pointer to the first one, in place (no copy).
static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, uint32_t n_tensors) {
    const size_t array_bytes = sizeof(apir_rpc_tensor) * n_tensors;

    return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, array_bytes);
}
|
|
29
|
+
|
|
30
|
+
/* ggml_tensor */
|
|
31
|
+
|
|
32
|
+
// Convert `tensor` to its apir_rpc_tensor form with apir_serialize_tensor()
// and append that record to the encoder stream.
static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor * tensor) {
    apir_rpc_tensor wire_record = apir_serialize_tensor(tensor);

    apir_encode_rcp_tensor(enc, &wire_record);
}
|
|
37
|
+
|
|
38
|
+
// Decode one apir_rpc_tensor record in place and rebuild a ggml_tensor from it.
//
// Returns NULL when the record cannot be obtained from the decoder. Otherwise a
// fresh ggml context sized for a single tensor header (no_alloc) is created and
// the result of apir_deserialize_tensor() is returned.
// NOTE(review): the context created here is not released in this function.
static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) {
    const apir_rpc_tensor * wire_record = apir_decode_apir_rpc_tensor_inplace(dec);
    if (wire_record == NULL) {
        return NULL;
    }

    ggml_init_params init_params{
        /*.mem_size   =*/ ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    ggml_context * ctx = ggml_init(init_params);

    return apir_deserialize_tensor(ctx, wire_record);
}
|
|
57
|
+
|
|
58
|
+
/* *** ggml_backend_buffer_type_t *** */
|
|
59
|
+
|
|
60
|
+
// ggml_backend_buffer_type_t is a POINTER (to a struct).
|
|
61
|
+
// Only the host pointer is shared between the host and guest.
|
|
62
|
+
// The guest stores it in `buft->context`.
|
|
63
|
+
// The host simply writes the pointer address in the buffer variable.
|
|
64
|
+
|
|
65
|
+
// Send a buffer type as its host handle: the handle obtained from
// ggml_buffer_type_to_apir_handle() is written to the stream by value.
static inline void apir_encode_ggml_buffer_type(apir_encoder * enc, ggml_backend_buffer_type_t buft) {
    apir_buffer_type_host_handle_t host_handle = ggml_buffer_type_to_apir_handle(buft);
    const size_t                   handle_size = sizeof(host_handle);

    apir_encoder_write(enc, handle_size, &host_handle, handle_size);
}
|
|
69
|
+
|
|
70
|
+
// Read a buffer-type host handle from the stream and reinterpret it as a
// ggml_backend_buffer_type_t (the handle carries the pointer value).
static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decoder * dec) {
    apir_buffer_type_host_handle_t host_handle;
    const size_t                   handle_size = sizeof(host_handle);

    apir_decoder_read(dec, handle_size, &host_handle, handle_size);

    return (ggml_backend_buffer_type_t) host_handle;
}
|
|
77
|
+
|
|
78
|
+
// Write an already-computed buffer-type host handle to the stream by value.
static inline void apir_encode_apir_buffer_type_host_handle(apir_encoder * enc, apir_buffer_type_host_handle_t handle) {
    const size_t handle_size = sizeof(handle);

    apir_encoder_write(enc, handle_size, &handle, handle_size);
}
|
|
81
|
+
|
|
82
|
+
// Read a buffer-type host handle from the stream and return it unchanged.
static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) {
    apir_buffer_type_host_handle_t decoded;
    const size_t                   handle_size = sizeof(decoded);

    apir_decoder_read(dec, handle_size, &decoded, handle_size);

    return decoded;
}
|
|
89
|
+
|
|
90
|
+
/* *** ggml_backend_buffer_t *** */
|
|
91
|
+
|
|
92
|
+
// ggml_backend_buffer_t is a POINTER.
|
|
93
|
+
// same logic as for ggml_backend_buffer_type_t
|
|
94
|
+
|
|
95
|
+
// Send a buffer as its host handle: BUFFER_TO_HOST_HANDLE(buffer) is written
// to the stream by value.
static inline void apir_encode_ggml_buffer(apir_encoder * enc, const ggml_backend_buffer_t buffer) {
    apir_buffer_host_handle_t host_handle = BUFFER_TO_HOST_HANDLE(buffer);
    const size_t              handle_size = sizeof(host_handle);

    apir_encoder_write(enc, handle_size, &host_handle, handle_size);
}
|
|
99
|
+
|
|
100
|
+
// Read a buffer pointer from the stream and accept it only if it is known.
//
// A null pointer is returned as-is. A non-null pointer must be present in the
// `backend_buffers` set; otherwise a warning is logged, the decoder is marked
// fatal and NULL is returned.
static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec) {
    ggml_backend_buffer_t candidate;
    const size_t          ptr_size = sizeof(candidate);

    apir_decoder_read(dec, ptr_size, &candidate, ptr_size);

    if (!candidate) {
        return candidate;
    }

    // SECURITY: only handles tracked in backend_buffers are accepted, so the
    // guest VM cannot make the host use an arbitrary memory address.
    extern std::unordered_set<ggml_backend_buffer_t> backend_buffers;
    if (backend_buffers.count(candidate) != 0) {
        return candidate;
    }

    GGML_LOG_WARN("ggml-virtgpu-backend: %s: Invalid buffer handle from guest: %p\n", __func__,
                  (void *) candidate);
    // Stop any further processing that would rely on the rejected handle.
    apir_decoder_set_fatal(dec);

    return NULL;
}
|
|
121
|
+
|
|
122
|
+
/* enum ggml_status */
|
|
123
|
+
|
|
124
|
+
// Write the ggml_status value pointed to by `status` to the stream.
static inline void apir_encode_ggml_status(apir_encoder * enc, const ggml_status * status) {
    const size_t status_bytes = sizeof(*status);

    apir_encoder_write(enc, status_bytes, status, status_bytes);
}
|
|
127
|
+
|
|
128
|
+
// Read a ggml_status value from the stream into `*status`.
static inline void apir_decode_ggml_status(apir_decoder * dec, ggml_status * status) {
    const size_t status_bytes = sizeof(*status);

    apir_decoder_read(dec, status_bytes, status, status_bytes);
}
|
|
131
|
+
|
|
132
|
+
/* virtgpu_shmem */
|
|
133
|
+
|
|
134
|
+
// Write a shared-memory resource id to the stream as a uint32_t.
static inline void apir_encode_virtgpu_shmem_res_id(apir_encoder * enc, uint32_t shmem_res_id) {
    const uint32_t * res_id_ptr = &shmem_res_id;

    apir_encode_uint32_t(enc, res_id_ptr);
}
|
|
137
|
+
|
|
138
|
+
// Read a uint32_t shared-memory resource id from the stream into `*shmem_res_id`.
static inline void apir_decode_virtgpu_shmem_res_id(apir_decoder * dec, uint32_t * shmem_res_id) {
    uint32_t * out_res_id = shmem_res_id;

    apir_decode_uint32_t(dec, out_res_id);
}
|
|
141
|
+
|
|
142
|
+
/* ggml_cgraph */
|
|
143
|
+
|
|
144
|
+
// Serialize `cgraph` into `cgraph_data` via apir_serialize_graph() and return
// the resulting size of `cgraph_data` in bytes.
static inline size_t apir_serialize_ggml_cgraph(ggml_cgraph * cgraph, std::vector<uint8_t> & cgraph_data) {
    apir_serialize_graph(cgraph, cgraph_data);

    const size_t serialized_bytes = cgraph_data.size();
    return serialized_bytes;
}
|
|
149
|
+
|
|
150
|
+
// Append the whole serialized graph byte vector to the encoder stream.
static inline void apir_encode_cgraph_data(apir_encoder * enc, std::vector<uint8_t> & cgraph_data) {
    uint8_t *    payload       = cgraph_data.data();
    const size_t payload_bytes = cgraph_data.size();

    apir_encode(enc, payload_bytes, payload, payload_bytes);
}
|
|
155
|
+
|
|
156
|
+
// Decode a serialized graph from the stream and rebuild it.
//
// Stream layout read here, in order: node count (uint32), that many uint64
// node entries, tensor count (uint32), that many apir_rpc_tensor records.
// Both arrays are taken in place. `cgraph_size` is accepted but not used.
// NOTE(review): the counts and the in-place array pointers are handed to
// apir_deserialize_graph() without validation here - confirm the callee or the
// caller checks the decoder state.
static inline ggml_cgraph * apir_decode_ggml_cgraph(apir_decoder * dec, size_t cgraph_size) {
    GGML_UNUSED(cgraph_size);

    uint32_t node_count;
    apir_decode_uint32_t(dec, &node_count);
    const uint64_t * node_ids = apir_decode_uint64_t_array_inplace(dec, node_count);

    uint32_t tensor_count;
    apir_decode_uint32_t(dec, &tensor_count);
    const apir_rpc_tensor * tensor_records = apir_decode_apir_rpc_tensor_array_inplace(dec, tensor_count);

    ggml_cgraph * graph = apir_deserialize_graph(node_count, tensor_count, tensor_records, node_ids);
    return graph;
}
|
|
169
|
+
|
|
170
|
+
// Write the buffer host handle that `handle` points to into the stream.
//
// sizeof(*handle) bytes are copied starting at `handle`, i.e. the handle value
// itself. The source must be `handle`, not `&handle`: the latter is the
// address of the local pointer parameter, so its bytes would be sent instead
// of the handle.
static inline void apir_encode_ggml_buffer_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle) {
    apir_encoder_write(enc, sizeof(*handle), handle, sizeof(*handle));
}
|
|
173
|
+
|
|
174
|
+
// Encode a ggml_tensor struct verbatim, followed by the objects it points to.
//
// Stream layout written here, in order:
//   1. the raw ggml_tensor bytes;
//   2. the buffer host handle, only when tensor->buffer is set;
//   3. the raw bytes of tensor->view_src, only when it is set;
//   4. the raw bytes of each source tensor, stopping at the first NULL entry
//      or after GGML_MAX_SRC entries, whichever comes first.
// Aborts when tensor->extra is set. Logs a one-time warning when the tensor
// has both a first source and a buffer.
static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml_tensor * tensor) {
    const size_t tensor_size = sizeof(*tensor);

    if (tensor->extra) {
        GGML_ABORT("%s: Cannot pass tensors with extra", __func__);
    }

    if (tensor->src[0] && tensor->buffer) {
        static int first = 1;
        if (first) {
            GGML_LOG_WARN("%s: Cannot pass tensors with src and buffer\n", __func__);
            first = 0;
        }
    }

    apir_encoder_write(enc, tensor_size, tensor, tensor_size);

    // tensor->data is a pointer inside the device buffer. No need to touch it.
    // tensor->buffer is a pointer to a buffer. Encode the buffer handle in sequence.
    // (could also make a copy of the tensor, and update locally.)

    if (tensor->buffer) {
        apir_buffer_host_handle_t buffer_handle = ggml_buffer_to_apir_handle(tensor->buffer);
        apir_encode_ggml_buffer_handle(enc, &buffer_handle);
    }

    if (tensor->view_src) {
        apir_encoder_write(enc, tensor_size, tensor->view_src, tensor_size);
    }

    // The src array has exactly GGML_MAX_SRC slots and no terminator is
    // guaranteed when all of them are used, so the index must be bounded.
    for (int i = 0; i < GGML_MAX_SRC && tensor->src[i]; i++) {
        const ggml_tensor * tensor_src = tensor->src[i];
        apir_encoder_write(enc, tensor_size, tensor_src, tensor_size);
    }
}
|
|
209
|
+
|
|
210
|
+
// Decode a ggml_tensor that was encoded verbatim, fixing up its pointers.
//
// The tensor struct is used in place inside the decoder's memory and is
// modified: `buffer`, `view_src` and each non-NULL `src[i]` are overwritten
// with the objects decoded right after the tensor, in that order.
// Returns NULL when the tensor struct itself cannot be obtained from the
// decoder. Pointer fields whose follow-up object cannot be decoded are left
// set to whatever the decoder returned (possibly NULL).
static inline const ggml_tensor * apir_decode_ggml_tensor_inplace(apir_decoder * dec) {
    // It is safe to remove the `const` qualifier here, we *do* want to
    // modify the shared memory data to fix the `src` pointers.
    ggml_tensor * tensor = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
    if (tensor == NULL) {
        // Nothing to fix up; dereferencing the result would be invalid.
        return NULL;
    }

    // tensor->data is a pointer inside the device buffer. No need to touch it.
    // tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence.
    if (tensor->buffer) {
        tensor->buffer = apir_decode_ggml_buffer(dec);
    }

    if (tensor->view_src) {
        ggml_tensor * tensor_view_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
        tensor->view_src = tensor_view_src;
    }

    // The src array has exactly GGML_MAX_SRC slots; the incoming struct is not
    // trusted to contain a NULL terminator, so the index must be bounded.
    for (int i = 0; i < GGML_MAX_SRC && tensor->src[i]; i++) {
        ggml_tensor * tensor_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor));
        tensor->src[i] = tensor_src;  // overwrite op->src[i] pointer with the actual location of the src tensor
    }

    return tensor;
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
// clang-format off
|
|
4
|
+
#include "ggml.h"
|
|
5
|
+
#include "ggml-backend-impl.h"
|
|
6
|
+
|
|
7
|
+
#include <unordered_map>
|
|
8
|
+
#include <unordered_set>
|
|
9
|
+
#include <vector>
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
// clang-format on
|
|
12
|
+
|
|
13
|
+
// ggml_tensor is serialized into apir_rpc_tensor
|
|
14
|
+
struct apir_rpc_tensor {
|
|
15
|
+
uint64_t id;
|
|
16
|
+
uint32_t type;
|
|
17
|
+
uint64_t buffer;
|
|
18
|
+
uint32_t ne[GGML_MAX_DIMS];
|
|
19
|
+
uint32_t nb[GGML_MAX_DIMS];
|
|
20
|
+
uint32_t op;
|
|
21
|
+
int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
|
|
22
|
+
int32_t flags;
|
|
23
|
+
uint64_t src[GGML_MAX_SRC];
|
|
24
|
+
uint64_t view_src;
|
|
25
|
+
uint64_t view_offs;
|
|
26
|
+
uint64_t data;
|
|
27
|
+
char name[GGML_MAX_NAME];
|
|
28
|
+
|
|
29
|
+
char padding[4];
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
/* frontend */
|
|
33
|
+
|
|
34
|
+
apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor);
|
|
35
|
+
|
|
36
|
+
void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output);
|
|
37
|
+
|
|
38
|
+
/* backend */
|
|
39
|
+
|
|
40
|
+
void apir_track_backend_buffer(ggml_backend_buffer_t buffer);
|
|
41
|
+
bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer);
|
|
42
|
+
std::unordered_set<ggml_backend_buffer_t> apir_get_track_backend_buffers();
|
|
43
|
+
|
|
44
|
+
void apir_add_tensor(ggml_tensor * tensor,
|
|
45
|
+
std::vector<apir_rpc_tensor> & tensors,
|
|
46
|
+
std::unordered_set<ggml_tensor *> & visited);
|
|
47
|
+
|
|
48
|
+
ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor);
|
|
49
|
+
|
|
50
|
+
ggml_tensor * apir_create_node(uint64_t id,
|
|
51
|
+
ggml_context * ctx,
|
|
52
|
+
const std::unordered_map<uint64_t, const apir_rpc_tensor *> & tensor_ptrs,
|
|
53
|
+
std::unordered_map<uint64_t, ggml_tensor *> & tensor_map);
|
|
54
|
+
|
|
55
|
+
ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes,
|
|
56
|
+
uint32_t n_tensors,
|
|
57
|
+
const apir_rpc_tensor * tensors,
|
|
58
|
+
const uint64_t * nodes);
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#include "ggml-remoting.h"
|
|
2
|
+
|
|
3
|
+
// Allocate a remoting buffer of `size` bytes for buffer type `buft`.
//
// A heap-allocated ggml_backend_remoting_buffer_context is attached to the
// returned buffer. When the device reports buffer_from_host_ptr, the buffer is
// created with apir_device_buffer_from_ptr() and its base is the shmem mapping;
// otherwise it is created with apir_buffer_type_alloc_buffer() and the base is
// left NULL. Aborts if the context cannot be allocated.
static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft,
                                                                            size_t                     size) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    ggml_backend_remoting_buffer_context * buf_ctx =
        (ggml_backend_remoting_buffer_context *) malloc(sizeof(ggml_backend_remoting_buffer_context));
    if (buf_ctx == NULL) {
        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__);
    }

    buf_ctx->gpu = gpu;

    // Only buffer_from_host_ptr is consulted; the other properties are ignored.
    bool ignored_async, ignored_host_buffer, ignored_events;
    bool from_host_ptr;
    apir_device_get_props(gpu, &ignored_async, &ignored_host_buffer, &from_host_ptr, &ignored_events);

    if (!from_host_ptr) {
        buf_ctx->apir_context = apir_buffer_type_alloc_buffer(gpu, gpu->cached_buffer_type.host_handle, size);
        buf_ctx->is_from_ptr  = false;
        buf_ctx->base         = NULL;
    } else {
        buf_ctx->apir_context = apir_device_buffer_from_ptr(gpu, size, size);
        buf_ctx->base         = buf_ctx->apir_context.shmem.mmap_ptr;
        buf_ctx->is_from_ptr  = true;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_remoting_buffer_interface, (void *) buf_ctx, size);
}
|
|
33
|
+
|
|
34
|
+
// Return the buffer-type name cached in the virtgpu object
// (the prefixed name that was built once during initialization).
static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    virtgpu * remote_gpu = BUFT_TO_GPU(buft);

    const char * cached_name = remote_gpu->cached_buffer_type.name;
    return cached_name;
}
|
|
40
|
+
|
|
41
|
+
// Return the buffer-type alignment cached in the virtgpu object.
static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    virtgpu * remote_gpu = BUFT_TO_GPU(buft);

    const size_t cached_alignment = remote_gpu->cached_buffer_type.alignment;
    return cached_alignment;
}
|
|
46
|
+
|
|
47
|
+
// Return the buffer-type maximum size cached in the virtgpu object.
static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    virtgpu * remote_gpu = BUFT_TO_GPU(buft);

    const size_t cached_max_size = remote_gpu->cached_buffer_type.max_size;
    return cached_max_size;
}
|
|
52
|
+
|
|
53
|
+
// Return the number of bytes to allocate for `tensor` in this buffer type.
//
// The remote side is asked (apir_buffer_type_get_alloc_size) only when the
// tensor already has a buffer with a context whose buffer type is supported by
// this device; in every other case the answer is ggml_nbytes(tensor).
static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft,
                                                               const ggml_tensor *        tensor) {
    virtgpu * gpu = BUFT_TO_GPU(buft);

    // Evaluated left to right with short-circuiting, so the buffer is only
    // dereferenced once it is known to be non-NULL.
    const bool ask_remote = tensor->buffer != NULL && tensor->buffer->context &&
                            buft->device->iface.supports_buft(buft->device, tensor->buffer->buft);

    if (ask_remote) {
        return apir_buffer_type_get_alloc_size(gpu, gpu->cached_buffer_type.host_handle, tensor);
    }

    return ggml_nbytes(tensor);
}
|
|
64
|
+
|
|
65
|
+
// Callback table for the regular remoting buffer type.
// Entries are positional; the comments name the slot each one fills.
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = {
    /* .get_name       = */ ggml_backend_remoting_buffer_type_get_name,
    /* .alloc_buffer   = */ ggml_backend_remoting_buffer_type_alloc_buffer,
    /* .get_alignment  = */ ggml_backend_remoting_buffer_type_get_alignment,
    /* .get_max_size   = */ ggml_backend_remoting_buffer_type_get_max_size,
    /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
    /* .is_host        = */ NULL,
};
|
|
73
|
+
|
|
74
|
+
// Callback table for the "from pointer" remoting buffer type.
// Same as the regular table except that no alloc_buffer callback is provided.
const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface = {
    /* .get_name       = */ ggml_backend_remoting_buffer_type_get_name,
    /* .alloc_buffer   = */ NULL,
    /* .get_alignment  = */ ggml_backend_remoting_buffer_type_get_alignment,
    /* .get_max_size   = */ ggml_backend_remoting_buffer_type_get_max_size,
    /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size,
    /* .is_host        = */ NULL,
};
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#include "ggml-remoting.h"
|
|
2
|
+
|
|
3
|
+
#define BUFFER_TO_GPU(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->gpu
|
|
4
|
+
|
|
5
|
+
// Return the base address of `buffer`.
// The address is fetched with apir_buffer_get_base() the first time it is
// needed and stored in the buffer context; later calls return the stored value.
static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) {
    ggml_backend_remoting_buffer_context * buf_ctx = (ggml_backend_remoting_buffer_context *) buffer->context;

    if (!buf_ctx->base) {
        buf_ctx->base = apir_buffer_get_base(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer));
    }

    return buf_ctx->base;
}
|
|
15
|
+
|
|
16
|
+
// Copy `size` bytes from `data` into `tensor` at byte `offset`.
// Buffers created from a pointer are written with a local memcpy; all others
// go through apir_buffer_set_tensor().
static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                                    ggml_tensor *         tensor,
                                                    const void *          data,
                                                    size_t                offset,
                                                    size_t                size) {
    virtgpu * gpu = BUFFER_TO_GPU(buffer);

    ggml_backend_remoting_buffer_context * buf_ctx = BUFFER_TO_GGML_CONTEXT(buffer);

    if (!buf_ctx->is_from_ptr) {
        apir_buffer_set_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size);
        return;
    }

    char * dst = (char *) tensor->data + offset;
    memcpy(dst, data, size);
}
|
|
32
|
+
|
|
33
|
+
// Copy `size` bytes of `tensor`, starting at byte `offset`, into `data`.
// Buffers created from a pointer are read with a local memcpy; all others go
// through apir_buffer_get_tensor().
static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                                    const ggml_tensor *   tensor,
                                                    void *                data,
                                                    size_t                offset,
                                                    size_t                size) {
    virtgpu *                              gpu     = BUFFER_TO_GPU(buffer);
    ggml_backend_remoting_buffer_context * buf_ctx = BUFFER_TO_GGML_CONTEXT(buffer);

    if (!buf_ctx->is_from_ptr) {
        apir_buffer_get_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size);
        return;
    }

    const char * src = (const char *) tensor->data + offset;
    memcpy(data, src, size);
}
|
|
46
|
+
|
|
47
|
+
// set_tensor for "from pointer" buffers: a plain memcpy of `size` bytes from
// `data` into tensor->data at byte `offset`. `buffer` is not used.
static void ggml_backend_remoting_buffer_set_tensor_from_ptr(ggml_backend_buffer_t buffer,
                                                             ggml_tensor *         tensor,
                                                             const void *          data,
                                                             size_t                offset,
                                                             size_t                size) {
    UNUSED(buffer);

    char * dst = (char *) tensor->data + offset;
    memcpy(dst, data, size);
}
|
|
58
|
+
|
|
59
|
+
// get_tensor for "from pointer" buffers: a plain memcpy of `size` bytes from
// tensor->data at byte `offset` into `data`. `buffer` is not used.
static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer_t buffer,
                                                             const ggml_tensor *   tensor,
                                                             void *                data,
                                                             size_t                offset,
                                                             size_t                size) {
    UNUSED(buffer);

    const char * src = (const char *) tensor->data + offset;
    memcpy(data, src, size);
}
|
|
68
|
+
|
|
69
|
+
// Copy tensor `src` to `dst` through apir_buffer_cpy_tensor() and return its result.
static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
                                                    const ggml_tensor *   src,
                                                    ggml_tensor *         dst) {
    virtgpu * remote_gpu = BUFFER_TO_GPU(buffer);

    const bool copied = apir_buffer_cpy_tensor(remote_gpu, BUFFER_TO_APIR_CONTEXT(buffer), src, dst);
    return copied;
}
|
|
78
|
+
|
|
79
|
+
// Forward a clear request with byte `value` to apir_buffer_clear().
static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    virtgpu * remote_gpu = BUFFER_TO_GPU(buffer);

    apir_buffer_clear(remote_gpu, BUFFER_TO_APIR_CONTEXT(buffer), value);
}
|
|
86
|
+
|
|
87
|
+
// Release a remoting buffer: first the remote side via apir_buffer_free_buffer(),
// then the heap-allocated buffer context, whose pointer is reset to NULL.
static void ggml_backend_remoting_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    virtgpu * remote_gpu = BUFFER_TO_GPU(buffer);

    // Must happen while the context is still alive: the APIR context lives inside it.
    apir_buffer_free_buffer(remote_gpu, BUFFER_TO_APIR_CONTEXT(buffer));

    ggml_backend_remoting_buffer_context * buf_ctx = BUFFER_TO_GGML_CONTEXT(buffer);
    free(buf_ctx);

    buffer->context = NULL;
}
|
|
96
|
+
|
|
97
|
+
// Callback table for regular remoting buffers.
// Entries are positional; the comments name the slot each one fills.
const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface = {
    /* .free_buffer   = */ ggml_backend_remoting_buffer_free_buffer,
    /* .get_base      = */ ggml_backend_remoting_buffer_get_base,
    /* .init_tensor   = */ NULL,
    /* .memset_tensor = */ NULL,
    /* .set_tensor    = */ ggml_backend_remoting_buffer_set_tensor,
    /* .get_tensor    = */ ggml_backend_remoting_buffer_get_tensor,
    /* .cpy_tensor    = */ ggml_backend_remoting_buffer_cpy_tensor,
    /* .clear         = */ ggml_backend_remoting_buffer_clear,
    /* .reset         = */ NULL,
};
|
|
108
|
+
|
|
109
|
+
// Callback table for "from pointer" remoting buffers.
// Differs from the regular table only in set_tensor/get_tensor, which use the
// memcpy-based *_from_ptr variants.
const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface = {
    /* .free_buffer   = */ ggml_backend_remoting_buffer_free_buffer,
    /* .get_base      = */ ggml_backend_remoting_buffer_get_base,
    /* .init_tensor   = */ NULL,
    /* .memset_tensor = */ NULL,
    /* .set_tensor    = */ ggml_backend_remoting_buffer_set_tensor_from_ptr,
    /* .get_tensor    = */ ggml_backend_remoting_buffer_get_tensor_from_ptr,
    /* .cpy_tensor    = */ ggml_backend_remoting_buffer_cpy_tensor,
    /* .clear         = */ ggml_backend_remoting_buffer_clear,
    /* .reset         = */ NULL,
};
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#include "ggml-remoting.h"
|
|
2
|
+
|
|
3
|
+
// Return the device name cached in the virtgpu object
// (the prefixed name that was built once during initialization).
static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) {
    virtgpu * remote_gpu = DEV_TO_GPU(dev);

    const char * cached_name = remote_gpu->cached_device_info.name;
    return cached_name;
}
|
|
9
|
+
|
|
10
|
+
// Return the device description pre-cached in the virtgpu object.
static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) {
    virtgpu * remote_gpu = DEV_TO_GPU(dev);

    const char * cached_description = remote_gpu->cached_device_info.description;
    return cached_description;
}
|
|
16
|
+
|
|
17
|
+
// Return the cached device type, converted to enum ggml_backend_dev_type.
static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) {
    virtgpu * remote_gpu = DEV_TO_GPU(dev);

    const enum ggml_backend_dev_type dev_type = (enum ggml_backend_dev_type) remote_gpu->cached_device_info.type;
    return dev_type;
}
|
|
22
|
+
|
|
23
|
+
// Report the cached free and total memory figures of the device.
// These are the values stored in the virtgpu object; nothing is queried here.
static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    virtgpu * remote_gpu = DEV_TO_GPU(dev);

    *free  = remote_gpu->cached_device_info.memory_free;
    *total = remote_gpu->cached_device_info.memory_total;
}
|
|
29
|
+
|
|
30
|
+
// Tell whether the device can run `op`.
// With USE_ALWAYS_TRUE_SUPPORTS_OP == 1 every op is reported as supported
// without asking; otherwise the question is forwarded to apir_device_supports_op().
static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
#if USE_ALWAYS_TRUE_SUPPORTS_OP == 1
    /* ggml-rpc cheats it like this */
    /* with the current implementation of serialize_tensor, the src/view aren't properly passed */
    UNUSED(dev);
    UNUSED(op);

    return true;
#else
    virtgpu * remote_gpu = DEV_TO_GPU(dev);

    return apir_device_supports_op(remote_gpu, op);
#endif
}
|
|
44
|
+
|
|
45
|
+
// A buffer type is supported exactly when it belongs to this device.
static bool ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
    return buft->device == dev;
}
|
|
50
|
+
|
|
51
|
+
// Offloading is never requested: always returns false, both arguments are ignored.
static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
    UNUSED(op);
    UNUSED(dev);

    return false;
}
|
|
57
|
+
|
|
58
|
+
// Fill `props` for the device: name, description, type and memory come from
// the sibling getters; capabilities come from apir_device_get_props(), after
// which buffer_from_host_ptr, async and events are forced to false (only
// host_buffer keeps the reported value).
static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
    props->name        = ggml_backend_remoting_device_get_name(dev);
    props->description = ggml_backend_remoting_device_get_description(dev);
    props->type        = ggml_backend_remoting_device_get_type(dev);
    ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total);

    virtgpu * remote_gpu = DEV_TO_GPU(dev);
    auto &    caps       = props->caps;

    apir_device_get_props(remote_gpu, &caps.async, &caps.host_buffer, &caps.buffer_from_host_ptr, &caps.events);

    // Override what the remote side reported for these three capabilities.
    caps.buffer_from_host_ptr = false;
    caps.async                = false;
    caps.events               = false;
}
|
|
72
|
+
|
|
73
|
+
// Return the (single, function-static) regular buffer type descriptor.
//
// The descriptor is filled in once: the atomic flag is tested, the mutex is
// taken, and the flag is tested again before initializing. Because the object
// is static, it is built from the first `dev` that reaches the initialization
// and the same object is returned on every later call.
ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    static std::atomic<bool>        ready{ false };
    static ggml_backend_buffer_type shared_buft;

    if (ready) {
        return &shared_buft;
    }

    static std::mutex           init_mutex;
    std::lock_guard<std::mutex> guard(init_mutex);

    if (!ready) {
        shared_buft = {
            /* .iface   = */ ggml_backend_remoting_buffer_type_interface,
            /* .device  = */ dev,
            /* .context = */ (void *) gpu->cached_buffer_type.host_handle,
        };
        ready = true;
    }

    return &shared_buft;
}
|
|
95
|
+
|
|
96
|
+
// Return the (single, function-static) "from pointer" buffer type descriptor.
//
// Initialization follows the same scheme as the regular buffer type: test the
// atomic flag, take the mutex, test again, then fill the descriptor. The object
// is built from the first `dev` that reaches the initialization.
static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    static std::atomic<bool>        ready{ false };
    static ggml_backend_buffer_type shared_buft;

    if (ready) {
        return &shared_buft;
    }

    static std::mutex           init_mutex;
    std::lock_guard<std::mutex> guard(init_mutex);

    if (!ready) {
        shared_buft = {
            /* .iface   = */ ggml_backend_remoting_buffer_from_ptr_type_interface,
            /* .device  = */ dev,
            /* .context = */ (void *) gpu->cached_buffer_type.host_handle,
        };
        ready = true;
    }

    return &shared_buft;
}
|
|
118
|
+
|
|
119
|
+
// Wrap caller-provided memory `ptr` of `size` bytes in a remoting buffer.
//
// A heap-allocated buffer context is attached to the result; its base is `ptr`
// and it is flagged as "from pointer". The APIR side is created with
// apir_device_buffer_from_ptr(gpu, size, max_tensor_size). Aborts if the
// context cannot be allocated.
static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev,
                                                                          void *             ptr,
                                                                          size_t             size,
                                                                          size_t             max_tensor_size) {
    virtgpu * gpu = DEV_TO_GPU(dev);

    ggml_backend_remoting_buffer_context * buf_ctx =
        (ggml_backend_remoting_buffer_context *) malloc(sizeof(ggml_backend_remoting_buffer_context));
    if (buf_ctx == NULL) {
        GGML_ABORT(GGML_VIRTGPU "%s: Couldn't allocate the buffer context ...", __func__);
    }

    buf_ctx->gpu          = gpu;
    buf_ctx->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size);
    buf_ctx->base         = ptr;
    buf_ctx->is_from_ptr  = true;

    ggml_backend_buffer_type_t from_ptr_buft = ggml_backend_remoting_device_get_buffer_from_ptr_type(dev);

    return ggml_backend_buffer_init(from_ptr_buft, ggml_backend_remoting_buffer_from_ptr_interface, (void *) buf_ctx,
                                    size);
}
|
|
141
|
+
|
|
142
|
+
// Callback table for the remoting device.
// Entries are positional; the comments name the slot each one fills.
// No host buffer type and no event callbacks are provided.
const ggml_backend_device_i ggml_backend_remoting_device_interface = {
    /* .get_name             = */ ggml_backend_remoting_device_get_name,
    /* .get_description      = */ ggml_backend_remoting_device_get_description,
    /* .get_memory           = */ ggml_backend_remoting_device_get_memory,
    /* .get_type             = */ ggml_backend_remoting_device_get_type,
    /* .get_props            = */ ggml_backend_remoting_device_get_props,
    /* .init_backend         = */ ggml_backend_remoting_device_init,
    /* .get_buffer_type      = */ ggml_backend_remoting_device_get_buffer_type,
    /* .get_host_buffer_type = */ NULL,
    /* .buffer_from_host_ptr = */ ggml_backend_remoting_device_buffer_from_ptr,
    /* .supports_op          = */ ggml_backend_remoting_device_supports_op,
    /* .supports_buft        = */ ggml_backend_remoting_device_supports_buft,
    /* .offload_op           = */ ggml_backend_remoting_device_offload_op,
    /* .event_new            = */ NULL,
    /* .event_free           = */ NULL,
    /* .event_synchronize    = */ NULL,
};
|