local-llm-rn 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/CMakeLists.txt +285 -0
- package/cpp/common/CMakeLists.txt +149 -0
- package/cpp/common/arg.cpp +3799 -0
- package/cpp/common/arg.h +131 -0
- package/cpp/common/base64.hpp +392 -0
- package/cpp/common/build-info.cpp.in +4 -0
- package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
- package/cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/common/chat-parser.cpp +1649 -0
- package/cpp/common/chat-parser.h +133 -0
- package/cpp/common/chat-peg-parser.cpp +124 -0
- package/cpp/common/chat-peg-parser.h +105 -0
- package/cpp/common/chat.cpp +3355 -0
- package/cpp/common/chat.h +252 -0
- package/cpp/common/common.cpp +1824 -0
- package/cpp/common/common.h +930 -0
- package/cpp/common/console.cpp +1137 -0
- package/cpp/common/console.h +41 -0
- package/cpp/common/debug.cpp +167 -0
- package/cpp/common/debug.h +43 -0
- package/cpp/common/download.cpp +792 -0
- package/cpp/common/download.h +84 -0
- package/cpp/common/http.h +84 -0
- package/cpp/common/jinja/README.md +88 -0
- package/cpp/common/jinja/caps.cpp +285 -0
- package/cpp/common/jinja/caps.h +30 -0
- package/cpp/common/jinja/lexer.cpp +341 -0
- package/cpp/common/jinja/lexer.h +157 -0
- package/cpp/common/jinja/parser.cpp +591 -0
- package/cpp/common/jinja/parser.h +21 -0
- package/cpp/common/jinja/runtime.cpp +867 -0
- package/cpp/common/jinja/runtime.h +638 -0
- package/cpp/common/jinja/string.cpp +213 -0
- package/cpp/common/jinja/string.h +61 -0
- package/cpp/common/jinja/utils.h +149 -0
- package/cpp/common/jinja/value.cpp +1393 -0
- package/cpp/common/jinja/value.h +756 -0
- package/cpp/common/json-partial.cpp +324 -0
- package/cpp/common/json-partial.h +39 -0
- package/cpp/common/json-schema-to-grammar.cpp +1153 -0
- package/cpp/common/json-schema-to-grammar.h +43 -0
- package/cpp/common/llguidance.cpp +258 -0
- package/cpp/common/log.cpp +446 -0
- package/cpp/common/log.h +119 -0
- package/cpp/common/ngram-cache.cpp +285 -0
- package/cpp/common/ngram-cache.h +101 -0
- package/cpp/common/ngram-map.cpp +530 -0
- package/cpp/common/ngram-map.h +115 -0
- package/cpp/common/ngram-mod.cpp +60 -0
- package/cpp/common/ngram-mod.h +38 -0
- package/cpp/common/peg-parser.cpp +1712 -0
- package/cpp/common/peg-parser.h +459 -0
- package/cpp/common/preset.cpp +483 -0
- package/cpp/common/preset.h +83 -0
- package/cpp/common/regex-partial.cpp +204 -0
- package/cpp/common/regex-partial.h +56 -0
- package/cpp/common/sampling.cpp +745 -0
- package/cpp/common/sampling.h +119 -0
- package/cpp/common/speculative.cpp +1074 -0
- package/cpp/common/speculative.h +41 -0
- package/cpp/common/unicode.cpp +64 -0
- package/cpp/common/unicode.h +22 -0
- package/cpp/ggml/CMakeLists.txt +494 -0
- package/cpp/ggml/cmake/GitVars.cmake +22 -0
- package/cpp/ggml/cmake/common.cmake +50 -0
- package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
- package/cpp/ggml/include/ggml-alloc.h +85 -0
- package/cpp/ggml/include/ggml-backend.h +373 -0
- package/cpp/ggml/include/ggml-blas.h +25 -0
- package/cpp/ggml/include/ggml-cann.h +123 -0
- package/cpp/ggml/include/ggml-cpp.h +39 -0
- package/cpp/ggml/include/ggml-cpu.h +151 -0
- package/cpp/ggml/include/ggml-cuda.h +47 -0
- package/cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/ggml/include/ggml-metal.h +61 -0
- package/cpp/ggml/include/ggml-opencl.h +26 -0
- package/cpp/ggml/include/ggml-opt.h +256 -0
- package/cpp/ggml/include/ggml-rpc.h +30 -0
- package/cpp/ggml/include/ggml-sycl.h +49 -0
- package/cpp/ggml/include/ggml-virtgpu.h +14 -0
- package/cpp/ggml/include/ggml-vulkan.h +29 -0
- package/cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/ggml/include/ggml-zdnn.h +17 -0
- package/cpp/ggml/include/ggml-zendnn.h +22 -0
- package/cpp/ggml/include/ggml.h +2753 -0
- package/cpp/ggml/include/gguf.h +204 -0
- package/cpp/ggml/src/CMakeLists.txt +492 -0
- package/cpp/ggml/src/ggml-alloc.c +1244 -0
- package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
- package/cpp/ggml/src/ggml-backend-dl.h +45 -0
- package/cpp/ggml/src/ggml-backend-impl.h +255 -0
- package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
- package/cpp/ggml/src/ggml-backend.cpp +2270 -0
- package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
- package/cpp/ggml/src/ggml-common.h +1878 -0
- package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
- package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- package/cpp/ggml/src/ggml-cpu/common.h +95 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
- package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
- package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
- package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
- package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
- package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
- package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
- package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
- package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
- package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
- package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
- package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
- package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
- package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
- package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
- package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
- package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
- package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
- package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
- package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
- package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
- package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
- package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
- package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
- package/cpp/ggml/src/ggml-impl.h +724 -0
- package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
- package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
- package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
- package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
- package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
- package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
- package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
- package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
- package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
- package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- package/cpp/ggml/src/ggml-opt.cpp +1093 -0
- package/cpp/ggml/src/ggml-quants.c +5325 -0
- package/cpp/ggml/src/ggml-quants.h +106 -0
- package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
- package/cpp/ggml/src/ggml-threading.cpp +12 -0
- package/cpp/ggml/src/ggml-threading.h +14 -0
- package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
- package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- package/cpp/ggml/src/ggml.c +7669 -0
- package/cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/ggml/src/gguf.cpp +1699 -0
- package/cpp/include/llama-cpp.h +32 -0
- package/cpp/include/llama.h +1568 -0
- package/cpp/mtmd/CMakeLists.txt +98 -0
- package/cpp/mtmd/README.md +63 -0
- package/cpp/mtmd/clip-graph.h +117 -0
- package/cpp/mtmd/clip-impl.h +586 -0
- package/cpp/mtmd/clip-model.h +390 -0
- package/cpp/mtmd/clip.cpp +4154 -0
- package/cpp/mtmd/clip.h +121 -0
- package/cpp/mtmd/deprecation-warning.cpp +22 -0
- package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
- package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
- package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
- package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
- package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
- package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
- package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
- package/cpp/mtmd/models/cogvlm.cpp +98 -0
- package/cpp/mtmd/models/conformer.cpp +216 -0
- package/cpp/mtmd/models/glm4v.cpp +122 -0
- package/cpp/mtmd/models/internvl.cpp +69 -0
- package/cpp/mtmd/models/kimik25.cpp +101 -0
- package/cpp/mtmd/models/kimivl.cpp +63 -0
- package/cpp/mtmd/models/llama4.cpp +96 -0
- package/cpp/mtmd/models/llava.cpp +374 -0
- package/cpp/mtmd/models/minicpmv.cpp +114 -0
- package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
- package/cpp/mtmd/models/models.h +128 -0
- package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
- package/cpp/mtmd/models/paddleocr.cpp +52 -0
- package/cpp/mtmd/models/pixtral.cpp +86 -0
- package/cpp/mtmd/models/qwen2vl.cpp +183 -0
- package/cpp/mtmd/models/qwen3vl.cpp +193 -0
- package/cpp/mtmd/models/siglip.cpp +86 -0
- package/cpp/mtmd/models/whisper-enc.cpp +115 -0
- package/cpp/mtmd/models/youtuvl.cpp +179 -0
- package/cpp/mtmd/mtmd-audio.cpp +730 -0
- package/cpp/mtmd/mtmd-audio.h +113 -0
- package/cpp/mtmd/mtmd-cli.cpp +437 -0
- package/cpp/mtmd/mtmd-helper.cpp +521 -0
- package/cpp/mtmd/mtmd-helper.h +96 -0
- package/cpp/mtmd/mtmd.cpp +1156 -0
- package/cpp/mtmd/mtmd.h +319 -0
- package/cpp/mtmd/requirements.txt +5 -0
- package/cpp/mtmd/test-1.jpeg +0 -0
- package/cpp/mtmd/test-2.mp3 +0 -0
- package/cpp/mtmd/tests.sh +192 -0
- package/cpp/src/CMakeLists.txt +169 -0
- package/cpp/src/llama-adapter.cpp +488 -0
- package/cpp/src/llama-adapter.h +89 -0
- package/cpp/src/llama-arch.cpp +2855 -0
- package/cpp/src/llama-arch.h +619 -0
- package/cpp/src/llama-batch.cpp +917 -0
- package/cpp/src/llama-batch.h +173 -0
- package/cpp/src/llama-chat.cpp +896 -0
- package/cpp/src/llama-chat.h +71 -0
- package/cpp/src/llama-context.cpp +3512 -0
- package/cpp/src/llama-context.h +359 -0
- package/cpp/src/llama-cparams.cpp +5 -0
- package/cpp/src/llama-cparams.h +44 -0
- package/cpp/src/llama-grammar.cpp +1464 -0
- package/cpp/src/llama-grammar.h +194 -0
- package/cpp/src/llama-graph.cpp +2685 -0
- package/cpp/src/llama-graph.h +1026 -0
- package/cpp/src/llama-hparams.cpp +234 -0
- package/cpp/src/llama-hparams.h +339 -0
- package/cpp/src/llama-impl.cpp +171 -0
- package/cpp/src/llama-impl.h +73 -0
- package/cpp/src/llama-io.cpp +15 -0
- package/cpp/src/llama-io.h +35 -0
- package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
- package/cpp/src/llama-kv-cache-iswa.h +137 -0
- package/cpp/src/llama-kv-cache.cpp +2271 -0
- package/cpp/src/llama-kv-cache.h +388 -0
- package/cpp/src/llama-kv-cells.h +533 -0
- package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
- package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
- package/cpp/src/llama-memory-hybrid.cpp +268 -0
- package/cpp/src/llama-memory-hybrid.h +139 -0
- package/cpp/src/llama-memory-recurrent.cpp +1165 -0
- package/cpp/src/llama-memory-recurrent.h +182 -0
- package/cpp/src/llama-memory.cpp +59 -0
- package/cpp/src/llama-memory.h +122 -0
- package/cpp/src/llama-mmap.cpp +785 -0
- package/cpp/src/llama-mmap.h +92 -0
- package/cpp/src/llama-model-loader.cpp +1414 -0
- package/cpp/src/llama-model-loader.h +203 -0
- package/cpp/src/llama-model-saver.cpp +286 -0
- package/cpp/src/llama-model-saver.h +37 -0
- package/cpp/src/llama-model.cpp +9253 -0
- package/cpp/src/llama-model.h +576 -0
- package/cpp/src/llama-quant.cpp +1119 -0
- package/cpp/src/llama-quant.h +1 -0
- package/cpp/src/llama-sampler.cpp +3885 -0
- package/cpp/src/llama-sampler.h +42 -0
- package/cpp/src/llama-vocab.cpp +3970 -0
- package/cpp/src/llama-vocab.h +187 -0
- package/cpp/src/llama.cpp +1313 -0
- package/cpp/src/models/afmoe.cpp +191 -0
- package/cpp/src/models/apertus.cpp +125 -0
- package/cpp/src/models/arcee.cpp +135 -0
- package/cpp/src/models/arctic.cpp +138 -0
- package/cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/src/models/baichuan.cpp +122 -0
- package/cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/src/models/bert.cpp +178 -0
- package/cpp/src/models/bitnet.cpp +160 -0
- package/cpp/src/models/bloom.cpp +101 -0
- package/cpp/src/models/chameleon.cpp +178 -0
- package/cpp/src/models/chatglm.cpp +132 -0
- package/cpp/src/models/codeshell.cpp +111 -0
- package/cpp/src/models/cogvlm.cpp +102 -0
- package/cpp/src/models/cohere2-iswa.cpp +134 -0
- package/cpp/src/models/command-r.cpp +122 -0
- package/cpp/src/models/dbrx.cpp +123 -0
- package/cpp/src/models/deci.cpp +135 -0
- package/cpp/src/models/deepseek.cpp +144 -0
- package/cpp/src/models/deepseek2.cpp +262 -0
- package/cpp/src/models/delta-net-base.cpp +376 -0
- package/cpp/src/models/dots1.cpp +134 -0
- package/cpp/src/models/dream.cpp +105 -0
- package/cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/src/models/eurobert.cpp +97 -0
- package/cpp/src/models/exaone-moe.cpp +146 -0
- package/cpp/src/models/exaone.cpp +114 -0
- package/cpp/src/models/exaone4.cpp +123 -0
- package/cpp/src/models/falcon-h1.cpp +111 -0
- package/cpp/src/models/falcon.cpp +120 -0
- package/cpp/src/models/gemma-embedding.cpp +116 -0
- package/cpp/src/models/gemma.cpp +112 -0
- package/cpp/src/models/gemma2-iswa.cpp +128 -0
- package/cpp/src/models/gemma3.cpp +155 -0
- package/cpp/src/models/gemma3n-iswa.cpp +384 -0
- package/cpp/src/models/glm4-moe.cpp +170 -0
- package/cpp/src/models/glm4.cpp +157 -0
- package/cpp/src/models/gpt2.cpp +105 -0
- package/cpp/src/models/gptneox.cpp +144 -0
- package/cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/src/models/granite.cpp +211 -0
- package/cpp/src/models/grok.cpp +159 -0
- package/cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/src/models/internlm2.cpp +120 -0
- package/cpp/src/models/jais.cpp +86 -0
- package/cpp/src/models/jais2.cpp +123 -0
- package/cpp/src/models/jamba.cpp +106 -0
- package/cpp/src/models/kimi-linear.cpp +392 -0
- package/cpp/src/models/lfm2.cpp +190 -0
- package/cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/src/models/llada.cpp +99 -0
- package/cpp/src/models/llama-iswa.cpp +178 -0
- package/cpp/src/models/llama.cpp +168 -0
- package/cpp/src/models/maincoder.cpp +117 -0
- package/cpp/src/models/mamba-base.cpp +285 -0
- package/cpp/src/models/mamba.cpp +54 -0
- package/cpp/src/models/mimo2-iswa.cpp +123 -0
- package/cpp/src/models/minicpm3.cpp +200 -0
- package/cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/src/models/mistral3.cpp +160 -0
- package/cpp/src/models/models.h +684 -0
- package/cpp/src/models/modern-bert.cpp +109 -0
- package/cpp/src/models/mpt.cpp +126 -0
- package/cpp/src/models/nemotron-h.cpp +148 -0
- package/cpp/src/models/nemotron.cpp +122 -0
- package/cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/src/models/olmo.cpp +121 -0
- package/cpp/src/models/olmo2.cpp +150 -0
- package/cpp/src/models/olmoe.cpp +124 -0
- package/cpp/src/models/openai-moe-iswa.cpp +127 -0
- package/cpp/src/models/openelm.cpp +124 -0
- package/cpp/src/models/orion.cpp +123 -0
- package/cpp/src/models/paddleocr.cpp +122 -0
- package/cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/src/models/phi2.cpp +121 -0
- package/cpp/src/models/phi3.cpp +152 -0
- package/cpp/src/models/plamo.cpp +110 -0
- package/cpp/src/models/plamo2.cpp +318 -0
- package/cpp/src/models/plamo3.cpp +128 -0
- package/cpp/src/models/plm.cpp +169 -0
- package/cpp/src/models/qwen.cpp +108 -0
- package/cpp/src/models/qwen2.cpp +126 -0
- package/cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/src/models/qwen3.cpp +117 -0
- package/cpp/src/models/qwen35.cpp +386 -0
- package/cpp/src/models/qwen35moe.cpp +420 -0
- package/cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/src/models/qwen3next.cpp +525 -0
- package/cpp/src/models/qwen3vl-moe.cpp +140 -0
- package/cpp/src/models/qwen3vl.cpp +132 -0
- package/cpp/src/models/refact.cpp +94 -0
- package/cpp/src/models/rnd1.cpp +126 -0
- package/cpp/src/models/rwkv6-base.cpp +164 -0
- package/cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/src/models/rwkv7-base.cpp +137 -0
- package/cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/src/models/smallthinker.cpp +126 -0
- package/cpp/src/models/smollm3.cpp +128 -0
- package/cpp/src/models/stablelm.cpp +146 -0
- package/cpp/src/models/starcoder.cpp +100 -0
- package/cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/src/models/step35-iswa.cpp +168 -0
- package/cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/src/models/xverse.cpp +108 -0
- package/cpp/src/unicode-data.cpp +7034 -0
- package/cpp/src/unicode-data.h +20 -0
- package/cpp/src/unicode.cpp +1103 -0
- package/cpp/src/unicode.h +111 -0
- package/cpp/vendor/nlohmann/json.hpp +25526 -0
- package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/vendor/stb/stb_image.h +7988 -0
- package/ios/LocalLLM-Bridging-Header.h +2 -0
- package/ios/LocalLLM.h +5 -0
- package/ios/LocalLLM.mm +1267 -0
- package/local-llm-rn.podspec +60 -0
- package/package.json +35 -0
- package/src/NativeLocalLLM.ts +73 -0
- package/src/device.ts +50 -0
- package/src/download-adapter.ts +17 -0
- package/src/index.ts +21 -0
- package/src/native-bridge.ts +142 -0
- package/src/rn-downloader.ts +37 -0
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
// fix problem with std::min and std::max
|
|
2
|
+
#if defined(_WIN32)
|
|
3
|
+
#define WIN32_LEAN_AND_MEAN
|
|
4
|
+
#ifndef NOMINMAX
|
|
5
|
+
# define NOMINMAX
|
|
6
|
+
#endif
|
|
7
|
+
#include <windows.h>
|
|
8
|
+
#endif
|
|
9
|
+
|
|
10
|
+
#include "mtmd.h"
|
|
11
|
+
#include "mtmd-helper.h"
|
|
12
|
+
#include "llama.h"
|
|
13
|
+
|
|
14
|
+
#include <algorithm>
|
|
15
|
+
#include <cinttypes>
|
|
16
|
+
#include <vector>
|
|
17
|
+
|
|
18
|
+
//#define MTMD_AUDIO_DEBUG
|
|
19
|
+
|
|
20
|
+
#define MINIAUDIO_IMPLEMENTATION
|
|
21
|
+
#ifndef MTMD_AUDIO_DEBUG
|
|
22
|
+
# define MA_NO_ENCODING
|
|
23
|
+
#endif
|
|
24
|
+
#define MA_NO_DEVICE_IO
|
|
25
|
+
#define MA_NO_RESOURCE_MANAGER
|
|
26
|
+
#define MA_NO_NODE_GRAPH
|
|
27
|
+
#define MA_NO_ENGINE
|
|
28
|
+
#define MA_NO_GENERATION
|
|
29
|
+
#define MA_API static
|
|
30
|
+
#include "miniaudio/miniaudio.h"
|
|
31
|
+
|
|
32
|
+
#define STB_IMAGE_IMPLEMENTATION
|
|
33
|
+
#include "stb/stb_image.h"
|
|
34
|
+
|
|
35
|
+
#ifdef MTMD_INTERNAL_HEADER
|
|
36
|
+
#error "mtmd-helper is a public library outside of mtmd. it must not include internal headers"
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
//
|
|
40
|
+
// internal logging functions
|
|
41
|
+
//
|
|
42
|
+
|
|
43
|
+
struct mtmd_helper_logger {
|
|
44
|
+
ggml_log_callback default_callback = [](ggml_log_level level, const char * text, void * user_data) {
|
|
45
|
+
(void) level;
|
|
46
|
+
(void) user_data;
|
|
47
|
+
fputs(text, stderr);
|
|
48
|
+
fflush(stderr);
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
ggml_log_callback log_callback = default_callback;
|
|
52
|
+
void * log_callback_user_data;
|
|
53
|
+
|
|
54
|
+
void log_v(enum ggml_log_level level, const char * format, va_list args) {
|
|
55
|
+
if (format == NULL) {
|
|
56
|
+
return;
|
|
57
|
+
}
|
|
58
|
+
va_list args_copy;
|
|
59
|
+
va_copy(args_copy, args);
|
|
60
|
+
char buffer[128];
|
|
61
|
+
int len = vsnprintf(buffer, 128, format, args);
|
|
62
|
+
if (len < 128) {
|
|
63
|
+
log_callback(level, buffer, log_callback_user_data);
|
|
64
|
+
} else {
|
|
65
|
+
char * buffer2 = (char *) calloc(len + 1, sizeof(char));
|
|
66
|
+
vsnprintf(buffer2, len + 1, format, args_copy);
|
|
67
|
+
buffer2[len] = 0;
|
|
68
|
+
log_callback(level, buffer2, log_callback_user_data);
|
|
69
|
+
free(buffer2);
|
|
70
|
+
}
|
|
71
|
+
va_end(args_copy);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
void log(enum ggml_log_level level, const char * format, ...) {
|
|
75
|
+
va_list args;
|
|
76
|
+
va_start(args, format);
|
|
77
|
+
log_v(level, format, args);
|
|
78
|
+
va_end(args);
|
|
79
|
+
}
|
|
80
|
+
} g_logger;
|
|
81
|
+
|
|
82
|
+
#define LOG_INF(...) g_logger.log(GGML_LOG_LEVEL_INFO, __VA_ARGS__)
|
|
83
|
+
#define LOG_WRN(...) g_logger.log(GGML_LOG_LEVEL_WARN, __VA_ARGS__)
|
|
84
|
+
#define LOG_ERR(...) g_logger.log(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
|
|
85
|
+
|
|
86
|
+
// Install the callback/user_data pair used for helper log output and forward
// the same pair to mtmd_log_set(). A null callback selects the logger's
// default callback.
void mtmd_helper_log_set(ggml_log_callback log_callback, void * user_data) {
    ggml_log_callback effective_cb =
        (log_callback != nullptr) ? log_callback : g_logger.default_callback;

    g_logger.log_callback           = effective_cb;
    g_logger.log_callback_user_data = user_data;

    mtmd_log_set(effective_cb, user_data);
}
|
|
94
|
+
|
|
95
|
+
//
|
|
96
|
+
// helper functions
|
|
97
|
+
//
|
|
98
|
+
|
|
99
|
+
size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
|
|
100
|
+
size_t n_tokens = 0;
|
|
101
|
+
for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
|
|
102
|
+
auto chunk = mtmd_input_chunks_get(chunks, i);
|
|
103
|
+
n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
|
|
104
|
+
}
|
|
105
|
+
return n_tokens;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Sum of mtmd_input_chunk_get_n_pos() over every chunk in `chunks`.
llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
    llama_pos total = 0;
    size_t    idx   = 0;
    while (idx < mtmd_input_chunks_size(chunks)) {
        total += mtmd_input_chunk_get_n_pos(mtmd_input_chunks_get(chunks, idx));
        ++idx;
    }
    return total;
}
|
|
116
|
+
|
|
117
|
+
// helper struct to make working with embd batch easier
|
|
118
|
+
// note: this will be removed after llama_batch_ext refactoring
|
|
119
|
+
struct decode_embd_batch {
|
|
120
|
+
int n_pos_per_embd;
|
|
121
|
+
int n_mmproj_embd;
|
|
122
|
+
std::vector<llama_pos> pos;
|
|
123
|
+
std::vector<llama_pos> pos_view; // used by mrope
|
|
124
|
+
std::vector<int32_t> n_seq_id;
|
|
125
|
+
std::vector<llama_seq_id> seq_id_0;
|
|
126
|
+
std::vector<llama_seq_id *> seq_ids;
|
|
127
|
+
std::vector<int8_t> logits;
|
|
128
|
+
llama_batch batch;
|
|
129
|
+
decode_embd_batch(float * embd, int32_t n_tokens, int n_pos_per_embd, int n_mmproj_embd) : n_pos_per_embd(n_pos_per_embd), n_mmproj_embd(n_mmproj_embd) {
|
|
130
|
+
pos .resize(n_tokens * n_pos_per_embd);
|
|
131
|
+
n_seq_id.resize(n_tokens);
|
|
132
|
+
seq_ids .resize(n_tokens + 1);
|
|
133
|
+
logits .resize(n_tokens);
|
|
134
|
+
seq_id_0.resize(1);
|
|
135
|
+
seq_ids [n_tokens] = nullptr;
|
|
136
|
+
batch = {
|
|
137
|
+
/*n_tokens =*/ n_tokens,
|
|
138
|
+
/*tokens =*/ nullptr,
|
|
139
|
+
/*embd =*/ embd,
|
|
140
|
+
/*pos =*/ pos.data(),
|
|
141
|
+
/*n_seq_id =*/ n_seq_id.data(),
|
|
142
|
+
/*seq_id =*/ seq_ids.data(),
|
|
143
|
+
/*logits =*/ logits.data(),
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
void set_position_normal(llama_pos pos_0, llama_seq_id seq_id) {
|
|
148
|
+
seq_id_0[0] = seq_id;
|
|
149
|
+
for (int i = 0; i < batch.n_tokens; i++) {
|
|
150
|
+
batch.pos [i] = pos_0 + i;
|
|
151
|
+
batch.n_seq_id[i] = 1;
|
|
152
|
+
batch.seq_id [i] = seq_id_0.data();
|
|
153
|
+
batch.logits [i] = false;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// M-RoPE for image
|
|
158
|
+
void set_position_mrope_2d(llama_pos pos_0, int nx, int ny, llama_seq_id seq_id) {
|
|
159
|
+
GGML_ASSERT(n_pos_per_embd == 4);
|
|
160
|
+
seq_id_0[0] = seq_id;
|
|
161
|
+
for (int y = 0; y < ny; y++) {
|
|
162
|
+
for (int x = 0; x < nx; x++) {
|
|
163
|
+
int i = y * nx + x;
|
|
164
|
+
pos[i ] = pos_0;
|
|
165
|
+
pos[i + batch.n_tokens ] = pos_0 + y;
|
|
166
|
+
pos[i + batch.n_tokens * 2] = pos_0 + x;
|
|
167
|
+
pos[i + batch.n_tokens * 3] = 0; // last pos dim is unused
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
for (int i = 0; i < batch.n_tokens; i++) {
|
|
171
|
+
batch.n_seq_id[i] = 1;
|
|
172
|
+
batch.seq_id [i] = seq_id_0.data();
|
|
173
|
+
batch.logits [i] = false;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// M-RoPE for audio
|
|
178
|
+
void set_position_mrope_1d(llama_pos pos_0, llama_seq_id seq_id) {
|
|
179
|
+
GGML_ASSERT(n_pos_per_embd == 4);
|
|
180
|
+
seq_id_0[0] = seq_id;
|
|
181
|
+
for (int i = 0; i < batch.n_tokens; i++) {
|
|
182
|
+
pos[i ] = pos_0 + i;
|
|
183
|
+
pos[i + batch.n_tokens ] = pos_0 + i;
|
|
184
|
+
pos[i + batch.n_tokens * 2] = pos_0 + i;
|
|
185
|
+
pos[i + batch.n_tokens * 3] = 0; // last pos dim is unused
|
|
186
|
+
}
|
|
187
|
+
for (int i = 0; i < batch.n_tokens; i++) {
|
|
188
|
+
batch.n_seq_id[i] = 1;
|
|
189
|
+
batch.seq_id [i] = seq_id_0.data();
|
|
190
|
+
batch.logits [i] = false;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
llama_batch get_view(int offset, int n_tokens) {
|
|
195
|
+
llama_pos * pos_ptr;
|
|
196
|
+
pos_view.clear();
|
|
197
|
+
pos_view.reserve(n_tokens * n_pos_per_embd);
|
|
198
|
+
if (n_pos_per_embd > 1) {
|
|
199
|
+
// mrope
|
|
200
|
+
// for example, with layout of src: 1234...1234...1234...1234...
|
|
201
|
+
// offset 2 will give us dst: 34...34...34...34...
|
|
202
|
+
for (int i = 0; i < n_pos_per_embd; i++) {
|
|
203
|
+
// assume n_tokens is less than or equal to batch.n_tokens
|
|
204
|
+
// batch.n_tokens is number of **total** tokens
|
|
205
|
+
// n_tokens is number of viewed token
|
|
206
|
+
size_t src_idx = i * batch.n_tokens + offset;
|
|
207
|
+
pos_view.insert(pos_view.end(),
|
|
208
|
+
pos.data() + src_idx,
|
|
209
|
+
pos.data() + src_idx + n_tokens);
|
|
210
|
+
}
|
|
211
|
+
pos_ptr = pos_view.data();
|
|
212
|
+
} else {
|
|
213
|
+
// normal
|
|
214
|
+
pos_ptr = pos.data() + offset;
|
|
215
|
+
}
|
|
216
|
+
return {
|
|
217
|
+
/*n_tokens =*/ n_tokens,
|
|
218
|
+
/*tokens =*/ nullptr,
|
|
219
|
+
/*embd =*/ batch.embd + offset * n_mmproj_embd,
|
|
220
|
+
/*pos =*/ pos_ptr,
|
|
221
|
+
/*n_seq_id =*/ batch.n_seq_id + offset,
|
|
222
|
+
/*seq_id =*/ batch.seq_id + offset,
|
|
223
|
+
/*logits =*/ batch.logits + offset,
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
};
|
|
227
|
+
|
|
228
|
+
// Helper function for decoding an image whose embeddings have already been calculated
|
|
229
|
+
int32_t mtmd_helper_decode_image_chunk(
|
|
230
|
+
mtmd_context * ctx,
|
|
231
|
+
struct llama_context * lctx,
|
|
232
|
+
const mtmd_input_chunk * chunk,
|
|
233
|
+
float * encoded_embd,
|
|
234
|
+
llama_pos n_past,
|
|
235
|
+
llama_seq_id seq_id,
|
|
236
|
+
int32_t n_batch,
|
|
237
|
+
llama_pos * new_n_past) {
|
|
238
|
+
auto chunk_type = mtmd_input_chunk_get_type(chunk);
|
|
239
|
+
const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
|
|
240
|
+
if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
|
241
|
+
LOG_ERR("failed to decode chunk: input chunk not of image/audio type\n");
|
|
242
|
+
return -1;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
const llama_model * model = llama_get_model(lctx);
|
|
246
|
+
int n_mmproj_embd = llama_model_n_embd_inp(model);
|
|
247
|
+
int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 4 : 1;
|
|
248
|
+
|
|
249
|
+
int32_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
|
|
250
|
+
int32_t i_batch = 0;
|
|
251
|
+
int32_t n_img_batches = (n_tokens + n_batch - 1) / n_batch;
|
|
252
|
+
decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
|
|
253
|
+
|
|
254
|
+
if (mtmd_decode_use_mrope(ctx)) {
|
|
255
|
+
if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
|
256
|
+
const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
|
|
257
|
+
if (!image_tokens) {
|
|
258
|
+
LOG_ERR("failed to decode chunk: image tokens are null\n");
|
|
259
|
+
return -1;
|
|
260
|
+
}
|
|
261
|
+
const int nx = mtmd_image_tokens_get_nx(image_tokens);
|
|
262
|
+
const int ny = mtmd_image_tokens_get_ny(image_tokens);
|
|
263
|
+
batch_embd.set_position_mrope_2d(n_past, nx, ny, seq_id);
|
|
264
|
+
} else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
|
|
265
|
+
batch_embd.set_position_mrope_1d(n_past, seq_id);
|
|
266
|
+
} else {
|
|
267
|
+
GGML_ABORT("invalid chunk type for M-RoPE");
|
|
268
|
+
}
|
|
269
|
+
} else {
|
|
270
|
+
batch_embd.set_position_normal(n_past, seq_id);
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
if (mtmd_decode_use_non_causal(ctx)) {
|
|
274
|
+
llama_set_causal_attn(lctx, false);
|
|
275
|
+
// TODO @ngxson : need to make sure only one image is processed at a time, and n_ubatch must be enough to hold the image
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
while (i_batch < n_img_batches) { // split into batches
|
|
279
|
+
int pos_offset = i_batch*n_batch;
|
|
280
|
+
int n_tokens_batch = std::min(n_batch, n_tokens - pos_offset);
|
|
281
|
+
llama_batch batch_embd_view = batch_embd.get_view(pos_offset, n_tokens_batch);
|
|
282
|
+
|
|
283
|
+
LOG_INF("decoding %s batch %d/%d, n_tokens_batch = %d\n", name, i_batch+1, n_img_batches, n_tokens_batch);
|
|
284
|
+
|
|
285
|
+
int64_t t1 = ggml_time_ms();
|
|
286
|
+
int32_t ret = llama_decode(lctx, batch_embd_view);
|
|
287
|
+
if (ret != 0) {
|
|
288
|
+
LOG_ERR("failed to decode %s\n", name);
|
|
289
|
+
llama_set_causal_attn(lctx, true); // restore causal attn
|
|
290
|
+
return ret;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
LOG_INF("%s decoded (batch %d/%d) in %" PRId64 " ms\n", name, i_batch+1, n_img_batches, ggml_time_ms() - t1);
|
|
294
|
+
|
|
295
|
+
i_batch++;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
n_past += mtmd_input_chunk_get_n_pos(chunk);
|
|
299
|
+
*new_n_past = n_past;
|
|
300
|
+
|
|
301
|
+
if (mtmd_decode_use_non_causal(ctx)) {
|
|
302
|
+
llama_set_causal_attn(lctx, true);
|
|
303
|
+
}
|
|
304
|
+
return 0;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
// Evaluate a single chunk starting at position `n_past` in sequence `seq_id`.
//  - text chunk: the tokens are decoded with llama_decode() in batches of at
//    most `n_batch`; when `logits_last` is set, logits are requested for the
//    final token of the chunk
//  - image/audio chunk: the chunk is encoded with mtmd_encode_chunk() and the
//    resulting embeddings are passed to mtmd_helper_decode_image_chunk()
// Returns 0 on success, otherwise the non-zero result of the failing call.
// `*new_n_past` receives the position following the evaluated tokens; for a
// text chunk that fails part-way it reflects the batches decoded so far.
int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
                                      struct llama_context * lctx,
                                      const mtmd_input_chunk * chunk,
                                      llama_pos n_past,
                                      llama_seq_id seq_id,
                                      int32_t n_batch,
                                      bool logits_last,
                                      llama_pos * new_n_past) {
    int32_t ret;
    auto chunk_type = mtmd_input_chunk_get_type(chunk);

    if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
        size_t n_tokens;
        const auto tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
        // the token batch is only needed on the text path
        llama_batch text_batch = llama_batch_init(n_batch, 0, 1);
        // assign rather than accumulate, so the result does not depend on what
        // the caller stored in *new_n_past beforehand
        *new_n_past = n_past;
        size_t i = 0;
        while (i < n_tokens) { // split into batches
            text_batch.n_tokens = 0; // clear the batch
            for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
                int32_t j = text_batch.n_tokens;
                text_batch.token   [j]    = tokens[i];
                text_batch.pos     [j]    = n_past++;
                text_batch.n_seq_id[j]    = 1;
                text_batch.seq_id  [j][0] = seq_id;
                text_batch.logits  [j]    = false;

                text_batch.n_tokens++;
            }
            bool is_last_token = (i == n_tokens);
            if (logits_last && is_last_token) {
                text_batch.logits[text_batch.n_tokens - 1] = true;
            }
            ret = llama_decode(lctx, text_batch);
            if (ret != 0) {
                LOG_ERR("failed to decode text\n");
                llama_batch_free(text_batch);
                return ret;
            }
            // n_past already includes the tokens of the batch just decoded
            *new_n_past = n_past;
        }
        llama_batch_free(text_batch);

    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
        const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
        int64_t t0 = ggml_time_ms();

        LOG_INF("encoding %s slice...\n", name);

        ret = mtmd_encode_chunk(ctx, chunk);
        if (ret != 0) {
            LOG_ERR("failed to encode %s slice\n", name);
            return ret;
        }

        LOG_INF("%s slice encoded in %" PRId64 " ms\n", name, ggml_time_ms() - t0);

        float * embd = mtmd_get_output_embd(ctx);
        ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
        if (ret != 0) {
            LOG_ERR("failed to decode %s\n", name);
            return ret;
        }
    } else {
        GGML_ABORT("chunk type not supported");
    }

    return 0;
}
|
|
378
|
+
|
|
379
|
+
// Evaluate every chunk of `chunks` in order with mtmd_helper_eval_chunk_single(),
// starting at position `n_past` in sequence `seq_id`.
// `logits_last` is only forwarded for the final chunk.
// Returns 0 on success (including an empty chunk list), otherwise the non-zero
// result of the first failing chunk. `*new_n_past` receives the position
// reached after the last successfully evaluated chunk.
int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
                                struct llama_context * lctx,
                                const mtmd_input_chunks * chunks,
                                llama_pos n_past,
                                llama_seq_id seq_id,
                                int32_t n_batch,
                                bool logits_last,
                                llama_pos * new_n_past) {
    size_t n_chunks = mtmd_input_chunks_size(chunks);
    if (n_chunks == 0) {
        LOG_WRN("no chunks to eval\n");
        // nothing was evaluated, so the position is unchanged; still report it
        // so the caller never reads an unset output value
        *new_n_past = n_past;
        return 0;
    }

    for (size_t i = 0; i < n_chunks; i++) {
        bool chunk_logits_last = (i == n_chunks - 1) && logits_last;
        auto chunk = mtmd_input_chunks_get(chunks, i);

        // n_past is passed both by value (start position) and by pointer (updated in place)
        int32_t res = mtmd_helper_eval_chunk_single(ctx, lctx, chunk, n_past, seq_id, n_batch, chunk_logits_last, &n_past);
        if (res != 0) {
            LOG_ERR("failed to eval chunk %zu\n", i);
            return res;
        }
        *new_n_past = n_past;
    }

    return 0;
}
|
|
407
|
+
|
|
408
|
+
namespace audio_helpers {
|
|
409
|
+
|
|
410
|
+
static bool is_audio_file(const char * buf, size_t len) {
|
|
411
|
+
if (len < 12) {
|
|
412
|
+
return false;
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
|
|
416
|
+
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
|
|
417
|
+
bool is_wav = memcmp(buf, "RIFF", 4) == 0 && memcmp(buf + 8, "WAVE", 4) == 0;
|
|
418
|
+
bool is_mp3 = len >= 3 && (
|
|
419
|
+
memcmp(buf, "ID3", 3) == 0 ||
|
|
420
|
+
// Check for MPEG sync word (simplified check)
|
|
421
|
+
((unsigned char)buf[0] == 0xFF && ((unsigned char)buf[1] & 0xE0) == 0xE0)
|
|
422
|
+
);
|
|
423
|
+
bool is_flac = memcmp(buf, "fLaC", 4) == 0;
|
|
424
|
+
|
|
425
|
+
return is_wav || is_mp3 || is_flac;
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
// returns true if the buffer is a valid audio file
|
|
429
|
+
static bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) {
|
|
430
|
+
ma_result result;
|
|
431
|
+
const int channels = 1;
|
|
432
|
+
ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, channels, target_sampler_rate);
|
|
433
|
+
ma_decoder decoder;
|
|
434
|
+
|
|
435
|
+
result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder);
|
|
436
|
+
if (result != MA_SUCCESS) {
|
|
437
|
+
return false;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
ma_uint64 frame_count;
|
|
441
|
+
ma_uint64 frames_read;
|
|
442
|
+
result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count);
|
|
443
|
+
if (result != MA_SUCCESS) {
|
|
444
|
+
ma_decoder_uninit(&decoder);
|
|
445
|
+
return false;
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
pcmf32_mono.resize(frame_count);
|
|
449
|
+
result = ma_decoder_read_pcm_frames(&decoder, pcmf32_mono.data(), frame_count, &frames_read);
|
|
450
|
+
if (result != MA_SUCCESS) {
|
|
451
|
+
ma_decoder_uninit(&decoder);
|
|
452
|
+
return false;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
#ifdef MTMD_AUDIO_DEBUG
|
|
456
|
+
// save audio to wav file
|
|
457
|
+
ma_encoder_config config = ma_encoder_config_init(ma_encoding_format_wav, ma_format_f32, 1, target_sampler_rate);
|
|
458
|
+
ma_encoder encoder;
|
|
459
|
+
ma_encoder_init_file("output.wav", &config, &encoder);
|
|
460
|
+
ma_encoder_write_pcm_frames(&encoder, pcmf32_mono.data(), pcmf32_mono.size(), &frames_read);
|
|
461
|
+
ma_encoder_uninit(&encoder);
|
|
462
|
+
#endif
|
|
463
|
+
|
|
464
|
+
ma_decoder_uninit(&decoder);
|
|
465
|
+
return true;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
} // namespace audio_helpers
|
|
469
|
+
|
|
470
|
+
// Build a bitmap from an in-memory file.
// Buffers recognised by audio_helpers::is_audio_file() are decoded as audio at
// the rate reported by mtmd_get_audio_bitrate(); everything else is handed to
// stb_image, requesting 3 components per pixel.
// Returns nullptr when decoding fails or the model reports no audio support.
mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len) {
    const bool looks_like_audio = audio_helpers::is_audio_file((const char *)buf, len);

    if (!looks_like_audio) {
        // otherwise, we assume it's an image
        int nx, ny, nc;
        auto * pixels = stbi_load_from_memory(buf, len, &nx, &ny, &nc, 3);
        if (pixels == nullptr) {
            LOG_ERR("%s: failed to decode image bytes\n", __func__);
            return nullptr;
        }
        mtmd_bitmap * bitmap = mtmd_bitmap_init(nx, ny, pixels);
        stbi_image_free(pixels); // the decoded pixel buffer is released right after the bitmap is built
        return bitmap;
    }

    std::vector<float> samples;
    const int bitrate = mtmd_get_audio_bitrate(ctx);
    if (bitrate < 0) {
        LOG_ERR("This model does not support audio input\n");
        return nullptr;
    }
    const bool decoded = audio_helpers::decode_audio_from_buf(buf, len, bitrate, samples);
    if (!decoded) {
        LOG_ERR("Unable to read WAV audio file from buffer\n");
        return nullptr;
    }
    return mtmd_bitmap_init_from_audio(samples.size(), samples.data());
}
|
|
499
|
+
|
|
500
|
+
// Read the whole file `fname` into memory and build a bitmap from it with
// mtmd_helper_bitmap_init_from_buf().
// Returns nullptr when the file cannot be opened, sized or read completely,
// or when the buffer cannot be decoded.
mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname) {
    FILE * f = fopen(fname, "rb");
    if (!f) {
        LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
        return nullptr;
    }

    // determine the file size; ftell() reports failure with a negative value,
    // which must not reach the buffer allocation below
    long file_size = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        file_size = ftell(f);
    }
    if (file_size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        LOG_ERR("Unable to determine size of file %s\n", fname);
        fclose(f);
        return nullptr;
    }

    std::vector<unsigned char> buf((size_t) file_size);

    size_t n_read = fread(buf.data(), 1, buf.size(), f);
    fclose(f);
    if (n_read != buf.size()) {
        LOG_ERR("Failed to read entire file %s\n", fname);
        return nullptr;
    }

    return mtmd_helper_bitmap_init_from_buf(ctx, buf.data(), buf.size());
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#ifndef MTMD_HELPER_H
|
|
2
|
+
#define MTMD_HELPER_H
|
|
3
|
+
|
|
4
|
+
#include "ggml.h"
|
|
5
|
+
#include "llama.h"
|
|
6
|
+
#include "mtmd.h"
|
|
7
|
+
|
|
8
|
+
#include <stddef.h>
|
|
9
|
+
#include <stdint.h>
|
|
10
|
+
#include <stdbool.h>
|
|
11
|
+
|
|
12
|
+
#ifdef __cplusplus
|
|
13
|
+
extern "C" {
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
//
|
|
17
|
+
// libmtmd helper functions
|
|
18
|
+
//
|
|
19
|
+
// Please note that these helpers are not guaranteed to be stable.
|
|
20
|
+
// BREAKING CHANGES are expected.
|
|
21
|
+
//
|
|
22
|
+
|
|
23
|
+
// Set callback for all future logging events.
|
|
24
|
+
// If this is not called, or NULL is supplied, everything is output on stderr.
|
|
25
|
+
// Note: this also calls mtmd_log_set() internally
|
|
26
|
+
MTMD_API void mtmd_helper_log_set(ggml_log_callback log_callback, void * user_data);
|
|
27
|
+
|
|
28
|
+
// helper function to construct a mtmd_bitmap from a file
|
|
29
|
+
// it calls mtmd_helper_bitmap_init_from_buf() internally
|
|
30
|
+
// returns nullptr on failure
|
|
31
|
+
// this function is thread-safe
|
|
32
|
+
MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname);
|
|
33
|
+
|
|
34
|
+
// helper function to construct a mtmd_bitmap from a buffer containing a file
|
|
35
|
+
// supported formats:
|
|
36
|
+
// image: formats supported by stb_image: jpg, png, bmp, gif, etc.
|
|
37
|
+
// audio: formats supported by miniaudio: wav, mp3, flac
|
|
38
|
+
// note: audio files will be auto-detected based on magic bytes
|
|
39
|
+
// returns nullptr on failure
|
|
40
|
+
// this function is thread-safe
|
|
41
|
+
MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigned char * buf, size_t len);
|
|
42
|
+
|
|
43
|
+
// helper to count the total number of tokens from a list of chunks, useful to keep track of KV cache
|
|
44
|
+
MTMD_API size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks);
|
|
45
|
+
|
|
46
|
+
// helper to count the total position of tokens from a list of chunks, useful to keep track of n_past
|
|
47
|
+
// normally, n_pos is equal to n_tokens, but for M-RoPE it is different
|
|
48
|
+
MTMD_API llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks);
|
|
49
|
+
|
|
50
|
+
// helper function that automatically:
|
|
51
|
+
// 1. run llama_decode() on text chunks
|
|
52
|
+
// 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode()
|
|
53
|
+
// if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error
|
|
54
|
+
// otherwise, returns 0 on success
|
|
55
|
+
// this function is NOT thread-safe
|
|
56
|
+
MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
|
|
57
|
+
struct llama_context * lctx,
|
|
58
|
+
const mtmd_input_chunks * chunks,
|
|
59
|
+
llama_pos n_past,
|
|
60
|
+
llama_seq_id seq_id,
|
|
61
|
+
int32_t n_batch,
|
|
62
|
+
bool logits_last,
|
|
63
|
+
llama_pos * new_n_past);
|
|
64
|
+
|
|
65
|
+
// works like mtmd_helper_eval_chunks(), but only for a single chunk
|
|
66
|
+
// this function is NOT thread-safe
|
|
67
|
+
MTMD_API int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
|
|
68
|
+
struct llama_context * lctx,
|
|
69
|
+
const mtmd_input_chunk * chunk,
|
|
70
|
+
llama_pos n_past,
|
|
71
|
+
llama_seq_id seq_id,
|
|
72
|
+
int32_t n_batch,
|
|
73
|
+
bool logits_last,
|
|
74
|
+
llama_pos * new_n_past);
|
|
75
|
+
|
|
76
|
+
// helper function to decode an image whose embeddings have already been calculated
|
|
77
|
+
// this helper will handle batching and pre/post decoding setup (for ex. gemma 3 requires non-causal attention)
|
|
78
|
+
// ret 0 on success, -1 on chunk not being a valid image chunk, 1 on decode failure
|
|
79
|
+
MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
|
|
80
|
+
struct llama_context * lctx,
|
|
81
|
+
const mtmd_input_chunk * chunk,
|
|
82
|
+
float * encoded_embd,
|
|
83
|
+
llama_pos n_past,
|
|
84
|
+
llama_seq_id seq_id,
|
|
85
|
+
int32_t n_batch,
|
|
86
|
+
llama_pos * new_n_past);
|
|
87
|
+
|
|
88
|
+
#ifdef __cplusplus
|
|
89
|
+
} // extern "C"
|
|
90
|
+
#endif
|
|
91
|
+
|
|
92
|
+
//
|
|
93
|
+
// C++ wrappers
|
|
94
|
+
//
|
|
95
|
+
|
|
96
|
+
#endif
|