local-llm-rn 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/CMakeLists.txt +285 -0
- package/cpp/common/CMakeLists.txt +149 -0
- package/cpp/common/arg.cpp +3799 -0
- package/cpp/common/arg.h +131 -0
- package/cpp/common/base64.hpp +392 -0
- package/cpp/common/build-info.cpp.in +4 -0
- package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
- package/cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/common/chat-parser.cpp +1649 -0
- package/cpp/common/chat-parser.h +133 -0
- package/cpp/common/chat-peg-parser.cpp +124 -0
- package/cpp/common/chat-peg-parser.h +105 -0
- package/cpp/common/chat.cpp +3355 -0
- package/cpp/common/chat.h +252 -0
- package/cpp/common/common.cpp +1824 -0
- package/cpp/common/common.h +930 -0
- package/cpp/common/console.cpp +1137 -0
- package/cpp/common/console.h +41 -0
- package/cpp/common/debug.cpp +167 -0
- package/cpp/common/debug.h +43 -0
- package/cpp/common/download.cpp +792 -0
- package/cpp/common/download.h +84 -0
- package/cpp/common/http.h +84 -0
- package/cpp/common/jinja/README.md +88 -0
- package/cpp/common/jinja/caps.cpp +285 -0
- package/cpp/common/jinja/caps.h +30 -0
- package/cpp/common/jinja/lexer.cpp +341 -0
- package/cpp/common/jinja/lexer.h +157 -0
- package/cpp/common/jinja/parser.cpp +591 -0
- package/cpp/common/jinja/parser.h +21 -0
- package/cpp/common/jinja/runtime.cpp +867 -0
- package/cpp/common/jinja/runtime.h +638 -0
- package/cpp/common/jinja/string.cpp +213 -0
- package/cpp/common/jinja/string.h +61 -0
- package/cpp/common/jinja/utils.h +149 -0
- package/cpp/common/jinja/value.cpp +1393 -0
- package/cpp/common/jinja/value.h +756 -0
- package/cpp/common/json-partial.cpp +324 -0
- package/cpp/common/json-partial.h +39 -0
- package/cpp/common/json-schema-to-grammar.cpp +1153 -0
- package/cpp/common/json-schema-to-grammar.h +43 -0
- package/cpp/common/llguidance.cpp +258 -0
- package/cpp/common/log.cpp +446 -0
- package/cpp/common/log.h +119 -0
- package/cpp/common/ngram-cache.cpp +285 -0
- package/cpp/common/ngram-cache.h +101 -0
- package/cpp/common/ngram-map.cpp +530 -0
- package/cpp/common/ngram-map.h +115 -0
- package/cpp/common/ngram-mod.cpp +60 -0
- package/cpp/common/ngram-mod.h +38 -0
- package/cpp/common/peg-parser.cpp +1712 -0
- package/cpp/common/peg-parser.h +459 -0
- package/cpp/common/preset.cpp +483 -0
- package/cpp/common/preset.h +83 -0
- package/cpp/common/regex-partial.cpp +204 -0
- package/cpp/common/regex-partial.h +56 -0
- package/cpp/common/sampling.cpp +745 -0
- package/cpp/common/sampling.h +119 -0
- package/cpp/common/speculative.cpp +1074 -0
- package/cpp/common/speculative.h +41 -0
- package/cpp/common/unicode.cpp +64 -0
- package/cpp/common/unicode.h +22 -0
- package/cpp/ggml/CMakeLists.txt +494 -0
- package/cpp/ggml/cmake/GitVars.cmake +22 -0
- package/cpp/ggml/cmake/common.cmake +50 -0
- package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
- package/cpp/ggml/include/ggml-alloc.h +85 -0
- package/cpp/ggml/include/ggml-backend.h +373 -0
- package/cpp/ggml/include/ggml-blas.h +25 -0
- package/cpp/ggml/include/ggml-cann.h +123 -0
- package/cpp/ggml/include/ggml-cpp.h +39 -0
- package/cpp/ggml/include/ggml-cpu.h +151 -0
- package/cpp/ggml/include/ggml-cuda.h +47 -0
- package/cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/ggml/include/ggml-metal.h +61 -0
- package/cpp/ggml/include/ggml-opencl.h +26 -0
- package/cpp/ggml/include/ggml-opt.h +256 -0
- package/cpp/ggml/include/ggml-rpc.h +30 -0
- package/cpp/ggml/include/ggml-sycl.h +49 -0
- package/cpp/ggml/include/ggml-virtgpu.h +14 -0
- package/cpp/ggml/include/ggml-vulkan.h +29 -0
- package/cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/ggml/include/ggml-zdnn.h +17 -0
- package/cpp/ggml/include/ggml-zendnn.h +22 -0
- package/cpp/ggml/include/ggml.h +2753 -0
- package/cpp/ggml/include/gguf.h +204 -0
- package/cpp/ggml/src/CMakeLists.txt +492 -0
- package/cpp/ggml/src/ggml-alloc.c +1244 -0
- package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
- package/cpp/ggml/src/ggml-backend-dl.h +45 -0
- package/cpp/ggml/src/ggml-backend-impl.h +255 -0
- package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
- package/cpp/ggml/src/ggml-backend.cpp +2270 -0
- package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
- package/cpp/ggml/src/ggml-common.h +1878 -0
- package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
- package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- package/cpp/ggml/src/ggml-cpu/common.h +95 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
- package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
- package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
- package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
- package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
- package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
- package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
- package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
- package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
- package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
- package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
- package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
- package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
- package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
- package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
- package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
- package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
- package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
- package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
- package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
- package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
- package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
- package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
- package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
- package/cpp/ggml/src/ggml-impl.h +724 -0
- package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
- package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
- package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
- package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
- package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
- package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
- package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
- package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
- package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
- package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- package/cpp/ggml/src/ggml-opt.cpp +1093 -0
- package/cpp/ggml/src/ggml-quants.c +5325 -0
- package/cpp/ggml/src/ggml-quants.h +106 -0
- package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
- package/cpp/ggml/src/ggml-threading.cpp +12 -0
- package/cpp/ggml/src/ggml-threading.h +14 -0
- package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
- package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- package/cpp/ggml/src/ggml.c +7669 -0
- package/cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/ggml/src/gguf.cpp +1699 -0
- package/cpp/include/llama-cpp.h +32 -0
- package/cpp/include/llama.h +1568 -0
- package/cpp/mtmd/CMakeLists.txt +98 -0
- package/cpp/mtmd/README.md +63 -0
- package/cpp/mtmd/clip-graph.h +117 -0
- package/cpp/mtmd/clip-impl.h +586 -0
- package/cpp/mtmd/clip-model.h +390 -0
- package/cpp/mtmd/clip.cpp +4154 -0
- package/cpp/mtmd/clip.h +121 -0
- package/cpp/mtmd/deprecation-warning.cpp +22 -0
- package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
- package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
- package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
- package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
- package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
- package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
- package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
- package/cpp/mtmd/models/cogvlm.cpp +98 -0
- package/cpp/mtmd/models/conformer.cpp +216 -0
- package/cpp/mtmd/models/glm4v.cpp +122 -0
- package/cpp/mtmd/models/internvl.cpp +69 -0
- package/cpp/mtmd/models/kimik25.cpp +101 -0
- package/cpp/mtmd/models/kimivl.cpp +63 -0
- package/cpp/mtmd/models/llama4.cpp +96 -0
- package/cpp/mtmd/models/llava.cpp +374 -0
- package/cpp/mtmd/models/minicpmv.cpp +114 -0
- package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
- package/cpp/mtmd/models/models.h +128 -0
- package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
- package/cpp/mtmd/models/paddleocr.cpp +52 -0
- package/cpp/mtmd/models/pixtral.cpp +86 -0
- package/cpp/mtmd/models/qwen2vl.cpp +183 -0
- package/cpp/mtmd/models/qwen3vl.cpp +193 -0
- package/cpp/mtmd/models/siglip.cpp +86 -0
- package/cpp/mtmd/models/whisper-enc.cpp +115 -0
- package/cpp/mtmd/models/youtuvl.cpp +179 -0
- package/cpp/mtmd/mtmd-audio.cpp +730 -0
- package/cpp/mtmd/mtmd-audio.h +113 -0
- package/cpp/mtmd/mtmd-cli.cpp +437 -0
- package/cpp/mtmd/mtmd-helper.cpp +521 -0
- package/cpp/mtmd/mtmd-helper.h +96 -0
- package/cpp/mtmd/mtmd.cpp +1156 -0
- package/cpp/mtmd/mtmd.h +319 -0
- package/cpp/mtmd/requirements.txt +5 -0
- package/cpp/mtmd/test-1.jpeg +0 -0
- package/cpp/mtmd/test-2.mp3 +0 -0
- package/cpp/mtmd/tests.sh +192 -0
- package/cpp/src/CMakeLists.txt +169 -0
- package/cpp/src/llama-adapter.cpp +488 -0
- package/cpp/src/llama-adapter.h +89 -0
- package/cpp/src/llama-arch.cpp +2855 -0
- package/cpp/src/llama-arch.h +619 -0
- package/cpp/src/llama-batch.cpp +917 -0
- package/cpp/src/llama-batch.h +173 -0
- package/cpp/src/llama-chat.cpp +896 -0
- package/cpp/src/llama-chat.h +71 -0
- package/cpp/src/llama-context.cpp +3512 -0
- package/cpp/src/llama-context.h +359 -0
- package/cpp/src/llama-cparams.cpp +5 -0
- package/cpp/src/llama-cparams.h +44 -0
- package/cpp/src/llama-grammar.cpp +1464 -0
- package/cpp/src/llama-grammar.h +194 -0
- package/cpp/src/llama-graph.cpp +2685 -0
- package/cpp/src/llama-graph.h +1026 -0
- package/cpp/src/llama-hparams.cpp +234 -0
- package/cpp/src/llama-hparams.h +339 -0
- package/cpp/src/llama-impl.cpp +171 -0
- package/cpp/src/llama-impl.h +73 -0
- package/cpp/src/llama-io.cpp +15 -0
- package/cpp/src/llama-io.h +35 -0
- package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
- package/cpp/src/llama-kv-cache-iswa.h +137 -0
- package/cpp/src/llama-kv-cache.cpp +2271 -0
- package/cpp/src/llama-kv-cache.h +388 -0
- package/cpp/src/llama-kv-cells.h +533 -0
- package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
- package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
- package/cpp/src/llama-memory-hybrid.cpp +268 -0
- package/cpp/src/llama-memory-hybrid.h +139 -0
- package/cpp/src/llama-memory-recurrent.cpp +1165 -0
- package/cpp/src/llama-memory-recurrent.h +182 -0
- package/cpp/src/llama-memory.cpp +59 -0
- package/cpp/src/llama-memory.h +122 -0
- package/cpp/src/llama-mmap.cpp +785 -0
- package/cpp/src/llama-mmap.h +92 -0
- package/cpp/src/llama-model-loader.cpp +1414 -0
- package/cpp/src/llama-model-loader.h +203 -0
- package/cpp/src/llama-model-saver.cpp +286 -0
- package/cpp/src/llama-model-saver.h +37 -0
- package/cpp/src/llama-model.cpp +9253 -0
- package/cpp/src/llama-model.h +576 -0
- package/cpp/src/llama-quant.cpp +1119 -0
- package/cpp/src/llama-quant.h +1 -0
- package/cpp/src/llama-sampler.cpp +3885 -0
- package/cpp/src/llama-sampler.h +42 -0
- package/cpp/src/llama-vocab.cpp +3970 -0
- package/cpp/src/llama-vocab.h +187 -0
- package/cpp/src/llama.cpp +1313 -0
- package/cpp/src/models/afmoe.cpp +191 -0
- package/cpp/src/models/apertus.cpp +125 -0
- package/cpp/src/models/arcee.cpp +135 -0
- package/cpp/src/models/arctic.cpp +138 -0
- package/cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/src/models/baichuan.cpp +122 -0
- package/cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/src/models/bert.cpp +178 -0
- package/cpp/src/models/bitnet.cpp +160 -0
- package/cpp/src/models/bloom.cpp +101 -0
- package/cpp/src/models/chameleon.cpp +178 -0
- package/cpp/src/models/chatglm.cpp +132 -0
- package/cpp/src/models/codeshell.cpp +111 -0
- package/cpp/src/models/cogvlm.cpp +102 -0
- package/cpp/src/models/cohere2-iswa.cpp +134 -0
- package/cpp/src/models/command-r.cpp +122 -0
- package/cpp/src/models/dbrx.cpp +123 -0
- package/cpp/src/models/deci.cpp +135 -0
- package/cpp/src/models/deepseek.cpp +144 -0
- package/cpp/src/models/deepseek2.cpp +262 -0
- package/cpp/src/models/delta-net-base.cpp +376 -0
- package/cpp/src/models/dots1.cpp +134 -0
- package/cpp/src/models/dream.cpp +105 -0
- package/cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/src/models/eurobert.cpp +97 -0
- package/cpp/src/models/exaone-moe.cpp +146 -0
- package/cpp/src/models/exaone.cpp +114 -0
- package/cpp/src/models/exaone4.cpp +123 -0
- package/cpp/src/models/falcon-h1.cpp +111 -0
- package/cpp/src/models/falcon.cpp +120 -0
- package/cpp/src/models/gemma-embedding.cpp +116 -0
- package/cpp/src/models/gemma.cpp +112 -0
- package/cpp/src/models/gemma2-iswa.cpp +128 -0
- package/cpp/src/models/gemma3.cpp +155 -0
- package/cpp/src/models/gemma3n-iswa.cpp +384 -0
- package/cpp/src/models/glm4-moe.cpp +170 -0
- package/cpp/src/models/glm4.cpp +157 -0
- package/cpp/src/models/gpt2.cpp +105 -0
- package/cpp/src/models/gptneox.cpp +144 -0
- package/cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/src/models/granite.cpp +211 -0
- package/cpp/src/models/grok.cpp +159 -0
- package/cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/src/models/internlm2.cpp +120 -0
- package/cpp/src/models/jais.cpp +86 -0
- package/cpp/src/models/jais2.cpp +123 -0
- package/cpp/src/models/jamba.cpp +106 -0
- package/cpp/src/models/kimi-linear.cpp +392 -0
- package/cpp/src/models/lfm2.cpp +190 -0
- package/cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/src/models/llada.cpp +99 -0
- package/cpp/src/models/llama-iswa.cpp +178 -0
- package/cpp/src/models/llama.cpp +168 -0
- package/cpp/src/models/maincoder.cpp +117 -0
- package/cpp/src/models/mamba-base.cpp +285 -0
- package/cpp/src/models/mamba.cpp +54 -0
- package/cpp/src/models/mimo2-iswa.cpp +123 -0
- package/cpp/src/models/minicpm3.cpp +200 -0
- package/cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/src/models/mistral3.cpp +160 -0
- package/cpp/src/models/models.h +684 -0
- package/cpp/src/models/modern-bert.cpp +109 -0
- package/cpp/src/models/mpt.cpp +126 -0
- package/cpp/src/models/nemotron-h.cpp +148 -0
- package/cpp/src/models/nemotron.cpp +122 -0
- package/cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/src/models/olmo.cpp +121 -0
- package/cpp/src/models/olmo2.cpp +150 -0
- package/cpp/src/models/olmoe.cpp +124 -0
- package/cpp/src/models/openai-moe-iswa.cpp +127 -0
- package/cpp/src/models/openelm.cpp +124 -0
- package/cpp/src/models/orion.cpp +123 -0
- package/cpp/src/models/paddleocr.cpp +122 -0
- package/cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/src/models/phi2.cpp +121 -0
- package/cpp/src/models/phi3.cpp +152 -0
- package/cpp/src/models/plamo.cpp +110 -0
- package/cpp/src/models/plamo2.cpp +318 -0
- package/cpp/src/models/plamo3.cpp +128 -0
- package/cpp/src/models/plm.cpp +169 -0
- package/cpp/src/models/qwen.cpp +108 -0
- package/cpp/src/models/qwen2.cpp +126 -0
- package/cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/src/models/qwen3.cpp +117 -0
- package/cpp/src/models/qwen35.cpp +386 -0
- package/cpp/src/models/qwen35moe.cpp +420 -0
- package/cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/src/models/qwen3next.cpp +525 -0
- package/cpp/src/models/qwen3vl-moe.cpp +140 -0
- package/cpp/src/models/qwen3vl.cpp +132 -0
- package/cpp/src/models/refact.cpp +94 -0
- package/cpp/src/models/rnd1.cpp +126 -0
- package/cpp/src/models/rwkv6-base.cpp +164 -0
- package/cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/src/models/rwkv7-base.cpp +137 -0
- package/cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/src/models/smallthinker.cpp +126 -0
- package/cpp/src/models/smollm3.cpp +128 -0
- package/cpp/src/models/stablelm.cpp +146 -0
- package/cpp/src/models/starcoder.cpp +100 -0
- package/cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/src/models/step35-iswa.cpp +168 -0
- package/cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/src/models/xverse.cpp +108 -0
- package/cpp/src/unicode-data.cpp +7034 -0
- package/cpp/src/unicode-data.h +20 -0
- package/cpp/src/unicode.cpp +1103 -0
- package/cpp/src/unicode.h +111 -0
- package/cpp/vendor/nlohmann/json.hpp +25526 -0
- package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/vendor/stb/stb_image.h +7988 -0
- package/ios/LocalLLM-Bridging-Header.h +2 -0
- package/ios/LocalLLM.h +5 -0
- package/ios/LocalLLM.mm +1267 -0
- package/local-llm-rn.podspec +60 -0
- package/package.json +35 -0
- package/src/NativeLocalLLM.ts +73 -0
- package/src/device.ts +50 -0
- package/src/download-adapter.ts +17 -0
- package/src/index.ts +21 -0
- package/src/native-bridge.ts +142 -0
- package/src/rn-downloader.ts +37 -0
package/cpp/mtmd/clip.h
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "ggml.h"
|
|
4
|
+
#include "mtmd.h"
|
|
5
|
+
|
|
6
|
+
#include <stddef.h>
|
|
7
|
+
#include <stdint.h>
|
|
8
|
+
|
|
9
|
+
// !!! Internal header, to be used by mtmd only !!!
|
|
10
|
+
|
|
11
|
+
#define MTMD_INTERNAL_HEADER
|
|
12
|
+
|
|
13
|
+
struct clip_ctx;
|
|
14
|
+
|
|
15
|
+
struct clip_image_size {
|
|
16
|
+
int width;
|
|
17
|
+
int height;
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
struct clip_image_f32;
|
|
21
|
+
struct clip_image_u8_batch;
|
|
22
|
+
struct clip_image_f32_batch;
|
|
23
|
+
|
|
24
|
+
enum clip_modality {
|
|
25
|
+
CLIP_MODALITY_VISION,
|
|
26
|
+
CLIP_MODALITY_AUDIO,
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
enum clip_flash_attn_type {
|
|
30
|
+
CLIP_FLASH_ATTN_TYPE_AUTO = -1,
|
|
31
|
+
CLIP_FLASH_ATTN_TYPE_DISABLED = 0,
|
|
32
|
+
CLIP_FLASH_ATTN_TYPE_ENABLED = 1,
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
struct clip_context_params {
|
|
36
|
+
bool use_gpu;
|
|
37
|
+
enum clip_flash_attn_type flash_attn_type;
|
|
38
|
+
int image_min_tokens;
|
|
39
|
+
int image_max_tokens;
|
|
40
|
+
bool warmup;
|
|
41
|
+
ggml_backend_sched_eval_callback cb_eval;
|
|
42
|
+
void * cb_eval_user_data;
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
struct clip_init_result {
|
|
46
|
+
struct clip_ctx * ctx_v; // vision context
|
|
47
|
+
struct clip_ctx * ctx_a; // audio context
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
struct clip_init_result clip_init(const char * fname, struct clip_context_params ctx_params);
|
|
51
|
+
|
|
52
|
+
void clip_free(struct clip_ctx * ctx);
|
|
53
|
+
|
|
54
|
+
size_t clip_embd_nbytes(const struct clip_ctx * ctx);
|
|
55
|
+
size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_w, int img_h);
|
|
56
|
+
|
|
57
|
+
int32_t clip_get_image_size (const struct clip_ctx * ctx);
|
|
58
|
+
int32_t clip_get_patch_size (const struct clip_ctx * ctx);
|
|
59
|
+
int32_t clip_get_hidden_size(const struct clip_ctx * ctx);
|
|
60
|
+
|
|
61
|
+
// TODO: should be enum, not string
|
|
62
|
+
const char * clip_patch_merge_type(const struct clip_ctx * ctx);
|
|
63
|
+
|
|
64
|
+
int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * img);
|
|
65
|
+
|
|
66
|
+
// for M-RoPE, this will be the number of token positions in X and Y directions
|
|
67
|
+
// for other models, X will be the total number of tokens and Y will be 1
|
|
68
|
+
int clip_n_output_tokens_x(const struct clip_ctx * ctx, struct clip_image_f32 * img);
|
|
69
|
+
int clip_n_output_tokens_y(const struct clip_ctx * ctx, struct clip_image_f32 * img);
|
|
70
|
+
|
|
71
|
+
// this should be equal to the embedding dimension of the text model
|
|
72
|
+
int clip_n_mmproj_embd(const struct clip_ctx * ctx);
|
|
73
|
+
|
|
74
|
+
struct clip_image_size * clip_image_size_init(void);
|
|
75
|
+
struct clip_image_u8 * clip_image_u8_init (void);
|
|
76
|
+
struct clip_image_f32 * clip_image_f32_init(void);
|
|
77
|
+
struct clip_image_f32_batch * clip_image_f32_batch_init(void); // only used by libllava
|
|
78
|
+
|
|
79
|
+
// nx, ny are the output image dimensions
|
|
80
|
+
unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny);
|
|
81
|
+
|
|
82
|
+
void clip_image_size_free (struct clip_image_size * img_size);
|
|
83
|
+
void clip_image_u8_free (struct clip_image_u8 * img);
|
|
84
|
+
void clip_image_f32_free(struct clip_image_f32 * img);
|
|
85
|
+
void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
|
|
86
|
+
void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
|
|
87
|
+
|
|
88
|
+
// used for accessing the underlying data of clip_image_f32_batch
|
|
89
|
+
size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch); // equivalent to batch->size()
|
|
90
|
+
size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->nx
|
|
91
|
+
size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny
|
|
92
|
+
struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
|
|
96
|
+
* The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes
|
|
97
|
+
*/
|
|
98
|
+
void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny, struct clip_image_u8 * img);
|
|
99
|
+
|
|
100
|
+
/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
|
|
101
|
+
bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
|
|
102
|
+
|
|
103
|
+
struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
|
|
104
|
+
|
|
105
|
+
bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
|
|
106
|
+
bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
|
|
107
|
+
|
|
108
|
+
int clip_is_minicpmv(const struct clip_ctx * ctx);
|
|
109
|
+
bool clip_is_glm(const struct clip_ctx * ctx);
|
|
110
|
+
bool clip_is_llava(const struct clip_ctx * ctx);
|
|
111
|
+
// note for contributor: this clip_is_(model) pattern is deprecated
|
|
112
|
+
// do NOT add new functions like this
|
|
113
|
+
|
|
114
|
+
bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
|
|
115
|
+
|
|
116
|
+
// used by audio input
|
|
117
|
+
void clip_image_f32_batch_add_mel(struct clip_image_f32_batch * batch, int n_mel, int n_frames, float * mel);
|
|
118
|
+
|
|
119
|
+
bool clip_has_vision_encoder(const struct clip_ctx * ctx);
|
|
120
|
+
bool clip_has_audio_encoder(const struct clip_ctx * ctx);
|
|
121
|
+
bool clip_has_whisper_encoder(const struct clip_ctx * ctx);
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#include <cstdio>
#include <cstdlib>  // EXIT_FAILURE
#include <string>

// Stub entry point: prints a notice that this binary is deprecated, names
// 'llama-mtmd-cli' as the replacement, and exits with EXIT_FAILURE.
int main(int argc, char** argv) {
    // Fall back to a generic name when no program name was passed in.
    std::string filename = "main";
    if (argc >= 1) {
        filename = argv[0];
    }

    // Get only the program name from the full path
    // (both '/' and '\\' are treated as path separators).
    const std::string::size_type pos = filename.find_last_of("/\\");
    if (pos != std::string::npos) {
        filename = filename.substr(pos + 1);
    }

    fprintf(stdout, "\n");
    fprintf(stdout, "WARNING: The binary '%s' is deprecated.\n", filename.c_str());
    fprintf(stdout, "Please use 'llama-mtmd-cli' instead.\n");
    fprintf(stdout, "\n");

    return EXIT_FAILURE;
}
|
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
import torch
|
|
7
|
+
import numpy as np
|
|
8
|
+
from gguf import *
|
|
9
|
+
from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel, SiglipVisionModel
|
|
10
|
+
|
|
11
|
+
TEXT = "clip.text"
|
|
12
|
+
VISION = "clip.vision"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def k(raw_key: str, arch: str) -> str:
    """Expand the ``{arch}`` placeholder of a key template.

    ``raw_key`` is formatted with ``arch`` passed as the keyword ``arch``,
    e.g. ``k("{arch}.block_count", "clip.vision")`` gives
    ``"clip.vision.block_count"``.
    """
    expanded = str.format(raw_key, arch=arch)
    return expanded
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_llava: bool) -> bool:
    """Decide whether the tensor called ``name`` is left out of the output.

    A tensor is skipped when any of the following holds:

    * it is ``logit_scale`` or one of the two ``position_ids`` buffers;
    * ``has_llava`` is set and it is the visual projection weight or a
      ``vision_model.post_layernorm`` parameter;
    * its name starts with ``"v"`` while ``has_vision`` is false;
    * its name starts with ``"t"`` while ``has_text`` is false.
    """
    always_dropped = (
        "logit_scale",
        "text_model.embeddings.position_ids",
        "vision_model.embeddings.position_ids",
    )
    dropped_for_llava = [
        "visual_projection.weight",
        "vision_model.post_layernorm.weight",
        "vision_model.post_layernorm.bias",
    ]
    # The tower is identified purely by the first letter of the name.
    tower_is_absent = (
        (not has_vision and name.startswith("v"))
        or (not has_text and name.startswith("t"))
    )
    return bool(
        name in always_dropped
        or (has_llava and name in dropped_for_llava)
        or tower_is_absent
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_tensor_name(name: str) -> str:
    """Translate a checkpoint tensor name into the name written to the file.

    The cases are tried in this order:

    1. ``image_newline`` becomes ``model.image_newline``.
    2. Names starting with ``multi_modal_projector`` get the prefix ``mm``
       and ``linear_1`` / ``linear_2`` renamed to ``0`` / ``2``.
    3. Names containing ``projection`` are returned untouched.
    4. Names containing ``mm_projector`` get ``model.mm_projector`` replaced
       by ``mm`` and a doubled ``mlp.mlp`` / ``peg.peg`` segment rewritten to
       ``model.mlp`` / ``model.peg``.
    5. Everything else goes through a fixed sequence of substring
       replacements; the sequence order matters because later patterns run
       on the output of earlier ones.
    """
    # Standardize the transformers llava next keys for
    # image newline / mm projector with the classes in haotian-liu LLaVA
    if name == "image_newline":
        return "model.image_newline"

    if name.startswith("multi_modal_projector"):
        renamed = name.replace("multi_modal_projector", "mm")
        # str.replace leaves the string unchanged when the pattern is absent
        renamed = renamed.replace("linear_1", "0")
        return renamed.replace("linear_2", "2")

    if "projection" in name:
        return name

    if "mm_projector" in name:
        renamed = name.replace("model.mm_projector", "mm")
        doubled_segments = (
            (r'mm\.mlp\.mlp', 'mm.model.mlp'),
            (r'mm\.peg\.peg', 'mm.model.peg'),
        )
        for pattern, replacement in doubled_segments:
            renamed = re.sub(pattern, replacement, renamed, count=1)
        return renamed

    substitutions = (
        ("text_model", "t"),
        ("vision_model", "v"),
        ("encoder.layers", "blk"),
        ("embeddings.", ""),
        ("_proj", ""),
        ("self_attn.", "attn_"),
        ("layer_norm", "ln"),
        ("layernorm", "ln"),
        ("mlp.fc1", "ffn_down"),
        ("mlp.fc2", "ffn_up"),
        ("embedding", "embd"),
        ("final", "post"),
        ("layrnorm", "ln"),
    )
    for old, new in substitutions:
        name = name.replace(old, new)
    return name
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def bytes_to_unicode():
    """
    Build a reversible lookup table from byte values to unicode characters.

    Returns a dict with one entry for each of the 256 byte values. Bytes in
    the ranges "!".."~", "¡".."¬" and "®".."ÿ" map to the character with the
    same code point. Every other byte is mapped, in ascending byte order, to
    chr(256), chr(257), ... so that no byte is represented by one of the
    whitespace/control characters that bpe code has trouble with.
    """
    kept_as_is = [
        *range(ord("!"), ord("~") + 1),
        *range(ord("¡"), ord("¬") + 1),
        *range(ord("®"), ord("ÿ") + 1),
    ]
    kept_lookup = set(kept_as_is)
    shifted = [byte for byte in range(2**8) if byte not in kept_lookup]

    # Keys keep the original ordering: the self-mapped bytes first, then the
    # shifted ones.
    byte_values = kept_as_is + shifted
    code_points = kept_as_is + [2**8 + offset for offset in range(len(shifted))]
    return {byte: chr(point) for byte, point in zip(byte_values, code_points)}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# Command-line interface of the converter.
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True)
ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16")
ap.add_argument('--bigendian', action="store_true", default=False, help="Model is executed on big-endian machine")
ap.add_argument("--text-only", action="store_true", required=False,
                help="Save a text-only model. It can't be used to encode images")
ap.add_argument("--vision-only", action="store_true", required=False,
                help="Save a vision-only model. It can't be used to encode texts")
ap.add_argument("--clip-model-is-vision", action="store_true", required=False,
                help="The clip model is a pure vision model (ShareGPT4V vision extract for example)")

# Selectable visual encoders that are compatible with this script
# (mutually exclusive: at most one of them may be given)
encoder_group = ap.add_mutually_exclusive_group()
encoder_group.add_argument("--clip-model-is-openclip", action="store_true", required=False,
                           help="The clip model is from openclip (for ViT-SO400M type)")
encoder_group.add_argument("--clip-model-is-siglip", action="store_true", required=False,
                           help="the visual encoder is Siglip.")

ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.")
ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp, ldpv2", choices=["mlp", "ldp", "ldpv2"], default="mlp")
ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Default is the original model directory", default=None)
# Example: --image-mean 0.48145466 0.4578275 0.40821073 --image-std 0.26862954 0.26130258 0.27577711
# Example: --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5
# Fallback normalization values, used when neither the command line nor a
# loaded processor provides them.
default_image_mean = [0.48145466, 0.4578275, 0.40821073]
default_image_std = [0.26862954, 0.26130258, 0.27577711]
ap.add_argument('--image-mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None)
ap.add_argument('--image-std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None)

# Parse the command line.
args = ap.parse_args()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# The two single-encoder modes contradict each other, so stop early.
if args.text_only and args.vision_only:
    # Name the flags exactly as the parser defines them.
    print("--text-only and --vision-only arguments cannot be specified at the same time.")
    # SystemExit works without relying on the site-provided exit() helper.
    raise SystemExit(1)

# --use-f32 does not cover 4-dimensional tensors: the export loop always
# stores those as f16.
if args.use_f32:
    print("WARNING: Weights for the convolution op is always saved in f16, as the convolution op in GGML does not support 32-bit kernel weights yet.")
|
|
127
|
+
|
|
128
|
+
# output in the same directory as the model if output_dir is None
dir_model = args.model_dir

# A vocabulary is only read for a two-tower CLIP checkpoint that actually
# ships a vocab.json; in every other case vocab and tokens stay None.
vocab_path = dir_model + "/vocab.json"
vision_only_checkpoint = (
    args.clip_model_is_vision
    or args.clip_model_is_openclip
    or args.clip_model_is_siglip
)
vocab = None
tokens = None
if not vision_only_checkpoint and os.path.exists(vocab_path):
    with open(vocab_path, "r", encoding="utf-8") as f:
        vocab = json.load(f)
    tokens = list(vocab)

with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# A pure vision model keeps its hyper-parameters at the top level of the
# config; otherwise each tower has its own sub-dictionary.
if args.clip_model_is_vision:
    v_hparams, t_hparams = config, None
else:
    v_hparams, t_hparams = config["vision_config"], config["text_config"]
|
|
152
|
+
|
|
153
|
+
# Supported output precisions, indexed by ftype:
#   0 -> float32
#   1 -> float16
ftype_str = ["f32", "f16"]

# float16 unless --use-f32 was given
ftype = 0 if args.use_f32 else 1
|
|
163
|
+
|
|
164
|
+
# Only the full two-tower CLIP checkpoint comes with a processor; the
# vision-only variants leave it as None.
processor = None
if args.clip_model_is_siglip:
    model = SiglipVisionModel.from_pretrained(dir_model)
elif args.clip_model_is_vision or args.clip_model_is_openclip:
    model = CLIPVisionModel.from_pretrained(dir_model)
else:
    model = CLIPModel.from_pretrained(dir_model)
    processor = CLIPProcessor.from_pretrained(dir_model)
|
|
173
|
+
|
|
174
|
+
# Pick the output-name prefix and which parts end up in the file.
# Precedence: --text-only, then --llava-projector, then --vision-only.
if args.text_only:
    fname_middle = "text-"
    has_text_encoder, has_vision_encoder, has_llava_projector = True, False, False
elif args.llava_projector is not None:
    fname_middle = "mmproj-"
    has_text_encoder, has_vision_encoder, has_llava_projector = False, True, True
elif args.vision_only:
    fname_middle = "vision-"
    has_text_encoder, has_vision_encoder, has_llava_projector = False, True, False
else:
    fname_middle = ""
    has_text_encoder, has_vision_encoder, has_llava_projector = True, True, False
|
|
190
|
+
|
|
191
|
+
# Resolve the output location (defaults to the model directory) and make
# sure it exists.
output_dir = dir_model if args.output_dir is None else args.output_dir
os.makedirs(output_dir, exist_ok=True)
output_prefix = os.path.basename(output_dir).replace("ggml_", "")
fname_out = os.path.join(output_dir, f"{fname_middle}model-{ftype_str[ftype]}.gguf")

byte_order = GGUFEndian.BIG if args.bigendian else GGUFEndian.LITTLE
fout = GGUFWriter(path=fname_out, arch="clip", endianess=byte_order)

# Record which components the file contains.
for flag_key, flag_value in (
    ("clip.has_text_encoder", has_text_encoder),
    ("clip.has_vision_encoder", has_vision_encoder),
    ("clip.has_llava_projector", has_llava_projector),
):
    fout.add_bool(flag_key, flag_value)
fout.add_file_type(ftype)

# Prefer the name stored in the config; otherwise use the directory name.
model_name = config.get("_name_or_path", os.path.basename(dir_model))
fout.add_name(model_name)

if args.text_only:
    fout.add_description("text-only CLIP model")
elif has_llava_projector:
    fout.add_description("image encoder for LLaVA")
    # add projector type
    fout.add_string("clip.projector_type", args.projector_type)
elif args.vision_only:
    fout.add_description("vision-only CLIP model")
else:
    fout.add_description("two-tower CLIP model")
|
|
213
|
+
|
|
214
|
+
# Text-tower hyper-parameters and the token list.
if has_text_encoder:
    assert t_hparams is not None
    assert tokens is not None
    if args.clip_model_is_siglip:
        text_projection_dim = 0
    elif "projection_dim" in t_hparams:
        text_projection_dim = t_hparams["projection_dim"]
    else:
        # Only consult the top-level config when the text config lacks the
        # key, so a config without a top-level "projection_dim" still works.
        text_projection_dim = config["projection_dim"]
    # text_model hparams
    fout.add_uint32(k(KEY_CONTEXT_LENGTH, TEXT), t_hparams["max_position_embeddings"])
    fout.add_uint32(k(KEY_EMBEDDING_LENGTH, TEXT), t_hparams["hidden_size"])
    fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, TEXT), t_hparams["intermediate_size"])
    fout.add_uint32("clip.text.projection_dim", text_projection_dim)
    fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, TEXT), t_hparams["num_attention_heads"])
    fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, TEXT), t_hparams["layer_norm_eps"])
    fout.add_uint32(k(KEY_BLOCK_COUNT, TEXT), t_hparams["num_hidden_layers"])
    fout.add_token_list(tokens)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def get_non_negative_vision_feature_layers(v_hparams):
    """
    Return the configured vision feature layer(s) as non-negative indices.

    The indices refer to the hidden states of the visual encoder, which
    generally look like:

        [<emb input>, <output of enc block 0>, ... <output of enc block num_hidden_layers>]

    so index n+1 is the output of encoder block n. Negative indices are
    converted by adding num_hidden_layers + 1, which leaves -1 free to mean
    "unset" in the model.

    The value is read from the module-level config: "vision_feature_layer"
    is checked first, then "mm_vision_select_layer". A single int is wrapped
    in a list. When neither key exists the function returns None.
    """
    num_hidden_layers = v_hparams["num_hidden_layers"]

    def to_non_negative(layer_idx):
        if layer_idx >= 0:
            return layer_idx
        return num_hidden_layers + layer_idx + 1

    # First key: llava models in transformers; second: the original format.
    for candidate_key in ("vision_feature_layer", "mm_vision_select_layer"):
        if candidate_key not in config:
            continue
        selected = config[candidate_key]
        if isinstance(selected, int):
            selected = [selected]
        return [to_non_negative(layer_idx) for layer_idx in selected]

    return None
|
|
259
|
+
|
|
260
|
+
# Determine if we have explicitly specified vision feature layers in our config
feature_layers = get_non_negative_vision_feature_layers(v_hparams)

if has_vision_encoder:
    # Siglip does not have a visual projector; set projection dim to 0
    if args.clip_model_is_siglip:
        visual_projection_dim = 0
    elif "projection_dim" in v_hparams:
        visual_projection_dim = v_hparams["projection_dim"]
    else:
        # Only consult the top-level config when the vision config lacks the
        # key, so a config without a top-level "projection_dim" still works.
        visual_projection_dim = config["projection_dim"]

    # set vision_model hparams
    fout.add_uint32("clip.vision.image_size", v_hparams["image_size"])
    fout.add_uint32("clip.vision.patch_size", v_hparams["patch_size"])
    fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), v_hparams["hidden_size"])
    fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), v_hparams["intermediate_size"])
    fout.add_uint32("clip.vision.projection_dim", visual_projection_dim)
    fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), v_hparams["num_attention_heads"])
    fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), v_hparams["layer_norm_eps"])

    # Explicit feature layers decide the block count; otherwise all layers
    # are counted, minus one when a llava projector is attached.
    if feature_layers:
        block_count = max(feature_layers)
    elif has_llava_projector:
        block_count = v_hparams["num_hidden_layers"] - 1
    else:
        block_count = v_hparams["num_hidden_layers"]
    fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), block_count)

    # "image_grid_pinpoints" is a list of pairs and is stored flattened, e.g.
    #   [[336, 672], [672, 336], [672, 672], [1008, 336], [336, 1008]]
    # becomes
    #   [336, 672, 672, 336, 672, 672, 1008, 336, 336, 1008]
    if "image_grid_pinpoints" in v_hparams:
        image_grid_pinpoints = [
            p
            for pinpoint in v_hparams["image_grid_pinpoints"]
            for p in pinpoint
        ]
        fout.add_array("clip.vision.image_grid_pinpoints", image_grid_pinpoints)
    if "image_crop_resolution" in v_hparams:
        fout.add_uint32("clip.vision.image_crop_resolution", v_hparams["image_crop_resolution"])
    if "image_aspect_ratio" in v_hparams:
        fout.add_string("clip.vision.image_aspect_ratio", v_hparams["image_aspect_ratio"])
    if "image_split_resolution" in v_hparams:
        fout.add_uint32("clip.vision.image_split_resolution", v_hparams["image_split_resolution"])
    if "mm_patch_merge_type" in v_hparams:
        fout.add_string("clip.vision.mm_patch_merge_type", v_hparams["mm_patch_merge_type"])
    if "mm_projector_type" in v_hparams:
        fout.add_string("clip.vision.mm_projector_type", v_hparams["mm_projector_type"])
    if feature_layers:
        fout.add_array("clip.vision.feature_layer", feature_layers)

    # With a processor, its normalization values are used unless the command
    # line supplies values that differ from the built-in defaults. Without a
    # processor, the command line wins and the defaults are the fallback.
    if processor is not None:
        image_mean = processor.image_processor.image_mean if args.image_mean is None or args.image_mean == default_image_mean else args.image_mean  # pyright: ignore[reportAttributeAccessIssue]
        image_std = processor.image_processor.image_std if args.image_std is None or args.image_std == default_image_std else args.image_std  # pyright: ignore[reportAttributeAccessIssue]
    else:
        image_mean = args.image_mean if args.image_mean is not None else default_image_mean
        image_std = args.image_std if args.image_std is not None else default_image_std
    fout.add_array("clip.vision.image_mean", image_mean)
    fout.add_array("clip.vision.image_std", image_std)

use_gelu = v_hparams["hidden_act"] == "gelu"
fout.add_bool("clip.use_gelu", use_gelu)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
if has_llava_projector:
    # By default, we drop the last layer for llava projector
    # models unless we have explicitly set vision feature layers.
    # An empty list counts as "not set", matching the truthiness test used
    # when the vision block count is computed (and avoiding max([])).
    if not feature_layers:
        model.vision_model.encoder.layers.pop(-1)
    else:
        model.vision_model.encoder.layers = model.vision_model.encoder.layers[:max(feature_layers)]

    # NOTE(security): torch.load unpickles the file, so only pass projector
    # files that come from a trusted source.
    # map_location="cpu" lets projectors saved from an accelerator be loaded
    # and converted with .numpy() on any machine.
    projector = torch.load(args.llava_projector, map_location="cpu")
    for name, data in projector.items():
        name = get_tensor_name(name)
        # pw and dw conv ndim==4
        # 2-D and 4-D tensors are stored as f16, everything else as f32.
        if data.ndim == 2 or data.ndim == 4:
            data = data.squeeze().numpy().astype(np.float16)
        else:
            data = data.squeeze().numpy().astype(np.float32)

        fout.add_tensor(name, data)

    print("Projector tensors added\n")
|
|
369
|
+
|
|
370
|
+
# Walk every tensor of the loaded model, skip the unwanted ones, rename the
# rest and store them in the precision chosen below.
state_dict = model.state_dict()
for raw_name, tensor in state_dict.items():
    if should_skip_tensor(raw_name, has_text_encoder, has_vision_encoder, has_llava_projector):
        # we don't need this
        print(f"skipping parameter: {raw_name}")
        continue

    name = get_tensor_name(raw_name)
    data = tensor.squeeze().numpy()
    n_dims = data.ndim

    # ftype == 0 -> float32, ftype == 1 -> float16
    ftype_cur = 0
    target_dtype = None  # None means "leave the array as it is"
    if n_dims == 4:
        print(f"tensor {name} is always saved in f16")
        target_dtype, ftype_cur = np.float16, 1
    elif ftype == 1:
        # In f16 mode only 2-D ".weight" tensors are stored as float16.
        if n_dims == 2 and name.endswith(".weight"):
            print(" Converting to float16")
            target_dtype, ftype_cur = np.float16, 1
        else:
            print(" Converting to float32")
            target_dtype = np.float32
    elif data.dtype != np.float32:
        print(" Converting to float32")
        target_dtype = np.float32

    if target_dtype is not None:
        data = data.astype(target_dtype)

    print(f"{name} - {ftype_str[ftype_cur]} - shape = {data.shape}")
    fout.add_tensor(name, data)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# Flush the file in order (header, key/value data, tensors) and close it.
for finish_step in (
    fout.write_header_to_file,
    fout.write_kv_data_to_file,
    fout.write_tensors_to_file,
    fout.close,
):
    finish_step()

print(f"Done. Output file: {fname_out}")
|