local-llm-rn 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/CMakeLists.txt +285 -0
- package/cpp/common/CMakeLists.txt +149 -0
- package/cpp/common/arg.cpp +3799 -0
- package/cpp/common/arg.h +131 -0
- package/cpp/common/base64.hpp +392 -0
- package/cpp/common/build-info.cpp.in +4 -0
- package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
- package/cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/common/chat-parser.cpp +1649 -0
- package/cpp/common/chat-parser.h +133 -0
- package/cpp/common/chat-peg-parser.cpp +124 -0
- package/cpp/common/chat-peg-parser.h +105 -0
- package/cpp/common/chat.cpp +3355 -0
- package/cpp/common/chat.h +252 -0
- package/cpp/common/common.cpp +1824 -0
- package/cpp/common/common.h +930 -0
- package/cpp/common/console.cpp +1137 -0
- package/cpp/common/console.h +41 -0
- package/cpp/common/debug.cpp +167 -0
- package/cpp/common/debug.h +43 -0
- package/cpp/common/download.cpp +792 -0
- package/cpp/common/download.h +84 -0
- package/cpp/common/http.h +84 -0
- package/cpp/common/jinja/README.md +88 -0
- package/cpp/common/jinja/caps.cpp +285 -0
- package/cpp/common/jinja/caps.h +30 -0
- package/cpp/common/jinja/lexer.cpp +341 -0
- package/cpp/common/jinja/lexer.h +157 -0
- package/cpp/common/jinja/parser.cpp +591 -0
- package/cpp/common/jinja/parser.h +21 -0
- package/cpp/common/jinja/runtime.cpp +867 -0
- package/cpp/common/jinja/runtime.h +638 -0
- package/cpp/common/jinja/string.cpp +213 -0
- package/cpp/common/jinja/string.h +61 -0
- package/cpp/common/jinja/utils.h +149 -0
- package/cpp/common/jinja/value.cpp +1393 -0
- package/cpp/common/jinja/value.h +756 -0
- package/cpp/common/json-partial.cpp +324 -0
- package/cpp/common/json-partial.h +39 -0
- package/cpp/common/json-schema-to-grammar.cpp +1153 -0
- package/cpp/common/json-schema-to-grammar.h +43 -0
- package/cpp/common/llguidance.cpp +258 -0
- package/cpp/common/log.cpp +446 -0
- package/cpp/common/log.h +119 -0
- package/cpp/common/ngram-cache.cpp +285 -0
- package/cpp/common/ngram-cache.h +101 -0
- package/cpp/common/ngram-map.cpp +530 -0
- package/cpp/common/ngram-map.h +115 -0
- package/cpp/common/ngram-mod.cpp +60 -0
- package/cpp/common/ngram-mod.h +38 -0
- package/cpp/common/peg-parser.cpp +1712 -0
- package/cpp/common/peg-parser.h +459 -0
- package/cpp/common/preset.cpp +483 -0
- package/cpp/common/preset.h +83 -0
- package/cpp/common/regex-partial.cpp +204 -0
- package/cpp/common/regex-partial.h +56 -0
- package/cpp/common/sampling.cpp +745 -0
- package/cpp/common/sampling.h +119 -0
- package/cpp/common/speculative.cpp +1074 -0
- package/cpp/common/speculative.h +41 -0
- package/cpp/common/unicode.cpp +64 -0
- package/cpp/common/unicode.h +22 -0
- package/cpp/ggml/CMakeLists.txt +494 -0
- package/cpp/ggml/cmake/GitVars.cmake +22 -0
- package/cpp/ggml/cmake/common.cmake +50 -0
- package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
- package/cpp/ggml/include/ggml-alloc.h +85 -0
- package/cpp/ggml/include/ggml-backend.h +373 -0
- package/cpp/ggml/include/ggml-blas.h +25 -0
- package/cpp/ggml/include/ggml-cann.h +123 -0
- package/cpp/ggml/include/ggml-cpp.h +39 -0
- package/cpp/ggml/include/ggml-cpu.h +151 -0
- package/cpp/ggml/include/ggml-cuda.h +47 -0
- package/cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/ggml/include/ggml-metal.h +61 -0
- package/cpp/ggml/include/ggml-opencl.h +26 -0
- package/cpp/ggml/include/ggml-opt.h +256 -0
- package/cpp/ggml/include/ggml-rpc.h +30 -0
- package/cpp/ggml/include/ggml-sycl.h +49 -0
- package/cpp/ggml/include/ggml-virtgpu.h +14 -0
- package/cpp/ggml/include/ggml-vulkan.h +29 -0
- package/cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/ggml/include/ggml-zdnn.h +17 -0
- package/cpp/ggml/include/ggml-zendnn.h +22 -0
- package/cpp/ggml/include/ggml.h +2753 -0
- package/cpp/ggml/include/gguf.h +204 -0
- package/cpp/ggml/src/CMakeLists.txt +492 -0
- package/cpp/ggml/src/ggml-alloc.c +1244 -0
- package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
- package/cpp/ggml/src/ggml-backend-dl.h +45 -0
- package/cpp/ggml/src/ggml-backend-impl.h +255 -0
- package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
- package/cpp/ggml/src/ggml-backend.cpp +2270 -0
- package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
- package/cpp/ggml/src/ggml-common.h +1878 -0
- package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
- package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- package/cpp/ggml/src/ggml-cpu/common.h +95 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
- package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
- package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
- package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
- package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
- package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
- package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
- package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
- package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
- package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
- package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
- package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
- package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
- package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
- package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
- package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
- package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
- package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
- package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
- package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
- package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
- package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
- package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
- package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
- package/cpp/ggml/src/ggml-impl.h +724 -0
- package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
- package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
- package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
- package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
- package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
- package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
- package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
- package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
- package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
- package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- package/cpp/ggml/src/ggml-opt.cpp +1093 -0
- package/cpp/ggml/src/ggml-quants.c +5325 -0
- package/cpp/ggml/src/ggml-quants.h +106 -0
- package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
- package/cpp/ggml/src/ggml-threading.cpp +12 -0
- package/cpp/ggml/src/ggml-threading.h +14 -0
- package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
- package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- package/cpp/ggml/src/ggml.c +7669 -0
- package/cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/ggml/src/gguf.cpp +1699 -0
- package/cpp/include/llama-cpp.h +32 -0
- package/cpp/include/llama.h +1568 -0
- package/cpp/mtmd/CMakeLists.txt +98 -0
- package/cpp/mtmd/README.md +63 -0
- package/cpp/mtmd/clip-graph.h +117 -0
- package/cpp/mtmd/clip-impl.h +586 -0
- package/cpp/mtmd/clip-model.h +390 -0
- package/cpp/mtmd/clip.cpp +4154 -0
- package/cpp/mtmd/clip.h +121 -0
- package/cpp/mtmd/deprecation-warning.cpp +22 -0
- package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
- package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
- package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
- package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
- package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
- package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
- package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
- package/cpp/mtmd/models/cogvlm.cpp +98 -0
- package/cpp/mtmd/models/conformer.cpp +216 -0
- package/cpp/mtmd/models/glm4v.cpp +122 -0
- package/cpp/mtmd/models/internvl.cpp +69 -0
- package/cpp/mtmd/models/kimik25.cpp +101 -0
- package/cpp/mtmd/models/kimivl.cpp +63 -0
- package/cpp/mtmd/models/llama4.cpp +96 -0
- package/cpp/mtmd/models/llava.cpp +374 -0
- package/cpp/mtmd/models/minicpmv.cpp +114 -0
- package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
- package/cpp/mtmd/models/models.h +128 -0
- package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
- package/cpp/mtmd/models/paddleocr.cpp +52 -0
- package/cpp/mtmd/models/pixtral.cpp +86 -0
- package/cpp/mtmd/models/qwen2vl.cpp +183 -0
- package/cpp/mtmd/models/qwen3vl.cpp +193 -0
- package/cpp/mtmd/models/siglip.cpp +86 -0
- package/cpp/mtmd/models/whisper-enc.cpp +115 -0
- package/cpp/mtmd/models/youtuvl.cpp +179 -0
- package/cpp/mtmd/mtmd-audio.cpp +730 -0
- package/cpp/mtmd/mtmd-audio.h +113 -0
- package/cpp/mtmd/mtmd-cli.cpp +437 -0
- package/cpp/mtmd/mtmd-helper.cpp +521 -0
- package/cpp/mtmd/mtmd-helper.h +96 -0
- package/cpp/mtmd/mtmd.cpp +1156 -0
- package/cpp/mtmd/mtmd.h +319 -0
- package/cpp/mtmd/requirements.txt +5 -0
- package/cpp/mtmd/test-1.jpeg +0 -0
- package/cpp/mtmd/test-2.mp3 +0 -0
- package/cpp/mtmd/tests.sh +192 -0
- package/cpp/src/CMakeLists.txt +169 -0
- package/cpp/src/llama-adapter.cpp +488 -0
- package/cpp/src/llama-adapter.h +89 -0
- package/cpp/src/llama-arch.cpp +2855 -0
- package/cpp/src/llama-arch.h +619 -0
- package/cpp/src/llama-batch.cpp +917 -0
- package/cpp/src/llama-batch.h +173 -0
- package/cpp/src/llama-chat.cpp +896 -0
- package/cpp/src/llama-chat.h +71 -0
- package/cpp/src/llama-context.cpp +3512 -0
- package/cpp/src/llama-context.h +359 -0
- package/cpp/src/llama-cparams.cpp +5 -0
- package/cpp/src/llama-cparams.h +44 -0
- package/cpp/src/llama-grammar.cpp +1464 -0
- package/cpp/src/llama-grammar.h +194 -0
- package/cpp/src/llama-graph.cpp +2685 -0
- package/cpp/src/llama-graph.h +1026 -0
- package/cpp/src/llama-hparams.cpp +234 -0
- package/cpp/src/llama-hparams.h +339 -0
- package/cpp/src/llama-impl.cpp +171 -0
- package/cpp/src/llama-impl.h +73 -0
- package/cpp/src/llama-io.cpp +15 -0
- package/cpp/src/llama-io.h +35 -0
- package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
- package/cpp/src/llama-kv-cache-iswa.h +137 -0
- package/cpp/src/llama-kv-cache.cpp +2271 -0
- package/cpp/src/llama-kv-cache.h +388 -0
- package/cpp/src/llama-kv-cells.h +533 -0
- package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
- package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
- package/cpp/src/llama-memory-hybrid.cpp +268 -0
- package/cpp/src/llama-memory-hybrid.h +139 -0
- package/cpp/src/llama-memory-recurrent.cpp +1165 -0
- package/cpp/src/llama-memory-recurrent.h +182 -0
- package/cpp/src/llama-memory.cpp +59 -0
- package/cpp/src/llama-memory.h +122 -0
- package/cpp/src/llama-mmap.cpp +785 -0
- package/cpp/src/llama-mmap.h +92 -0
- package/cpp/src/llama-model-loader.cpp +1414 -0
- package/cpp/src/llama-model-loader.h +203 -0
- package/cpp/src/llama-model-saver.cpp +286 -0
- package/cpp/src/llama-model-saver.h +37 -0
- package/cpp/src/llama-model.cpp +9253 -0
- package/cpp/src/llama-model.h +576 -0
- package/cpp/src/llama-quant.cpp +1119 -0
- package/cpp/src/llama-quant.h +1 -0
- package/cpp/src/llama-sampler.cpp +3885 -0
- package/cpp/src/llama-sampler.h +42 -0
- package/cpp/src/llama-vocab.cpp +3970 -0
- package/cpp/src/llama-vocab.h +187 -0
- package/cpp/src/llama.cpp +1313 -0
- package/cpp/src/models/afmoe.cpp +191 -0
- package/cpp/src/models/apertus.cpp +125 -0
- package/cpp/src/models/arcee.cpp +135 -0
- package/cpp/src/models/arctic.cpp +138 -0
- package/cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/src/models/baichuan.cpp +122 -0
- package/cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/src/models/bert.cpp +178 -0
- package/cpp/src/models/bitnet.cpp +160 -0
- package/cpp/src/models/bloom.cpp +101 -0
- package/cpp/src/models/chameleon.cpp +178 -0
- package/cpp/src/models/chatglm.cpp +132 -0
- package/cpp/src/models/codeshell.cpp +111 -0
- package/cpp/src/models/cogvlm.cpp +102 -0
- package/cpp/src/models/cohere2-iswa.cpp +134 -0
- package/cpp/src/models/command-r.cpp +122 -0
- package/cpp/src/models/dbrx.cpp +123 -0
- package/cpp/src/models/deci.cpp +135 -0
- package/cpp/src/models/deepseek.cpp +144 -0
- package/cpp/src/models/deepseek2.cpp +262 -0
- package/cpp/src/models/delta-net-base.cpp +376 -0
- package/cpp/src/models/dots1.cpp +134 -0
- package/cpp/src/models/dream.cpp +105 -0
- package/cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/src/models/eurobert.cpp +97 -0
- package/cpp/src/models/exaone-moe.cpp +146 -0
- package/cpp/src/models/exaone.cpp +114 -0
- package/cpp/src/models/exaone4.cpp +123 -0
- package/cpp/src/models/falcon-h1.cpp +111 -0
- package/cpp/src/models/falcon.cpp +120 -0
- package/cpp/src/models/gemma-embedding.cpp +116 -0
- package/cpp/src/models/gemma.cpp +112 -0
- package/cpp/src/models/gemma2-iswa.cpp +128 -0
- package/cpp/src/models/gemma3.cpp +155 -0
- package/cpp/src/models/gemma3n-iswa.cpp +384 -0
- package/cpp/src/models/glm4-moe.cpp +170 -0
- package/cpp/src/models/glm4.cpp +157 -0
- package/cpp/src/models/gpt2.cpp +105 -0
- package/cpp/src/models/gptneox.cpp +144 -0
- package/cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/src/models/granite.cpp +211 -0
- package/cpp/src/models/grok.cpp +159 -0
- package/cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/src/models/internlm2.cpp +120 -0
- package/cpp/src/models/jais.cpp +86 -0
- package/cpp/src/models/jais2.cpp +123 -0
- package/cpp/src/models/jamba.cpp +106 -0
- package/cpp/src/models/kimi-linear.cpp +392 -0
- package/cpp/src/models/lfm2.cpp +190 -0
- package/cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/src/models/llada.cpp +99 -0
- package/cpp/src/models/llama-iswa.cpp +178 -0
- package/cpp/src/models/llama.cpp +168 -0
- package/cpp/src/models/maincoder.cpp +117 -0
- package/cpp/src/models/mamba-base.cpp +285 -0
- package/cpp/src/models/mamba.cpp +54 -0
- package/cpp/src/models/mimo2-iswa.cpp +123 -0
- package/cpp/src/models/minicpm3.cpp +200 -0
- package/cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/src/models/mistral3.cpp +160 -0
- package/cpp/src/models/models.h +684 -0
- package/cpp/src/models/modern-bert.cpp +109 -0
- package/cpp/src/models/mpt.cpp +126 -0
- package/cpp/src/models/nemotron-h.cpp +148 -0
- package/cpp/src/models/nemotron.cpp +122 -0
- package/cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/src/models/olmo.cpp +121 -0
- package/cpp/src/models/olmo2.cpp +150 -0
- package/cpp/src/models/olmoe.cpp +124 -0
- package/cpp/src/models/openai-moe-iswa.cpp +127 -0
- package/cpp/src/models/openelm.cpp +124 -0
- package/cpp/src/models/orion.cpp +123 -0
- package/cpp/src/models/paddleocr.cpp +122 -0
- package/cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/src/models/phi2.cpp +121 -0
- package/cpp/src/models/phi3.cpp +152 -0
- package/cpp/src/models/plamo.cpp +110 -0
- package/cpp/src/models/plamo2.cpp +318 -0
- package/cpp/src/models/plamo3.cpp +128 -0
- package/cpp/src/models/plm.cpp +169 -0
- package/cpp/src/models/qwen.cpp +108 -0
- package/cpp/src/models/qwen2.cpp +126 -0
- package/cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/src/models/qwen3.cpp +117 -0
- package/cpp/src/models/qwen35.cpp +386 -0
- package/cpp/src/models/qwen35moe.cpp +420 -0
- package/cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/src/models/qwen3next.cpp +525 -0
- package/cpp/src/models/qwen3vl-moe.cpp +140 -0
- package/cpp/src/models/qwen3vl.cpp +132 -0
- package/cpp/src/models/refact.cpp +94 -0
- package/cpp/src/models/rnd1.cpp +126 -0
- package/cpp/src/models/rwkv6-base.cpp +164 -0
- package/cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/src/models/rwkv7-base.cpp +137 -0
- package/cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/src/models/smallthinker.cpp +126 -0
- package/cpp/src/models/smollm3.cpp +128 -0
- package/cpp/src/models/stablelm.cpp +146 -0
- package/cpp/src/models/starcoder.cpp +100 -0
- package/cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/src/models/step35-iswa.cpp +168 -0
- package/cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/src/models/xverse.cpp +108 -0
- package/cpp/src/unicode-data.cpp +7034 -0
- package/cpp/src/unicode-data.h +20 -0
- package/cpp/src/unicode.cpp +1103 -0
- package/cpp/src/unicode.h +111 -0
- package/cpp/vendor/nlohmann/json.hpp +25526 -0
- package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/vendor/stb/stb_image.h +7988 -0
- package/ios/LocalLLM-Bridging-Header.h +2 -0
- package/ios/LocalLLM.h +5 -0
- package/ios/LocalLLM.mm +1267 -0
- package/local-llm-rn.podspec +60 -0
- package/package.json +35 -0
- package/src/NativeLocalLLM.ts +73 -0
- package/src/device.ts +50 -0
- package/src/download-adapter.ts +17 -0
- package/src/index.ts +21 -0
- package/src/native-bridge.ts +142 -0
- package/src/rn-downloader.ts +37 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
import torch
|
|
7
|
+
import numpy as np
|
|
8
|
+
from gguf import *
|
|
9
|
+
|
|
10
|
+
TEXT = "clip.text"
|
|
11
|
+
VISION = "clip.vision"
|
|
12
|
+
from transformers import SiglipVisionModel, SiglipVisionConfig
|
|
13
|
+
|
|
14
|
+
def k(raw_key: str, arch: str) -> str:
    """Expand the ``{arch}`` placeholder of a key template.

    Args:
        raw_key: Template string that may contain an ``{arch}`` field.
        arch: Value substituted for ``{arch}``.

    Returns:
        ``raw_key`` with ``{arch}`` filled in via ``str.format``.
    """
    expanded = raw_key.format(arch=arch)
    return expanded
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_llava: bool) -> bool:
    """Tell whether a state-dict tensor must be left out of the output file.

    Args:
        name: Tensor name as it appears in the state dict.
        has_text: Whether tensors whose name starts with ``"t"`` are wanted.
        has_vision: Whether tensors whose name starts with ``"v"`` are wanted.
        has_llava: Accepted for interface compatibility; not consulted.

    Returns:
        ``True`` when the tensor should be skipped, ``False`` otherwise.
    """
    # Names that are dropped regardless of the selected encoders.
    unconditional = (
        "logit_scale",
        "text_model.embeddings.position_ids",
        "vision_model.embeddings.position_ids",
    )
    # Every `vision_model.head.*` tensor that is dropped.
    head_tensors = (
        "vision_model.head.probe",
        "vision_model.head.attention.in_proj_weight",
        "vision_model.head.attention.in_proj_bias",
        "vision_model.head.attention.out_proj.weight",
        "vision_model.head.attention.out_proj.bias",
        "vision_model.head.layernorm.weight",
        "vision_model.head.layernorm.bias",
        "vision_model.head.mlp.fc1.weight",
        "vision_model.head.mlp.fc1.bias",
        "vision_model.head.mlp.fc2.weight",
        "vision_model.head.mlp.fc2.bias",
    )
    if name in unconditional or name in head_tensors:
        return True

    # The tower a tensor belongs to is decided by its first letter only.
    vision_unwanted = name.startswith("v") and not has_vision
    text_unwanted = name.startswith("t") and not has_text
    return vision_unwanted or text_unwanted
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_tensor_name(name: str) -> str:
    """Translate a checkpoint tensor name into the name written to the output.

    Three cases are handled, in this order:

    * names containing ``"projection"`` are returned untouched;
    * names containing ``"mm_projector"`` get ``model.mm_projector`` shortened
      to ``mm`` and the first ``mm.mlp.mlp`` / ``mm.peg.peg`` rewritten to
      ``mm.model.mlp`` / ``mm.model.peg``;
    * every other name goes through a fixed, ordered list of substring
      substitutions.

    Args:
        name: Original tensor name.

    Returns:
        The renamed tensor.
    """
    if "projection" in name:
        return name

    if "mm_projector" in name:
        renamed = name.replace("model.mm_projector", "mm")
        for pattern, replacement in (
            (r'mm\.mlp\.mlp', 'mm.model.mlp'),
            (r'mm\.peg\.peg', 'mm.model.peg'),
        ):
            renamed = re.sub(pattern, replacement, renamed, count=1)
        return renamed

    # Order matters: later entries operate on the output of earlier ones.
    substitutions = (
        ("text_model", "t"),
        ("vision_model", "v"),
        ("encoder.layers", "blk"),
        ("embeddings.", ""),
        ("_proj", ""),
        ("self_attn.", "attn_"),
        ("layer_norm", "ln"),
        ("layernorm", "ln"),
        ("mlp.fc1", "ffn_down"),
        ("mlp.fc2", "ffn_up"),
        ("embedding", "embd"),
        ("final", "post"),
        ("layrnorm", "ln"),
    )
    renamed = name
    for old, new in substitutions:
        renamed = renamed.replace(old, new)
    return renamed
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def bytes_to_unicode():
    """Build a reversible table from byte values to single-character strings.

    The byte values in the ranges ``!``..``~``, ``¡``..``¬`` and ``®``..``ÿ``
    map to the character with the same code point. Every other byte value
    (0-255) is assigned, in ascending order, the next unused code point
    starting at 256, so that no byte maps onto one of the excluded characters.

    Returns:
        A dict with 256 entries mapping ``int`` byte values to one-character
        ``str`` values. Self-mapped bytes come first, followed by the shifted
        ones.
    """
    self_mapped = [
        *range(ord("!"), ord("~") + 1),
        *range(ord("¡"), ord("¬") + 1),
        *range(ord("®"), ord("ÿ") + 1),
    ]
    byte_values = list(self_mapped)
    code_points = list(self_mapped)

    shifted = 0
    for byte in range(256):
        if byte in self_mapped:
            continue
        # Bytes outside the three ranges are moved past the 8-bit range.
        byte_values.append(byte)
        code_points.append(256 + shifted)
        shifted += 1

    return dict(zip(byte_values, map(chr, code_points)))
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Command-line interface of the image-encoder converter.
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True)
ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16")
ap.add_argument("--text-only", action="store_true", required=False,
                help="Save a text-only model. It can't be used to encode images")
ap.add_argument("--vision-only", action="store_true", required=False,
                help="Save a vision-only model. It can't be used to encode texts")
ap.add_argument("--clip-model-is-vision", action="store_true", required=False,
                help="The clip model is a pure vision model (ShareGPT4V vision extract for example)")
ap.add_argument("--clip-model-is-openclip", action="store_true", required=False,
                help="The clip model is from openclip (for ViT-SO400M type)")
ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.")
# The help text lists every accepted choice, including the default.
ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp, ldpv2, adapter",
                choices=["mlp", "ldp", "ldpv2", "adapter"], default="adapter")
ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Default is the original model directory", default=None)
# Examples:
#   --image-mean 0.48145466 0.4578275 0.40821073 --image-std 0.26862954 0.26130258 0.27577711
#   --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5
default_image_mean = [0.5, 0.5, 0.5]
default_image_std = [0.5, 0.5, 0.5]
ap.add_argument('--image-mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None)
ap.add_argument('--image-std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None)

args = ap.parse_args()

# The two "only" modes are mutually exclusive.
if args.text_only and args.vision_only:
    print("--text-only and --vision-only arguments cannot be specified at the same time.")
    exit(1)

if args.use_f32:
    print("WARNING: Weights for the convolution op is always saved in f16, as the convolution op in GGML does not support 32-bit kernel weights yet.")
|
|
119
|
+
|
|
120
|
+
# All inputs (vocab.json, config.json, glm.clip) are read from the model directory.
dir_model = args.model_dir
vocab_path = os.path.join(dir_model, "vocab.json")

# The vocabulary is only read when the checkpoint is neither a pure vision
# model nor an openclip model, and a vocab.json is actually present.
if args.clip_model_is_vision or args.clip_model_is_openclip or not os.path.exists(vocab_path):
    vocab = None
    tokens = None
else:
    with open(vocab_path, "r", encoding="utf-8") as f:
        vocab = json.load(f)
    tokens = list(vocab)

with open(os.path.join(dir_model, "config.json"), "r", encoding="utf-8") as f:
    config = json.load(f)

# A pure vision checkpoint keeps its hyper-parameters at the top level of
# config.json; otherwise they live under "vision_config".
v_hparams = config if args.clip_model_is_vision else config["vision_config"]
# Text hyper-parameters are never loaded by this script.
t_hparams = None

# possible data types
#   ftype == 0 -> float32
#   ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]
ftype = 0 if args.use_f32 else 1

vision_config = SiglipVisionConfig(**v_hparams)
model = SiglipVisionModel(vision_config)
# map_location="cpu": the weights are converted with .numpy() further down,
# which only works for CPU tensors, and it lets checkpoints saved from a GPU
# load on machines without one.
model.load_state_dict(torch.load(os.path.join(dir_model, "glm.clip"), map_location="cpu"))
|
|
154
|
+
|
|
155
|
+
# Decide which components the output file advertises and which file-name
# prefix goes with that choice.
has_text_encoder = False
has_vision_encoder = True
has_glm_projector = True
if args.text_only:
    fname_middle = "text-"
    has_vision_encoder = False
elif args.llava_projector is not None:
    fname_middle = "mmproj-"
elif args.vision_only:
    fname_middle = "vision-"
else:
    fname_middle = ""

# Without --output-dir the GGUF file is written next to the model.
output_dir = dir_model if args.output_dir is None else args.output_dir
os.makedirs(output_dir, exist_ok=True)
output_prefix = os.path.basename(output_dir).replace("ggml_", "")
fname_out = os.path.join(output_dir, f"{fname_middle}model-{ftype_str[ftype]}.gguf")
fout = GGUFWriter(path=fname_out, arch="clip")

# General metadata.
for flag_key, flag_value in (
    ("clip.has_text_encoder", has_text_encoder),
    ("clip.has_vision_encoder", has_vision_encoder),
    ("clip.has_glm_projector", has_glm_projector),
):
    fout.add_bool(flag_key, flag_value)
fout.add_file_type(ftype)
model_name = config.get("_name_or_path", os.path.basename(dir_model))
fout.add_name(model_name)
if has_glm_projector:
    fout.add_description("image encoder for glm4v")
    fout.add_string("clip.projector_type", "adapter")
else:
    fout.add_description("two-tower CLIP model")

if has_text_encoder:
    assert t_hparams is not None
    assert tokens is not None
    # text_model hparams
    text_hp = t_hparams
    fout.add_uint32(k(KEY_CONTEXT_LENGTH, TEXT), text_hp["max_position_embeddings"])
    fout.add_uint32(k(KEY_EMBEDDING_LENGTH, TEXT), text_hp["hidden_size"])
    fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, TEXT), text_hp["intermediate_size"])
    fout.add_uint32("clip.text.projection_dim", text_hp.get("projection_dim", config["projection_dim"]))
    fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, TEXT), text_hp["num_attention_heads"])
    fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, TEXT), text_hp["layer_norm_eps"])
    fout.add_uint32(k(KEY_BLOCK_COUNT, TEXT), text_hp["num_hidden_layers"])
    fout.add_token_list(tokens)

if has_vision_encoder:
    # vision_model hparams
    for gguf_key, hparam_name in (
        ("clip.vision.image_size", "image_size"),
        ("clip.vision.patch_size", "patch_size"),
        (k(KEY_EMBEDDING_LENGTH, VISION), "hidden_size"),
        (k(KEY_FEED_FORWARD_LENGTH, VISION), "intermediate_size"),
    ):
        fout.add_uint32(gguf_key, v_hparams[hparam_name])
    # The projection dimension and layer-norm epsilon are fixed values here,
    # not read from the config.
    fout.add_uint32("clip.vision.projection_dim", 0)
    fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), v_hparams["num_attention_heads"])
    fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6)
    fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), v_hparams["num_hidden_layers"])

    # Command-line values win over the built-in defaults.
    image_mean = default_image_mean if args.image_mean is None else args.image_mean
    image_std = default_image_std if args.image_std is None else args.image_std
    fout.add_array("clip.vision.image_mean", image_mean)
    fout.add_array("clip.vision.image_std", image_std)

fout.add_bool("clip.use_gelu", True)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
if has_glm_projector:
    # model.vision_model.encoder.layers.pop(-1)  # pyright: ignore[reportAttributeAccessIssue]
    if args.llava_projector is None:
        # Fail with an actionable message instead of whatever torch.load
        # raises when handed None.
        raise SystemExit("--llava-projector is required to export the projector tensors.")
    # map_location="cpu": .numpy() below only works on CPU tensors.
    projector = torch.load(args.llava_projector, map_location="cpu")
    for name, data in projector.items():
        name = get_tensor_name(name)
        # 2-D tensors and 4-D tensors (pw and dw conv) are stored as f16,
        # everything else as f32.
        target_dtype = np.float16 if data.ndim in (2, 4) else np.float32
        data = data.squeeze().numpy().astype(target_dtype)
        if name.startswith("vision."):
            name = name.replace("vision.", "")
        fout.add_tensor(name, data)
        print(f"Projector {name} - {data.dtype} - shape = {data.shape}")

state_dict = model.state_dict()  # pyright: ignore[reportAttributeAccessIssue]
for name, data in state_dict.items():
    if should_skip_tensor(name, has_text_encoder, has_vision_encoder, has_glm_projector):
        # we don't need this
        print(f"skipping parameter: {name}")
        continue

    name = get_tensor_name(name)
    data = data.squeeze().numpy()
    n_dims = data.ndim

    if n_dims == 4:
        # 4-D tensors are stored as f16 even when --use-f32 is given.
        print(f"tensor {name} is always saved in f16")
        data = data.astype(np.float16)
    elif ftype == 1 and n_dims == 2 and name.endswith(".weight"):
        # In f16 mode only 2-D ".weight" tensors are down-cast.
        data = data.astype(np.float16)
    elif data.dtype != np.float32:
        # Everything else is stored as f32.
        data = data.astype(np.float32)

    print(f"siglip {name} - {data.dtype} - shape = {data.shape}")
    fout.add_tensor(name, data)

# Flush header, key/value metadata and tensor data, in that order.
fout.write_header_to_file()
fout.write_kv_data_to_file()
fout.write_tensors_to_file()
fout.close()

print("Done. Output file: " + fname_out)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import torch
|
|
4
|
+
from transformers import AutoModel
|
|
5
|
+
|
|
6
|
+
ap = argparse.ArgumentParser()
# The path is needed by every step below, so it is mandatory.
ap.add_argument("-m", "--model", required=True, help="Path to GLM model")
args = ap.parse_args()

# Load the model so that the multimodal projector and vision tower weights can
# be taken from its state dict.
# NOTE(review): trust_remote_code=True executes Python shipped in the model
# directory; only run this on models you trust.
model = AutoModel.from_pretrained(args.model, trust_remote_code=True, local_files_only=True)
checkpoint = model.state_dict()

# get a list of mm tensor names
mm_tensors = [name for name in checkpoint if name.startswith("vision.adapter.")]

# store these tensors (as float32) in a new dictionary and torch.save them
projector = {name: checkpoint[name].float() for name in mm_tensors}
projector_path = os.path.join(args.model, "glm.projector")
torch.save(projector, projector_path)

# Vision tower tensors are saved separately, with the "vision.vit.model."
# prefix removed from their names.
clip_tensors = [name for name in checkpoint if name.startswith("vision.vit.model.vision_model.")]
if clip_tensors:
    clip = {name.replace("vision.vit.model.", ""): checkpoint[name].float() for name in clip_tensors}
    torch.save(clip, os.path.join(args.model, "glm.clip"))

# added tokens should be removed to be able to convert Mistral models
added_tokens_path = os.path.join(args.model, "added_tokens.json")
if os.path.exists(added_tokens_path):
    with open(added_tokens_path, "w") as f:
        f.write("{}\n")

print("Done!")
print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
print(f"Also, use {projector_path} to prepare a glm-encoder.gguf file.")
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import glob
|
|
3
|
+
import os
|
|
4
|
+
import torch
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
ap = argparse.ArgumentParser()
# The path is needed by every step below, so it is mandatory.
ap.add_argument("-m", "--model", required=True, help="Path to LLaVA v1.5 model")
args = ap.parse_args()

# find the model part that includes the multimodal projector weights:
# the last pytorch_model*.bin shard in sorted order
shards = sorted(glob.glob(f"{args.model}/pytorch_model*.bin"))
if not shards:
    # Report the real problem instead of an IndexError from shards[-1].
    raise SystemExit(f"No pytorch_model*.bin files found in {args.model}")
path = shards[-1]
# NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
# map_location="cpu" lets checkpoints saved from a GPU load on any machine.
checkpoint = torch.load(path, map_location="cpu")

# get a list of mm tensor names
mm_tensors = [name for name in checkpoint if name.startswith("model.mm_projector")]

# store these tensors (as float32) in a new dictionary and torch.save them
projector = {name: checkpoint[name].float() for name in mm_tensors}
projector_path = os.path.join(args.model, "llava.projector")
torch.save(projector, projector_path)

# BakLLaVA models contain CLIP tensors in it
clip_tensors = [name for name in checkpoint if name.startswith("model.vision_tower")]
if clip_tensors:
    clip = {name.replace("vision_tower.vision_tower.", ""): checkpoint[name].float() for name in clip_tensors}
    torch.save(clip, os.path.join(args.model, "llava.clip"))

# added tokens should be removed to be able to convert Mistral models
added_tokens_path = os.path.join(args.model, "added_tokens.json")
if os.path.exists(added_tokens_path):
    with open(added_tokens_path, "w") as f:
        f.write("{}\n")

print("Done!")
print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
print(f"Also, use {projector_path} to prepare a llava-encoder.gguf file.")
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import glob
|
|
3
|
+
import os
|
|
4
|
+
import torch
|
|
5
|
+
from safetensors import safe_open
|
|
6
|
+
from safetensors.torch import save_file
|
|
7
|
+
from typing import Any, ContextManager, cast
|
|
8
|
+
|
|
9
|
+
# Function to determine if file is a SafeTensor file
|
|
10
|
+
def is_safetensor_file(file_path):
    """Return ``True`` when ``file_path`` ends with ``.safetensors``."""
    safetensors_suffix = '.safetensors'
    return file_path.endswith(safetensors_suffix)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Unified loading function
|
|
15
|
+
def load_model(file_path):
    """Load a checkpoint file and report which format it was stored in.

    Args:
        file_path: Path of the checkpoint. Files ending in ``.safetensors``
            are read with ``safe_open``; anything else goes through
            ``torch.load`` on the CPU.

    Returns:
        A ``(tensors, file_type)`` pair where ``file_type`` is
        ``'safetensor'`` or ``'pytorch'``. For safetensors files ``tensors``
        is a dict of cloned tensors keyed by name, and the shape of each
        tensor is printed while loading.
    """
    if not is_safetensor_file(file_path):
        return torch.load(file_path, map_location=torch.device('cpu')), 'pytorch'

    loaded = {}
    with cast(ContextManager[Any], safe_open(file_path, framework="pt", device="cpu")) as reader:
        for tensor_name in reader.keys():
            loaded[tensor_name] = reader.get_tensor(tensor_name).clone()
            # output shape
            print(f"{tensor_name} : {loaded[tensor_name].shape}")
    return loaded, 'safetensor'
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Unified saving function
|
|
29
|
+
def save_model(model, file_path, file_type):
    """Write ``model`` to ``file_path`` in the requested format.

    Args:
        model: Object to store (the callers in this file pass dicts of tensors).
        file_path: Destination path.
        file_type: ``'safetensor'`` selects ``save_file``; any other value
            selects ``torch.save``.
    """
    # Only the selected writer is looked up and called.
    writer = save_file if file_type == 'safetensor' else torch.save
    writer(model, file_path)
|
|
35
|
+
|
|
36
|
+
# Helpers to match weight names from specific components or
|
|
37
|
+
# determine if a saved shard contains that component
|
|
38
|
+
def is_vision_tower(weight_name):
    """Return ``True`` when ``weight_name`` starts with a vision-tower prefix.

    The accepted prefixes are ``model.vision_tower``, ``vit.`` and
    ``vision_tower``.
    """
    vision_prefixes = ("model.vision_tower", "vit.", "vision_tower")
    return weight_name.startswith(vision_prefixes)
|
|
44
|
+
|
|
45
|
+
def is_newline(weight_name):
    """Return ``True`` when ``weight_name`` starts with an image-newline prefix.

    The accepted prefixes are ``model.image_newline`` and ``image_newline``.
    """
    newline_prefixes = ("model.image_newline", "image_newline")
    return weight_name.startswith(newline_prefixes)
|
|
50
|
+
|
|
51
|
+
def is_mm_projector(weight_name):
    """Return ``True`` when ``weight_name`` starts with a projector prefix.

    The accepted prefixes are ``model.mm_projector``, ``vision_proj.`` and
    ``multi_modal_projector``.
    """
    projector_prefixes = ("model.mm_projector", "vision_proj.", "multi_modal_projector")
    return weight_name.startswith(projector_prefixes)
|
|
57
|
+
|
|
58
|
+
def newline_criteria(checkpoint):
    """Return ``True`` when any key of ``checkpoint`` satisfies ``is_newline``."""
    for tensor_name in checkpoint.keys():
        if is_newline(tensor_name):
            return True
    return False
|
|
60
|
+
|
|
61
|
+
def proj_criteria(checkpoint):
    """Return ``True`` when any key of ``checkpoint`` satisfies ``is_mm_projector``."""
    for tensor_name in checkpoint.keys():
        if is_mm_projector(tensor_name):
            return True
    return False
|
|
63
|
+
|
|
64
|
+
# Adapted function to clean vision tower from checkpoint
|
|
65
|
+
def clean_vision_tower_from_checkpoint(checkpoint_path):
    """Copy the vision-tower tensors of one checkpoint into ``llava.clip``.

    The checkpoint is loaded, every tensor whose name satisfies
    ``is_vision_tower`` is collected, and those tensors are merged into the
    ``llava.clip`` file that lives next to the checkpoint (created when it
    does not exist yet). Tensors are stored under the part of their name
    starting at ``vision_model.`` when that substring is present. Entries
    already present in ``llava.clip`` are kept as they are.

    The checkpoint file itself is not written back; only ``llava.clip`` is
    modified on disk.

    Args:
        checkpoint_path: Path of the checkpoint file to scan.

    Returns:
        ``True`` when at least one vision-tower tensor was found (and
        ``llava.clip`` was saved), ``False`` otherwise.
    """
    checkpoint, _ = load_model(checkpoint_path)
    model_path = os.path.dirname(checkpoint_path)
    print(f"Searching for vision tower tensors in {checkpoint_path}")
    clip_tensors = [name for name in checkpoint if is_vision_tower(name)]

    if not clip_tensors:
        return False

    print(f"Found {len(clip_tensors)} tensors to extract from {checkpoint_path}")
    clip_path = os.path.join(model_path, "llava.clip")

    if os.path.exists(clip_path):
        print(f"Loading existing llava.clip from {clip_path}")
        existing_clip, _ = load_model(clip_path)
    else:
        print(f"Creating new llava.clip at {clip_path}")
        existing_clip = {}

    # Update existing_clip with new tensors, avoid duplicates
    for name in clip_tensors:
        simple_name = name[name.index('vision_model.'):] if 'vision_model.' in name else name
        print(f"Adding {simple_name} to llava.clip")
        if simple_name not in existing_clip:
            existing_clip[simple_name] = checkpoint[name]

    # llava.clip is always written in the torch.save format.
    save_model(existing_clip, clip_path, 'pytorch')
    return True
|
|
100
|
+
|
|
101
|
+
def find_relevant_checkpoints(checkpoint_paths, newline_criteria, projector):
    """Pick the checkpoints that hold the newline and the projector tensors.

    Every path is loaded with ``load_model`` and tested with both predicates.

    Args:
        checkpoint_paths: Iterable of checkpoint file paths, scanned in order.
        newline_criteria: Callable taking a loaded checkpoint and returning a
            truthy value when it contains the newline tensor.
        projector: Callable taking a loaded checkpoint and returning a truthy
            value when it contains projector tensors.

    Returns:
        ``(newline_path, projector_path)``. ``newline_path`` is the first
        matching path, ``projector_path`` the last matching path; either is
        ``None`` when nothing matched.
    """
    newline_match = None
    projector_match = None

    for candidate in checkpoint_paths:
        tensors, _ = load_model(candidate)
        # The first newline hit is kept ...
        if newline_criteria(tensors) and newline_match is None:
            newline_match = candidate
        # ... while a later projector hit replaces an earlier one.
        if projector(tensors):
            projector_match = candidate

    return newline_match, projector_match
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Command-line interface setup
|
|
116
|
+
def _list_checkpoint_paths(model_dir):
    """Return the weight files in ``model_dir``, most recently modified first.

    A file qualifies when it ends in ``.bin`` and its file name contains
    ``pytorch``, or ends in ``.safetensors`` and its file name contains
    ``model``. Both ``/`` and ``\\`` are treated as path separators when the
    file name is extracted.
    """
    model_files = sorted(glob.glob(f"{model_dir}/*"), key=os.path.getmtime, reverse=True)
    selected = []
    for path in model_files:
        file_name = path.split('/')[-1].split('\\')[-1]
        is_pytorch_shard = path.endswith('.bin') and 'pytorch' in file_name
        is_safetensors_shard = path.endswith('.safetensors') and 'model' in file_name
        if is_pytorch_shard or is_safetensors_shard:
            selected.append(path)
    return selected


ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", required=True, help="Path to LLaVA v1.5+ model")
ap.add_argument("-C", "--clean-vision-tower", action="store_true", help="Remove any vision tower from the model files")
args = ap.parse_args()

if args.clean_vision_tower:
    # Generalized to handle both PyTorch and SafeTensors models
    for shard_path in _list_checkpoint_paths(args.model):
        print(f"Cleaning {shard_path}")
        # Every shard is scanned; there is no early exit on the first miss.
        if not clean_vision_tower_from_checkpoint(shard_path):
            print(f"No vision tower found in {shard_path}")
    # The checkpoints are not rewritten, so the message only claims extraction.
    print("Done! All vision tower tensors were extracted from the model files and stored in llava.clip file.")

# Now we look for the checkpoints that hold the projector and the newline tensor.
# The listing is refreshed because the step above may have created llava.clip.
checkpoint_paths = _list_checkpoint_paths(args.model)
newline_checkpoint_path, projector_checkpoint_path = find_relevant_checkpoints(checkpoint_paths, newline_criteria, proj_criteria)

print(f"Taking projector from {projector_checkpoint_path}")
first_mm_tensors = []
first_checkpoint = None
if newline_checkpoint_path is not None:
    print(f"Taking newline from {newline_checkpoint_path}")
    first_checkpoint, _ = load_model(newline_checkpoint_path)
    first_mm_tensors = [name for name in first_checkpoint if is_newline(name)]

# Load the checkpoint that contains the projector
mm_tensors = []
last_checkpoint = None
if projector_checkpoint_path is not None:
    last_checkpoint, _ = load_model(projector_checkpoint_path)
    mm_tensors = [name for name in last_checkpoint if is_mm_projector(name)]

if len(mm_tensors) == 0:
    # Dump the tensor names that were seen to help diagnose the mismatch.
    if last_checkpoint is not None:
        for tensor_name in last_checkpoint:
            print(tensor_name)
    print(f"Found {len(mm_tensors)} tensors to extract out of {len(last_checkpoint) if last_checkpoint is not None else 0} tensors.")
    print("No tensors found. Is this a LLaVA model?")
    # Exit with a non-zero status so callers can detect the failure.
    raise SystemExit(1)

print(f"Found {len(mm_tensors)} tensors to extract.")
print(f"Found additional {len(first_mm_tensors)} tensors to extract.")

# Collect projector and newline tensors, converted to float32.
projector = {}
for name in mm_tensors:
    assert last_checkpoint is not None
    projector[name] = last_checkpoint[name].float()
for name in first_mm_tensors:
    assert first_checkpoint is not None
    projector[name] = first_checkpoint[name].float()

if len(projector) > 0:
    save_model(projector, f"{args.model}/llava.projector", 'pytorch')

print("Done!")
print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
print(f"Also, use {args.model}/llava.projector to prepare a llava-encoder.gguf file.")
|