local-llm-rn 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/CMakeLists.txt +285 -0
- package/cpp/common/CMakeLists.txt +149 -0
- package/cpp/common/arg.cpp +3799 -0
- package/cpp/common/arg.h +131 -0
- package/cpp/common/base64.hpp +392 -0
- package/cpp/common/build-info.cpp.in +4 -0
- package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
- package/cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/common/chat-parser.cpp +1649 -0
- package/cpp/common/chat-parser.h +133 -0
- package/cpp/common/chat-peg-parser.cpp +124 -0
- package/cpp/common/chat-peg-parser.h +105 -0
- package/cpp/common/chat.cpp +3355 -0
- package/cpp/common/chat.h +252 -0
- package/cpp/common/common.cpp +1824 -0
- package/cpp/common/common.h +930 -0
- package/cpp/common/console.cpp +1137 -0
- package/cpp/common/console.h +41 -0
- package/cpp/common/debug.cpp +167 -0
- package/cpp/common/debug.h +43 -0
- package/cpp/common/download.cpp +792 -0
- package/cpp/common/download.h +84 -0
- package/cpp/common/http.h +84 -0
- package/cpp/common/jinja/README.md +88 -0
- package/cpp/common/jinja/caps.cpp +285 -0
- package/cpp/common/jinja/caps.h +30 -0
- package/cpp/common/jinja/lexer.cpp +341 -0
- package/cpp/common/jinja/lexer.h +157 -0
- package/cpp/common/jinja/parser.cpp +591 -0
- package/cpp/common/jinja/parser.h +21 -0
- package/cpp/common/jinja/runtime.cpp +867 -0
- package/cpp/common/jinja/runtime.h +638 -0
- package/cpp/common/jinja/string.cpp +213 -0
- package/cpp/common/jinja/string.h +61 -0
- package/cpp/common/jinja/utils.h +149 -0
- package/cpp/common/jinja/value.cpp +1393 -0
- package/cpp/common/jinja/value.h +756 -0
- package/cpp/common/json-partial.cpp +324 -0
- package/cpp/common/json-partial.h +39 -0
- package/cpp/common/json-schema-to-grammar.cpp +1153 -0
- package/cpp/common/json-schema-to-grammar.h +43 -0
- package/cpp/common/llguidance.cpp +258 -0
- package/cpp/common/log.cpp +446 -0
- package/cpp/common/log.h +119 -0
- package/cpp/common/ngram-cache.cpp +285 -0
- package/cpp/common/ngram-cache.h +101 -0
- package/cpp/common/ngram-map.cpp +530 -0
- package/cpp/common/ngram-map.h +115 -0
- package/cpp/common/ngram-mod.cpp +60 -0
- package/cpp/common/ngram-mod.h +38 -0
- package/cpp/common/peg-parser.cpp +1712 -0
- package/cpp/common/peg-parser.h +459 -0
- package/cpp/common/preset.cpp +483 -0
- package/cpp/common/preset.h +83 -0
- package/cpp/common/regex-partial.cpp +204 -0
- package/cpp/common/regex-partial.h +56 -0
- package/cpp/common/sampling.cpp +745 -0
- package/cpp/common/sampling.h +119 -0
- package/cpp/common/speculative.cpp +1074 -0
- package/cpp/common/speculative.h +41 -0
- package/cpp/common/unicode.cpp +64 -0
- package/cpp/common/unicode.h +22 -0
- package/cpp/ggml/CMakeLists.txt +494 -0
- package/cpp/ggml/cmake/GitVars.cmake +22 -0
- package/cpp/ggml/cmake/common.cmake +50 -0
- package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
- package/cpp/ggml/include/ggml-alloc.h +85 -0
- package/cpp/ggml/include/ggml-backend.h +373 -0
- package/cpp/ggml/include/ggml-blas.h +25 -0
- package/cpp/ggml/include/ggml-cann.h +123 -0
- package/cpp/ggml/include/ggml-cpp.h +39 -0
- package/cpp/ggml/include/ggml-cpu.h +151 -0
- package/cpp/ggml/include/ggml-cuda.h +47 -0
- package/cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/ggml/include/ggml-metal.h +61 -0
- package/cpp/ggml/include/ggml-opencl.h +26 -0
- package/cpp/ggml/include/ggml-opt.h +256 -0
- package/cpp/ggml/include/ggml-rpc.h +30 -0
- package/cpp/ggml/include/ggml-sycl.h +49 -0
- package/cpp/ggml/include/ggml-virtgpu.h +14 -0
- package/cpp/ggml/include/ggml-vulkan.h +29 -0
- package/cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/ggml/include/ggml-zdnn.h +17 -0
- package/cpp/ggml/include/ggml-zendnn.h +22 -0
- package/cpp/ggml/include/ggml.h +2753 -0
- package/cpp/ggml/include/gguf.h +204 -0
- package/cpp/ggml/src/CMakeLists.txt +492 -0
- package/cpp/ggml/src/ggml-alloc.c +1244 -0
- package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
- package/cpp/ggml/src/ggml-backend-dl.h +45 -0
- package/cpp/ggml/src/ggml-backend-impl.h +255 -0
- package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
- package/cpp/ggml/src/ggml-backend.cpp +2270 -0
- package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
- package/cpp/ggml/src/ggml-common.h +1878 -0
- package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
- package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- package/cpp/ggml/src/ggml-cpu/common.h +95 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
- package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
- package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
- package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
- package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
- package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
- package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
- package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
- package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
- package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
- package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
- package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
- package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
- package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
- package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
- package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
- package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
- package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
- package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
- package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
- package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
- package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
- package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
- package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
- package/cpp/ggml/src/ggml-impl.h +724 -0
- package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
- package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
- package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
- package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
- package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
- package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
- package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
- package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
- package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
- package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- package/cpp/ggml/src/ggml-opt.cpp +1093 -0
- package/cpp/ggml/src/ggml-quants.c +5325 -0
- package/cpp/ggml/src/ggml-quants.h +106 -0
- package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
- package/cpp/ggml/src/ggml-threading.cpp +12 -0
- package/cpp/ggml/src/ggml-threading.h +14 -0
- package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
- package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- package/cpp/ggml/src/ggml.c +7669 -0
- package/cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/ggml/src/gguf.cpp +1699 -0
- package/cpp/include/llama-cpp.h +32 -0
- package/cpp/include/llama.h +1568 -0
- package/cpp/mtmd/CMakeLists.txt +98 -0
- package/cpp/mtmd/README.md +63 -0
- package/cpp/mtmd/clip-graph.h +117 -0
- package/cpp/mtmd/clip-impl.h +586 -0
- package/cpp/mtmd/clip-model.h +390 -0
- package/cpp/mtmd/clip.cpp +4154 -0
- package/cpp/mtmd/clip.h +121 -0
- package/cpp/mtmd/deprecation-warning.cpp +22 -0
- package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
- package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
- package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
- package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
- package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
- package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
- package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
- package/cpp/mtmd/models/cogvlm.cpp +98 -0
- package/cpp/mtmd/models/conformer.cpp +216 -0
- package/cpp/mtmd/models/glm4v.cpp +122 -0
- package/cpp/mtmd/models/internvl.cpp +69 -0
- package/cpp/mtmd/models/kimik25.cpp +101 -0
- package/cpp/mtmd/models/kimivl.cpp +63 -0
- package/cpp/mtmd/models/llama4.cpp +96 -0
- package/cpp/mtmd/models/llava.cpp +374 -0
- package/cpp/mtmd/models/minicpmv.cpp +114 -0
- package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
- package/cpp/mtmd/models/models.h +128 -0
- package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
- package/cpp/mtmd/models/paddleocr.cpp +52 -0
- package/cpp/mtmd/models/pixtral.cpp +86 -0
- package/cpp/mtmd/models/qwen2vl.cpp +183 -0
- package/cpp/mtmd/models/qwen3vl.cpp +193 -0
- package/cpp/mtmd/models/siglip.cpp +86 -0
- package/cpp/mtmd/models/whisper-enc.cpp +115 -0
- package/cpp/mtmd/models/youtuvl.cpp +179 -0
- package/cpp/mtmd/mtmd-audio.cpp +730 -0
- package/cpp/mtmd/mtmd-audio.h +113 -0
- package/cpp/mtmd/mtmd-cli.cpp +437 -0
- package/cpp/mtmd/mtmd-helper.cpp +521 -0
- package/cpp/mtmd/mtmd-helper.h +96 -0
- package/cpp/mtmd/mtmd.cpp +1156 -0
- package/cpp/mtmd/mtmd.h +319 -0
- package/cpp/mtmd/requirements.txt +5 -0
- package/cpp/mtmd/test-1.jpeg +0 -0
- package/cpp/mtmd/test-2.mp3 +0 -0
- package/cpp/mtmd/tests.sh +192 -0
- package/cpp/src/CMakeLists.txt +169 -0
- package/cpp/src/llama-adapter.cpp +488 -0
- package/cpp/src/llama-adapter.h +89 -0
- package/cpp/src/llama-arch.cpp +2855 -0
- package/cpp/src/llama-arch.h +619 -0
- package/cpp/src/llama-batch.cpp +917 -0
- package/cpp/src/llama-batch.h +173 -0
- package/cpp/src/llama-chat.cpp +896 -0
- package/cpp/src/llama-chat.h +71 -0
- package/cpp/src/llama-context.cpp +3512 -0
- package/cpp/src/llama-context.h +359 -0
- package/cpp/src/llama-cparams.cpp +5 -0
- package/cpp/src/llama-cparams.h +44 -0
- package/cpp/src/llama-grammar.cpp +1464 -0
- package/cpp/src/llama-grammar.h +194 -0
- package/cpp/src/llama-graph.cpp +2685 -0
- package/cpp/src/llama-graph.h +1026 -0
- package/cpp/src/llama-hparams.cpp +234 -0
- package/cpp/src/llama-hparams.h +339 -0
- package/cpp/src/llama-impl.cpp +171 -0
- package/cpp/src/llama-impl.h +73 -0
- package/cpp/src/llama-io.cpp +15 -0
- package/cpp/src/llama-io.h +35 -0
- package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
- package/cpp/src/llama-kv-cache-iswa.h +137 -0
- package/cpp/src/llama-kv-cache.cpp +2271 -0
- package/cpp/src/llama-kv-cache.h +388 -0
- package/cpp/src/llama-kv-cells.h +533 -0
- package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
- package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
- package/cpp/src/llama-memory-hybrid.cpp +268 -0
- package/cpp/src/llama-memory-hybrid.h +139 -0
- package/cpp/src/llama-memory-recurrent.cpp +1165 -0
- package/cpp/src/llama-memory-recurrent.h +182 -0
- package/cpp/src/llama-memory.cpp +59 -0
- package/cpp/src/llama-memory.h +122 -0
- package/cpp/src/llama-mmap.cpp +785 -0
- package/cpp/src/llama-mmap.h +92 -0
- package/cpp/src/llama-model-loader.cpp +1414 -0
- package/cpp/src/llama-model-loader.h +203 -0
- package/cpp/src/llama-model-saver.cpp +286 -0
- package/cpp/src/llama-model-saver.h +37 -0
- package/cpp/src/llama-model.cpp +9253 -0
- package/cpp/src/llama-model.h +576 -0
- package/cpp/src/llama-quant.cpp +1119 -0
- package/cpp/src/llama-quant.h +1 -0
- package/cpp/src/llama-sampler.cpp +3885 -0
- package/cpp/src/llama-sampler.h +42 -0
- package/cpp/src/llama-vocab.cpp +3970 -0
- package/cpp/src/llama-vocab.h +187 -0
- package/cpp/src/llama.cpp +1313 -0
- package/cpp/src/models/afmoe.cpp +191 -0
- package/cpp/src/models/apertus.cpp +125 -0
- package/cpp/src/models/arcee.cpp +135 -0
- package/cpp/src/models/arctic.cpp +138 -0
- package/cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/src/models/baichuan.cpp +122 -0
- package/cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/src/models/bert.cpp +178 -0
- package/cpp/src/models/bitnet.cpp +160 -0
- package/cpp/src/models/bloom.cpp +101 -0
- package/cpp/src/models/chameleon.cpp +178 -0
- package/cpp/src/models/chatglm.cpp +132 -0
- package/cpp/src/models/codeshell.cpp +111 -0
- package/cpp/src/models/cogvlm.cpp +102 -0
- package/cpp/src/models/cohere2-iswa.cpp +134 -0
- package/cpp/src/models/command-r.cpp +122 -0
- package/cpp/src/models/dbrx.cpp +123 -0
- package/cpp/src/models/deci.cpp +135 -0
- package/cpp/src/models/deepseek.cpp +144 -0
- package/cpp/src/models/deepseek2.cpp +262 -0
- package/cpp/src/models/delta-net-base.cpp +376 -0
- package/cpp/src/models/dots1.cpp +134 -0
- package/cpp/src/models/dream.cpp +105 -0
- package/cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/src/models/eurobert.cpp +97 -0
- package/cpp/src/models/exaone-moe.cpp +146 -0
- package/cpp/src/models/exaone.cpp +114 -0
- package/cpp/src/models/exaone4.cpp +123 -0
- package/cpp/src/models/falcon-h1.cpp +111 -0
- package/cpp/src/models/falcon.cpp +120 -0
- package/cpp/src/models/gemma-embedding.cpp +116 -0
- package/cpp/src/models/gemma.cpp +112 -0
- package/cpp/src/models/gemma2-iswa.cpp +128 -0
- package/cpp/src/models/gemma3.cpp +155 -0
- package/cpp/src/models/gemma3n-iswa.cpp +384 -0
- package/cpp/src/models/glm4-moe.cpp +170 -0
- package/cpp/src/models/glm4.cpp +157 -0
- package/cpp/src/models/gpt2.cpp +105 -0
- package/cpp/src/models/gptneox.cpp +144 -0
- package/cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/src/models/granite.cpp +211 -0
- package/cpp/src/models/grok.cpp +159 -0
- package/cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/src/models/internlm2.cpp +120 -0
- package/cpp/src/models/jais.cpp +86 -0
- package/cpp/src/models/jais2.cpp +123 -0
- package/cpp/src/models/jamba.cpp +106 -0
- package/cpp/src/models/kimi-linear.cpp +392 -0
- package/cpp/src/models/lfm2.cpp +190 -0
- package/cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/src/models/llada.cpp +99 -0
- package/cpp/src/models/llama-iswa.cpp +178 -0
- package/cpp/src/models/llama.cpp +168 -0
- package/cpp/src/models/maincoder.cpp +117 -0
- package/cpp/src/models/mamba-base.cpp +285 -0
- package/cpp/src/models/mamba.cpp +54 -0
- package/cpp/src/models/mimo2-iswa.cpp +123 -0
- package/cpp/src/models/minicpm3.cpp +200 -0
- package/cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/src/models/mistral3.cpp +160 -0
- package/cpp/src/models/models.h +684 -0
- package/cpp/src/models/modern-bert.cpp +109 -0
- package/cpp/src/models/mpt.cpp +126 -0
- package/cpp/src/models/nemotron-h.cpp +148 -0
- package/cpp/src/models/nemotron.cpp +122 -0
- package/cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/src/models/olmo.cpp +121 -0
- package/cpp/src/models/olmo2.cpp +150 -0
- package/cpp/src/models/olmoe.cpp +124 -0
- package/cpp/src/models/openai-moe-iswa.cpp +127 -0
- package/cpp/src/models/openelm.cpp +124 -0
- package/cpp/src/models/orion.cpp +123 -0
- package/cpp/src/models/paddleocr.cpp +122 -0
- package/cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/src/models/phi2.cpp +121 -0
- package/cpp/src/models/phi3.cpp +152 -0
- package/cpp/src/models/plamo.cpp +110 -0
- package/cpp/src/models/plamo2.cpp +318 -0
- package/cpp/src/models/plamo3.cpp +128 -0
- package/cpp/src/models/plm.cpp +169 -0
- package/cpp/src/models/qwen.cpp +108 -0
- package/cpp/src/models/qwen2.cpp +126 -0
- package/cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/src/models/qwen3.cpp +117 -0
- package/cpp/src/models/qwen35.cpp +386 -0
- package/cpp/src/models/qwen35moe.cpp +420 -0
- package/cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/src/models/qwen3next.cpp +525 -0
- package/cpp/src/models/qwen3vl-moe.cpp +140 -0
- package/cpp/src/models/qwen3vl.cpp +132 -0
- package/cpp/src/models/refact.cpp +94 -0
- package/cpp/src/models/rnd1.cpp +126 -0
- package/cpp/src/models/rwkv6-base.cpp +164 -0
- package/cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/src/models/rwkv7-base.cpp +137 -0
- package/cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/src/models/smallthinker.cpp +126 -0
- package/cpp/src/models/smollm3.cpp +128 -0
- package/cpp/src/models/stablelm.cpp +146 -0
- package/cpp/src/models/starcoder.cpp +100 -0
- package/cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/src/models/step35-iswa.cpp +168 -0
- package/cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/src/models/xverse.cpp +108 -0
- package/cpp/src/unicode-data.cpp +7034 -0
- package/cpp/src/unicode-data.h +20 -0
- package/cpp/src/unicode.cpp +1103 -0
- package/cpp/src/unicode.h +111 -0
- package/cpp/vendor/nlohmann/json.hpp +25526 -0
- package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/vendor/stb/stb_image.h +7988 -0
- package/ios/LocalLLM-Bridging-Header.h +2 -0
- package/ios/LocalLLM.h +5 -0
- package/ios/LocalLLM.mm +1267 -0
- package/local-llm-rn.podspec +60 -0
- package/package.json +35 -0
- package/src/NativeLocalLLM.ts +73 -0
- package/src/device.ts +50 -0
- package/src/download-adapter.ts +17 -0
- package/src/index.ts +21 -0
- package/src/native-bridge.ts +142 -0
- package/src/rn-downloader.ts +37 -0
|
@@ -0,0 +1,792 @@
|
|
|
1
|
+
#include "arg.h"
|
|
2
|
+
|
|
3
|
+
#include "common.h"
|
|
4
|
+
#include "gguf.h" // for reading GGUF splits
|
|
5
|
+
#include "log.h"
|
|
6
|
+
#include "download.h"
|
|
7
|
+
|
|
8
|
+
#define JSON_ASSERT GGML_ASSERT
|
|
9
|
+
#include <nlohmann/json.hpp>
|
|
10
|
+
|
|
11
|
+
#include <algorithm>
|
|
12
|
+
#include <filesystem>
|
|
13
|
+
#include <fstream>
|
|
14
|
+
#include <future>
|
|
15
|
+
#include <map>
|
|
16
|
+
#include <mutex>
|
|
17
|
+
#include <regex>
|
|
18
|
+
#include <string>
|
|
19
|
+
#include <thread>
|
|
20
|
+
#include <vector>
|
|
21
|
+
|
|
22
|
+
#include "http.h"
|
|
23
|
+
|
|
24
|
+
#ifndef __EMSCRIPTEN__
|
|
25
|
+
#ifdef __linux__
|
|
26
|
+
#include <linux/limits.h>
|
|
27
|
+
#elif defined(_WIN32)
|
|
28
|
+
# if !defined(PATH_MAX)
|
|
29
|
+
# define PATH_MAX MAX_PATH
|
|
30
|
+
# endif
|
|
31
|
+
#elif defined(_AIX)
|
|
32
|
+
#include <sys/limits.h>
|
|
33
|
+
#else
|
|
34
|
+
#include <sys/syslimits.h>
|
|
35
|
+
#endif
|
|
36
|
+
#endif
|
|
37
|
+
|
|
38
|
+
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
39
|
+
|
|
40
|
+
// isatty
|
|
41
|
+
#if defined(_WIN32)
|
|
42
|
+
#include <io.h>
|
|
43
|
+
#else
|
|
44
|
+
#include <unistd.h>
|
|
45
|
+
#endif
|
|
46
|
+
|
|
47
|
+
using json = nlohmann::ordered_json;
|
|
48
|
+
|
|
49
|
+
//
|
|
50
|
+
// downloader
|
|
51
|
+
//
|
|
52
|
+
|
|
53
|
+
// Returns true when `repo` has the shape "<owner>/<repo>": exactly one '/',
// with a non-empty run of ASCII letters, digits, '_', '.' or '-' on each side.
static bool validate_repo_name(const std::string & repo) {
    // compiled once on first call; the pattern anchors both ends of the string
    static const std::regex owner_slash_name(R"(^[A-Za-z0-9_.\-]+\/[A-Za-z0-9_.\-]+$)");

    const bool well_formed = std::regex_match(repo, owner_slash_name);
    return well_formed;
}
|
|
58
|
+
|
|
59
|
+
static std::string get_manifest_path(const std::string & repo, const std::string & tag) {
|
|
60
|
+
// we use "=" to avoid clashing with other component, while still being allowed on windows
|
|
61
|
+
std::string fname = "manifest=" + repo + "=" + tag + ".json";
|
|
62
|
+
if (!validate_repo_name(repo)) {
|
|
63
|
+
throw std::runtime_error("error: repo name must be in the format 'owner/repo'");
|
|
64
|
+
}
|
|
65
|
+
string_replace_all(fname, "/", "=");
|
|
66
|
+
return fs_get_cache_file(fname);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
static std::string read_file(const std::string & fname) {
|
|
70
|
+
std::ifstream file(fname);
|
|
71
|
+
if (!file) {
|
|
72
|
+
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
|
73
|
+
}
|
|
74
|
+
std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
|
75
|
+
file.close();
|
|
76
|
+
return content;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
static void write_file(const std::string & fname, const std::string & content) {
|
|
80
|
+
const std::string fname_tmp = fname + ".tmp";
|
|
81
|
+
std::ofstream file(fname_tmp);
|
|
82
|
+
if (!file) {
|
|
83
|
+
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
try {
|
|
87
|
+
file << content;
|
|
88
|
+
file.close();
|
|
89
|
+
|
|
90
|
+
// Makes write atomic
|
|
91
|
+
if (rename(fname_tmp.c_str(), fname.c_str()) != 0) {
|
|
92
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str());
|
|
93
|
+
// If rename fails, try to delete the temporary file
|
|
94
|
+
if (remove(fname_tmp.c_str()) != 0) {
|
|
95
|
+
LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
} catch (...) {
|
|
99
|
+
// If anything fails, try to delete the temporary file
|
|
100
|
+
if (remove(fname_tmp.c_str()) != 0) {
|
|
101
|
+
LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str()));
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Stores `etag` in the side file "<path>.etag" next to the downloaded file.
static void write_etag(const std::string & path, const std::string & etag) {
    std::string sidecar = path;
    sidecar += ".etag";

    write_file(sidecar, etag);
    LOG_DBG("%s: file etag saved: %s\n", __func__, sidecar.c_str());
}
|
|
113
|
+
|
|
114
|
+
static std::string read_etag(const std::string & path) {
|
|
115
|
+
const std::string etag_path = path + ".etag";
|
|
116
|
+
if (!std::filesystem::exists(etag_path)) {
|
|
117
|
+
return {};
|
|
118
|
+
}
|
|
119
|
+
std::ifstream etag_in(etag_path);
|
|
120
|
+
if (!etag_in) {
|
|
121
|
+
LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
|
|
122
|
+
return {};
|
|
123
|
+
}
|
|
124
|
+
std::string etag;
|
|
125
|
+
std::getline(etag_in, etag);
|
|
126
|
+
return etag;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// True for any status in [200, 400): 2xx plus 3xx, which includes the 304 the
// download helpers in this file return when a cached copy is used.
static bool is_http_status_ok(int status) {
    if (status < 200) {
        return false;
    }
    return status < 400;
}
|
|
132
|
+
|
|
133
|
+
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
|
|
134
|
+
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
|
|
135
|
+
std::string tag = parts.size() > 1 ? parts.back() : "latest";
|
|
136
|
+
std::string hf_repo = parts[0];
|
|
137
|
+
if (string_split<std::string>(hf_repo, '/').size() != 2) {
|
|
138
|
+
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
|
139
|
+
}
|
|
140
|
+
return {hf_repo, tag};
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Text progress bar for stdout. Each instance claims its own terminal line the
// first time update() draws it, so several bars can be refreshed in place.
// The shared bookkeeping (line registry, line counter) is guarded by `mutex`.
// Nothing is drawn when stdout is not a terminal.
class ProgressBar {
    static inline std::mutex mutex;
    static inline std::map<const ProgressBar *, int> lines;  // bar -> line index assigned on first draw
    static inline int max_line = 0;                          // number of lines handed out so far

    // Forgets `line`; once no bar is registered the line counter starts over.
    // Callers hold `mutex`.
    static void cleanup(const ProgressBar * line) {
        lines.erase(line);
        if (lines.empty()) {
            max_line = 0;
        }
    }

    static bool is_output_a_tty() {
#if defined(_WIN32)
        return _isatty(_fileno(stdout));
#else
        return isatty(1);
#endif
    }

public:
    ProgressBar() = default;

    ~ProgressBar() {
        std::lock_guard<std::mutex> lock(mutex);
        cleanup(this);
    }

    // Builds the visible text of the bar (no cursor-control escapes):
    //   "[=====>     ] NNN% (<current> MB / <total> MB) "
    //
    // The filled portion and the percentage are computed from `current`
    // clamped to `total`, so an overshoot (more bytes received than announced)
    // can no longer underflow the padding width. The arithmetic is done in
    // 64 bits so `100 * current` does not wrap where size_t is 32 bits wide.
    // The MB figures show the values exactly as passed in.
    static std::string render(size_t current, size_t total) {
        const unsigned long long width = 50;
        const unsigned long long done  = current < total ? current : total;
        const unsigned long long pct   = total ? (100 * done) / total : 0;
        const unsigned long long pos   = total ? (width * done) / total : 0;

        // right-align the percentage in a field of 3 characters
        std::string pct_text = std::to_string(pct);
        if (pct_text.size() < 3) {
            pct_text = std::string(3 - pct_text.size(), ' ') + pct_text;
        }

        std::string text = "[";
        text.append(static_cast<size_t>(pos), '=');
        if (pos < width) {
            text += '>';
        }
        text.append(static_cast<size_t>(width - pos), ' ');
        text += "] ";
        text += pct_text;
        text += "% (";
        text += std::to_string(current / (1024 * 1024));
        text += " MB / ";
        text += std::to_string(total / (1024 * 1024));
        text += " MB) ";
        return text;
    }

    // Redraws this bar for `current` of `total` bytes.
    // Does nothing when stdout is not a terminal or when `total` is 0.
    void update(size_t current, size_t total) {
        if (!is_output_a_tty()) {
            return;
        }

        if (!total) {
            return;
        }

        std::lock_guard<std::mutex> lock(mutex);

        // first draw: reserve a fresh line at the bottom for this bar
        if (lines.find(this) == lines.end()) {
            lines[this] = max_line++;
            std::cout << "\n";
        }
        int lines_up = max_line - lines[this];

        // save cursor, move up to this bar's line, clear it, draw, restore cursor
        std::cout << "\033[s";

        if (lines_up > 0) {
            std::cout << "\033[" << lines_up << "A";
        }
        std::cout << "\033[2K\r" << render(current, total) << "\033[u";

        std::cout.flush();

        if (current == total) {
            cleanup(this);
        }
    }

    ProgressBar(const ProgressBar &) = delete;
    ProgressBar & operator=(const ProgressBar &) = delete;
};
|
|
216
|
+
|
|
217
|
+
// Streams `resolve_path` from `cli` into `path_tmp`, appending to whatever the
// file already contains.
//
//   supports_ranges / existing_size : when both are set, a "Range: bytes=<existing_size>-"
//                                     header is sent and a 206 reply is required
//   total_size                      : in/out; if 0 on entry it is filled from the
//                                     response's Content-Length (plus existing_size)
//
// Returns true only when the transfer completed AND the data was flushed to
// the file without a stream error.
static bool common_pull_file(httplib::Client & cli,
                             const std::string & resolve_path,
                             const std::string & path_tmp,
                             bool supports_ranges,
                             size_t existing_size,
                             size_t & total_size) {
    std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app);
    if (!ofs.is_open()) {
        LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str());
        return false;
    }

    httplib::Headers headers;
    if (supports_ranges && existing_size > 0) {
        headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
    }

    const char * func = __func__; // avoid __func__ inside a lambda
    size_t downloaded    = existing_size;
    size_t progress_step = 0;
    ProgressBar bar;

    auto res = cli.Get(resolve_path, headers,
        // response handler: returning false aborts the request
        [&](const httplib::Response &response) {
            if (existing_size > 0 && response.status != 206) {
                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", func, response.status);
                return false;
            }
            if (existing_size == 0 && response.status != 200) {
                LOG_WRN("%s: download received non-successful status code: %d\n", func, response.status);
                return false;
            }
            if (total_size == 0 && response.has_header("Content-Length")) {
                try {
                    size_t content_length = std::stoull(response.get_header_value("Content-Length"));
                    total_size = existing_size + content_length;
                } catch (const std::exception &e) {
                    LOG_WRN("%s: invalid Content-Length header: %s\n", func, e.what());
                }
            }
            return true;
        },
        // content receiver: append each chunk and refresh the progress bar
        [&](const char *data, size_t len) {
            ofs.write(data, len);
            if (!ofs) {
                LOG_ERR("%s: error writing to file: %s\n", func, path_tmp.c_str());
                return false;
            }
            downloaded += len;
            progress_step += len;

            // redraw roughly every 0.1% of the total, and on the final chunk
            if (progress_step >= total_size / 1000 || downloaded == total_size) {
                bar.update(downloaded, total_size);
                progress_step = 0;
            }
            return true;
        },
        nullptr
    );

    if (!res) {
        // no response object is available on this path, hence the fixed -1
        LOG_ERR("%s: download failed: %s (status: %d)\n",
                __func__,
                httplib::to_string(res.error()).c_str(),
                -1);
        return false;
    }

    // Flush buffered bytes before reporting success: a failure at this point
    // (e.g. disk full) would otherwise go unnoticed and the caller would move
    // an incomplete file into place.
    ofs.close();
    if (ofs.fail()) {
        LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
        return false;
    }

    return true;
}
|
|
287
|
+
|
|
288
|
+
// download one single file from remote URL to local path
|
|
289
|
+
// returns status code or -1 on error
|
|
290
|
+
static int common_download_file_single_online(const std::string & url,
|
|
291
|
+
const std::string & path,
|
|
292
|
+
const std::string & bearer_token,
|
|
293
|
+
const common_header_list & custom_headers) {
|
|
294
|
+
static const int max_attempts = 3;
|
|
295
|
+
static const int retry_delay_seconds = 2;
|
|
296
|
+
|
|
297
|
+
auto [cli, parts] = common_http_client(url);
|
|
298
|
+
|
|
299
|
+
httplib::Headers headers;
|
|
300
|
+
for (const auto & h : custom_headers) {
|
|
301
|
+
headers.emplace(h.first, h.second);
|
|
302
|
+
}
|
|
303
|
+
if (headers.find("User-Agent") == headers.end()) {
|
|
304
|
+
headers.emplace("User-Agent", "llama-cpp/" + build_info);
|
|
305
|
+
}
|
|
306
|
+
if (!bearer_token.empty()) {
|
|
307
|
+
headers.emplace("Authorization", "Bearer " + bearer_token);
|
|
308
|
+
}
|
|
309
|
+
cli.set_default_headers(headers);
|
|
310
|
+
|
|
311
|
+
const bool file_exists = std::filesystem::exists(path);
|
|
312
|
+
|
|
313
|
+
std::string last_etag;
|
|
314
|
+
if (file_exists) {
|
|
315
|
+
last_etag = read_etag(path);
|
|
316
|
+
} else {
|
|
317
|
+
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
auto head = cli.Head(parts.path);
|
|
321
|
+
if (!head || head->status < 200 || head->status >= 300) {
|
|
322
|
+
LOG_WRN("%s: HEAD failed, status: %d\n", __func__, head ? head->status : -1);
|
|
323
|
+
if (file_exists) {
|
|
324
|
+
LOG_INF("%s: using cached file (HEAD failed): %s\n", __func__, path.c_str());
|
|
325
|
+
return 304; // 304 Not Modified - fake cached response
|
|
326
|
+
}
|
|
327
|
+
return head ? head->status : -1;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
std::string etag;
|
|
331
|
+
if (head->has_header("ETag")) {
|
|
332
|
+
etag = head->get_header_value("ETag");
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
size_t total_size = 0;
|
|
336
|
+
if (head->has_header("Content-Length")) {
|
|
337
|
+
try {
|
|
338
|
+
total_size = std::stoull(head->get_header_value("Content-Length"));
|
|
339
|
+
} catch (const std::exception& e) {
|
|
340
|
+
LOG_WRN("%s: invalid Content-Length in HEAD response: %s\n", __func__, e.what());
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
bool supports_ranges = false;
|
|
345
|
+
if (head->has_header("Accept-Ranges")) {
|
|
346
|
+
supports_ranges = head->get_header_value("Accept-Ranges") != "none";
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
if (file_exists) {
|
|
350
|
+
if (etag.empty()) {
|
|
351
|
+
LOG_INF("%s: using cached file (no server etag): %s\n", __func__, path.c_str());
|
|
352
|
+
return 304; // 304 Not Modified - fake cached response
|
|
353
|
+
}
|
|
354
|
+
if (!last_etag.empty() && last_etag == etag) {
|
|
355
|
+
LOG_INF("%s: using cached file (same etag): %s\n", __func__, path.c_str());
|
|
356
|
+
return 304; // 304 Not Modified - fake cached response
|
|
357
|
+
}
|
|
358
|
+
if (remove(path.c_str()) != 0) {
|
|
359
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
360
|
+
return -1;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
const std::string path_temporary = path + ".downloadInProgress";
|
|
365
|
+
int delay = retry_delay_seconds;
|
|
366
|
+
|
|
367
|
+
for (int i = 0; i < max_attempts; ++i) {
|
|
368
|
+
if (i) {
|
|
369
|
+
LOG_WRN("%s: retrying after %d seconds...\n", __func__, delay);
|
|
370
|
+
std::this_thread::sleep_for(std::chrono::seconds(delay));
|
|
371
|
+
delay *= retry_delay_seconds;
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
size_t existing_size = 0;
|
|
375
|
+
|
|
376
|
+
if (std::filesystem::exists(path_temporary)) {
|
|
377
|
+
if (supports_ranges) {
|
|
378
|
+
existing_size = std::filesystem::file_size(path_temporary);
|
|
379
|
+
} else if (remove(path_temporary.c_str()) != 0) {
|
|
380
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
381
|
+
return -1;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
LOG_INF("%s: downloading from %s to %s (etag:%s)...\n",
|
|
386
|
+
__func__, common_http_show_masked_url(parts).c_str(),
|
|
387
|
+
path_temporary.c_str(), etag.c_str());
|
|
388
|
+
|
|
389
|
+
if (common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size)) {
|
|
390
|
+
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
391
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
392
|
+
return -1;
|
|
393
|
+
}
|
|
394
|
+
if (!etag.empty()) {
|
|
395
|
+
write_etag(path, etag);
|
|
396
|
+
}
|
|
397
|
+
return head->status;
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
|
|
402
|
+
return -1; // max attempts reached
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
|
|
406
|
+
const common_remote_params & params) {
|
|
407
|
+
auto [cli, parts] = common_http_client(url);
|
|
408
|
+
|
|
409
|
+
httplib::Headers headers;
|
|
410
|
+
for (const auto & h : params.headers) {
|
|
411
|
+
headers.emplace(h.first, h.second);
|
|
412
|
+
}
|
|
413
|
+
if (headers.find("User-Agent") == headers.end()) {
|
|
414
|
+
headers.emplace("User-Agent", "llama-cpp/" + build_info);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
if (params.timeout > 0) {
|
|
418
|
+
cli.set_read_timeout(params.timeout, 0);
|
|
419
|
+
cli.set_write_timeout(params.timeout, 0);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
std::vector<char> buf;
|
|
423
|
+
auto res = cli.Get(parts.path, headers,
|
|
424
|
+
[&](const char *data, size_t len) {
|
|
425
|
+
buf.insert(buf.end(), data, data + len);
|
|
426
|
+
return params.max_size == 0 ||
|
|
427
|
+
buf.size() <= static_cast<size_t>(params.max_size);
|
|
428
|
+
},
|
|
429
|
+
nullptr
|
|
430
|
+
);
|
|
431
|
+
|
|
432
|
+
if (!res) {
|
|
433
|
+
throw std::runtime_error("error: cannot make GET request");
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
return { res->status, std::move(buf) };
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
int common_download_file_single(const std::string & url,
|
|
440
|
+
const std::string & path,
|
|
441
|
+
const std::string & bearer_token,
|
|
442
|
+
bool offline,
|
|
443
|
+
const common_header_list & headers) {
|
|
444
|
+
if (!offline) {
|
|
445
|
+
return common_download_file_single_online(url, path, bearer_token, headers);
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
if (!std::filesystem::exists(path)) {
|
|
449
|
+
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
450
|
+
return -1;
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
454
|
+
return 304; // Not Modified - fake cached response
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
// download multiple files from remote URLs to local paths
|
|
458
|
+
// the input is a vector of pairs <url, path>
|
|
459
|
+
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls,
|
|
460
|
+
const std::string & bearer_token,
|
|
461
|
+
bool offline,
|
|
462
|
+
const common_header_list & headers) {
|
|
463
|
+
// Prepare download in parallel
|
|
464
|
+
std::vector<std::future<bool>> futures_download;
|
|
465
|
+
futures_download.reserve(urls.size());
|
|
466
|
+
|
|
467
|
+
for (auto const & item : urls) {
|
|
468
|
+
futures_download.push_back(
|
|
469
|
+
std::async(
|
|
470
|
+
std::launch::async,
|
|
471
|
+
[&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
|
|
472
|
+
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
|
|
473
|
+
return is_http_status_ok(http_status);
|
|
474
|
+
},
|
|
475
|
+
item
|
|
476
|
+
)
|
|
477
|
+
);
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
// Wait for all downloads to complete
|
|
481
|
+
for (auto & f : futures_download) {
|
|
482
|
+
if (!f.get()) {
|
|
483
|
+
return false;
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
return true;
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
bool common_download_model(const common_params_model & model,
|
|
491
|
+
const std::string & bearer_token,
|
|
492
|
+
bool offline,
|
|
493
|
+
const common_header_list & headers) {
|
|
494
|
+
// Basic validation of the model.url
|
|
495
|
+
if (model.url.empty()) {
|
|
496
|
+
LOG_ERR("%s: invalid model url\n", __func__);
|
|
497
|
+
return false;
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
|
|
501
|
+
if (!is_http_status_ok(http_status)) {
|
|
502
|
+
return false;
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
// check for additional GGUFs split to download
|
|
506
|
+
int n_split = 0;
|
|
507
|
+
{
|
|
508
|
+
struct gguf_init_params gguf_params = {
|
|
509
|
+
/*.no_alloc = */ true,
|
|
510
|
+
/*.ctx = */ NULL,
|
|
511
|
+
};
|
|
512
|
+
auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params);
|
|
513
|
+
if (!ctx_gguf) {
|
|
514
|
+
LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str());
|
|
515
|
+
return false;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
|
|
519
|
+
if (key_n_split >= 0) {
|
|
520
|
+
n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
gguf_free(ctx_gguf);
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
if (n_split > 1) {
|
|
527
|
+
char split_prefix[PATH_MAX] = {0};
|
|
528
|
+
char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0};
|
|
529
|
+
|
|
530
|
+
// Verify the first split file format
|
|
531
|
+
// and extract split URL and PATH prefixes
|
|
532
|
+
{
|
|
533
|
+
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), model.path.c_str(), 0, n_split)) {
|
|
534
|
+
LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, model.path.c_str(), n_split);
|
|
535
|
+
return false;
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model.url.c_str(), 0, n_split)) {
|
|
539
|
+
LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model.url.c_str(), n_split);
|
|
540
|
+
return false;
|
|
541
|
+
}
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
std::vector<std::pair<std::string, std::string>> urls;
|
|
545
|
+
for (int idx = 1; idx < n_split; idx++) {
|
|
546
|
+
char split_path[PATH_MAX] = {0};
|
|
547
|
+
llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split);
|
|
548
|
+
|
|
549
|
+
char split_url[LLAMA_MAX_URL_LENGTH] = {0};
|
|
550
|
+
llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split);
|
|
551
|
+
|
|
552
|
+
if (std::string(split_path) == model.path) {
|
|
553
|
+
continue; // skip the already downloaded file
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
urls.push_back({split_url, split_path});
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
// Download in parallel
|
|
560
|
+
common_download_file_multiple(urls, bearer_token, offline, headers);
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
return true;
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
// Resolves "<user>/<model>[:tag]" to the GGUF (and optional mmproj) file names
// listed in the repository manifest.
//
// Unless `offline` is set the manifest is requested from the model endpoint;
// a successful response is stored in the local cache. When no response was
// obtained (offline mode or request failure) the cached manifest is used if
// present.
//
// Throws std::runtime_error when no manifest can be obtained, when the server
// answers with an error status, when the manifest is not valid JSON, or when
// it names no GGUF file.
common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
                                      const std::string & bearer_token,
                                      bool offline,
                                      const common_header_list & custom_headers) {
    // the returned hf_repo is without tag
    auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);

    std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;

    // request headers: caller-supplied ones first, then ours
    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
    // User-Agent header is already set in common_remote_get_content, no need to set it here
    common_remote_params params;
    params.headers = custom_headers;
    params.headers.push_back({"Accept", "application/json"});
    if (!bearer_token.empty()) {
        params.headers.push_back({"Authorization", "Bearer " + bearer_token});
    }

    long        status     = 0;     // stays 0 until some response (remote or cached) is available
    std::string body;
    bool        from_cache = false;
    const std::string manifest_cache = get_manifest_path(hf_repo, tag);

    // 1) try the network
    if (!offline) {
        try {
            auto reply = common_remote_get_content(url, params);
            status = reply.first;
            body   = std::string(reply.second.data(), reply.second.size());
        } catch (const std::exception & e) {
            LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what());
        }
    }

    // 2) fall back to the cached manifest
    if (status == 0) {
        if (!std::filesystem::exists(manifest_cache)) {
            if (offline) {
                throw std::runtime_error("error: failed to get manifest (offline mode)");
            }
            throw std::runtime_error("error: failed to get manifest (check your internet connection)");
        }
        LOG_WRN("trying to read manifest from cache: %s\n", manifest_cache.c_str());
        body       = read_file(manifest_cache);
        status     = 200;
        from_cache = true;
    }

    // 3) reject error statuses
    if (status == 401) {
        throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
    }
    if (status != 200 && status != 304) {
        throw std::runtime_error(string_format("error from HF API (%s), response code: %ld, data: %s", url.c_str(), status, body.c_str()));
    }

    // 4) pull the file names out of the manifest
    std::string ggufFile;
    std::string mmprojFile;
    try {
        auto manifest = json::parse(body);

        // value of manifest[<section>]["rfilename"], or "" when either level is absent
        const auto rfilename_of = [&manifest](const std::string & section) -> std::string {
            if (manifest.contains(section) && manifest[section].contains("rfilename")) {
                return manifest[section]["rfilename"].get<std::string>();
            }
            return {};
        };

        ggufFile   = rfilename_of("ggufFile");
        mmprojFile = rfilename_of("mmprojFile");
    } catch (const std::exception & e) {
        throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what());
    }

    if (!from_cache) {
        // if not using cached response, update the cache file
        write_file(manifest_cache, body);
    }

    // check response
    if (ggufFile.empty()) {
        throw std::runtime_error("error: model does not have ggufFile");
    }

    return { hf_repo, ggufFile, mmprojFile };
}
|
|
645
|
+
|
|
646
|
+
//
|
|
647
|
+
// Docker registry functions
|
|
648
|
+
//
|
|
649
|
+
|
|
650
|
+
static std::string common_docker_get_token(const std::string & repo) {
|
|
651
|
+
std::string url = "https://auth.docker.io/token?service=registry.docker.io&scope=repository:" + repo + ":pull";
|
|
652
|
+
|
|
653
|
+
common_remote_params params;
|
|
654
|
+
auto res = common_remote_get_content(url, params);
|
|
655
|
+
|
|
656
|
+
if (res.first != 200) {
|
|
657
|
+
throw std::runtime_error("Failed to get Docker registry token, HTTP code: " + std::to_string(res.first));
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
std::string response_str(res.second.begin(), res.second.end());
|
|
661
|
+
nlohmann::ordered_json response = nlohmann::ordered_json::parse(response_str);
|
|
662
|
+
|
|
663
|
+
if (!response.contains("token")) {
|
|
664
|
+
throw std::runtime_error("Docker registry token response missing 'token' field");
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
return response["token"].get<std::string>();
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
std::string common_docker_resolve_model(const std::string & docker) {
|
|
671
|
+
// Parse ai/smollm2:135M-Q4_0
|
|
672
|
+
size_t colon_pos = docker.find(':');
|
|
673
|
+
std::string repo, tag;
|
|
674
|
+
if (colon_pos != std::string::npos) {
|
|
675
|
+
repo = docker.substr(0, colon_pos);
|
|
676
|
+
tag = docker.substr(colon_pos + 1);
|
|
677
|
+
} else {
|
|
678
|
+
repo = docker;
|
|
679
|
+
tag = "latest";
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
// ai/ is the default
|
|
683
|
+
size_t slash_pos = docker.find('/');
|
|
684
|
+
if (slash_pos == std::string::npos) {
|
|
685
|
+
repo.insert(0, "ai/");
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
LOG_INF("%s: Downloading Docker Model: %s:%s\n", __func__, repo.c_str(), tag.c_str());
|
|
689
|
+
try {
|
|
690
|
+
// --- helper: digest validation ---
|
|
691
|
+
auto validate_oci_digest = [](const std::string & digest) -> std::string {
|
|
692
|
+
// Expected: algo:hex ; start with sha256 (64 hex chars)
|
|
693
|
+
// You can extend this map if supporting other algorithms in future.
|
|
694
|
+
static const std::regex re("^sha256:([a-fA-F0-9]{64})$");
|
|
695
|
+
std::smatch m;
|
|
696
|
+
if (!std::regex_match(digest, m, re)) {
|
|
697
|
+
throw std::runtime_error("Invalid OCI digest format received in manifest: " + digest);
|
|
698
|
+
}
|
|
699
|
+
// normalize hex to lowercase
|
|
700
|
+
std::string normalized = digest;
|
|
701
|
+
std::transform(normalized.begin()+7, normalized.end(), normalized.begin()+7, [](unsigned char c){
|
|
702
|
+
return std::tolower(c);
|
|
703
|
+
});
|
|
704
|
+
return normalized;
|
|
705
|
+
};
|
|
706
|
+
|
|
707
|
+
std::string token = common_docker_get_token(repo); // Get authentication token
|
|
708
|
+
|
|
709
|
+
// Get manifest
|
|
710
|
+
// TODO: cache the manifest response so that it appears in the model list
|
|
711
|
+
const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
|
|
712
|
+
std::string manifest_url = url_prefix + "/manifests/" + tag;
|
|
713
|
+
common_remote_params manifest_params;
|
|
714
|
+
manifest_params.headers.push_back({"Authorization", "Bearer " + token});
|
|
715
|
+
manifest_params.headers.push_back({"Accept",
|
|
716
|
+
"application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json"
|
|
717
|
+
});
|
|
718
|
+
auto manifest_res = common_remote_get_content(manifest_url, manifest_params);
|
|
719
|
+
if (manifest_res.first != 200) {
|
|
720
|
+
throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(manifest_res.first));
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
std::string manifest_str(manifest_res.second.begin(), manifest_res.second.end());
|
|
724
|
+
nlohmann::ordered_json manifest = nlohmann::ordered_json::parse(manifest_str);
|
|
725
|
+
std::string gguf_digest; // Find the GGUF layer
|
|
726
|
+
if (manifest.contains("layers")) {
|
|
727
|
+
for (const auto & layer : manifest["layers"]) {
|
|
728
|
+
if (layer.contains("mediaType")) {
|
|
729
|
+
std::string media_type = layer["mediaType"].get<std::string>();
|
|
730
|
+
if (media_type == "application/vnd.docker.ai.gguf.v3" ||
|
|
731
|
+
media_type.find("gguf") != std::string::npos) {
|
|
732
|
+
gguf_digest = layer["digest"].get<std::string>();
|
|
733
|
+
break;
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
if (gguf_digest.empty()) {
|
|
740
|
+
throw std::runtime_error("No GGUF layer found in Docker manifest");
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
// Validate & normalize digest
|
|
744
|
+
gguf_digest = validate_oci_digest(gguf_digest);
|
|
745
|
+
LOG_DBG("%s: Using validated digest: %s\n", __func__, gguf_digest.c_str());
|
|
746
|
+
|
|
747
|
+
// Prepare local filename
|
|
748
|
+
std::string model_filename = repo;
|
|
749
|
+
std::replace(model_filename.begin(), model_filename.end(), '/', '_');
|
|
750
|
+
model_filename += "_" + tag + ".gguf";
|
|
751
|
+
std::string local_path = fs_get_cache_file(model_filename);
|
|
752
|
+
|
|
753
|
+
const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
|
|
754
|
+
const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
|
|
755
|
+
if (!is_http_status_ok(http_status)) {
|
|
756
|
+
throw std::runtime_error("Failed to download Docker Model");
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
LOG_INF("%s: Downloaded Docker Model to: %s\n", __func__, local_path.c_str());
|
|
760
|
+
return local_path;
|
|
761
|
+
} catch (const std::exception & e) {
|
|
762
|
+
LOG_ERR("%s: Docker Model download failed: %s\n", __func__, e.what());
|
|
763
|
+
throw;
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
std::vector<common_cached_model_info> common_list_cached_models() {
|
|
768
|
+
std::vector<common_cached_model_info> models;
|
|
769
|
+
const std::string cache_dir = fs_get_cache_directory();
|
|
770
|
+
const std::vector<common_file_info> files = fs_list(cache_dir, false);
|
|
771
|
+
for (const auto & file : files) {
|
|
772
|
+
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
|
773
|
+
common_cached_model_info model_info;
|
|
774
|
+
model_info.manifest_path = file.path;
|
|
775
|
+
std::string fname = file.name;
|
|
776
|
+
string_replace_all(fname, ".json", ""); // remove extension
|
|
777
|
+
auto parts = string_split<std::string>(fname, '=');
|
|
778
|
+
if (parts.size() == 4) {
|
|
779
|
+
// expect format: manifest=<user>=<model>=<tag>=<other>
|
|
780
|
+
model_info.user = parts[1];
|
|
781
|
+
model_info.model = parts[2];
|
|
782
|
+
model_info.tag = parts[3];
|
|
783
|
+
} else {
|
|
784
|
+
// invalid format
|
|
785
|
+
continue;
|
|
786
|
+
}
|
|
787
|
+
model_info.size = 0; // TODO: get GGUF size, not manifest size
|
|
788
|
+
models.push_back(model_info);
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
return models;
|
|
792
|
+
}
|