local-llm-rn 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/CMakeLists.txt +285 -0
- package/cpp/common/CMakeLists.txt +149 -0
- package/cpp/common/arg.cpp +3799 -0
- package/cpp/common/arg.h +131 -0
- package/cpp/common/base64.hpp +392 -0
- package/cpp/common/build-info.cpp.in +4 -0
- package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
- package/cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/cpp/common/chat-parser.cpp +1649 -0
- package/cpp/common/chat-parser.h +133 -0
- package/cpp/common/chat-peg-parser.cpp +124 -0
- package/cpp/common/chat-peg-parser.h +105 -0
- package/cpp/common/chat.cpp +3355 -0
- package/cpp/common/chat.h +252 -0
- package/cpp/common/common.cpp +1824 -0
- package/cpp/common/common.h +930 -0
- package/cpp/common/console.cpp +1137 -0
- package/cpp/common/console.h +41 -0
- package/cpp/common/debug.cpp +167 -0
- package/cpp/common/debug.h +43 -0
- package/cpp/common/download.cpp +792 -0
- package/cpp/common/download.h +84 -0
- package/cpp/common/http.h +84 -0
- package/cpp/common/jinja/README.md +88 -0
- package/cpp/common/jinja/caps.cpp +285 -0
- package/cpp/common/jinja/caps.h +30 -0
- package/cpp/common/jinja/lexer.cpp +341 -0
- package/cpp/common/jinja/lexer.h +157 -0
- package/cpp/common/jinja/parser.cpp +591 -0
- package/cpp/common/jinja/parser.h +21 -0
- package/cpp/common/jinja/runtime.cpp +867 -0
- package/cpp/common/jinja/runtime.h +638 -0
- package/cpp/common/jinja/string.cpp +213 -0
- package/cpp/common/jinja/string.h +61 -0
- package/cpp/common/jinja/utils.h +149 -0
- package/cpp/common/jinja/value.cpp +1393 -0
- package/cpp/common/jinja/value.h +756 -0
- package/cpp/common/json-partial.cpp +324 -0
- package/cpp/common/json-partial.h +39 -0
- package/cpp/common/json-schema-to-grammar.cpp +1153 -0
- package/cpp/common/json-schema-to-grammar.h +43 -0
- package/cpp/common/llguidance.cpp +258 -0
- package/cpp/common/log.cpp +446 -0
- package/cpp/common/log.h +119 -0
- package/cpp/common/ngram-cache.cpp +285 -0
- package/cpp/common/ngram-cache.h +101 -0
- package/cpp/common/ngram-map.cpp +530 -0
- package/cpp/common/ngram-map.h +115 -0
- package/cpp/common/ngram-mod.cpp +60 -0
- package/cpp/common/ngram-mod.h +38 -0
- package/cpp/common/peg-parser.cpp +1712 -0
- package/cpp/common/peg-parser.h +459 -0
- package/cpp/common/preset.cpp +483 -0
- package/cpp/common/preset.h +83 -0
- package/cpp/common/regex-partial.cpp +204 -0
- package/cpp/common/regex-partial.h +56 -0
- package/cpp/common/sampling.cpp +745 -0
- package/cpp/common/sampling.h +119 -0
- package/cpp/common/speculative.cpp +1074 -0
- package/cpp/common/speculative.h +41 -0
- package/cpp/common/unicode.cpp +64 -0
- package/cpp/common/unicode.h +22 -0
- package/cpp/ggml/CMakeLists.txt +494 -0
- package/cpp/ggml/cmake/GitVars.cmake +22 -0
- package/cpp/ggml/cmake/common.cmake +50 -0
- package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
- package/cpp/ggml/include/ggml-alloc.h +85 -0
- package/cpp/ggml/include/ggml-backend.h +373 -0
- package/cpp/ggml/include/ggml-blas.h +25 -0
- package/cpp/ggml/include/ggml-cann.h +123 -0
- package/cpp/ggml/include/ggml-cpp.h +39 -0
- package/cpp/ggml/include/ggml-cpu.h +151 -0
- package/cpp/ggml/include/ggml-cuda.h +47 -0
- package/cpp/ggml/include/ggml-hexagon.h +19 -0
- package/cpp/ggml/include/ggml-metal.h +61 -0
- package/cpp/ggml/include/ggml-opencl.h +26 -0
- package/cpp/ggml/include/ggml-opt.h +256 -0
- package/cpp/ggml/include/ggml-rpc.h +30 -0
- package/cpp/ggml/include/ggml-sycl.h +49 -0
- package/cpp/ggml/include/ggml-virtgpu.h +14 -0
- package/cpp/ggml/include/ggml-vulkan.h +29 -0
- package/cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/ggml/include/ggml-zdnn.h +17 -0
- package/cpp/ggml/include/ggml-zendnn.h +22 -0
- package/cpp/ggml/include/ggml.h +2753 -0
- package/cpp/ggml/include/gguf.h +204 -0
- package/cpp/ggml/src/CMakeLists.txt +492 -0
- package/cpp/ggml/src/ggml-alloc.c +1244 -0
- package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
- package/cpp/ggml/src/ggml-backend-dl.h +45 -0
- package/cpp/ggml/src/ggml-backend-impl.h +255 -0
- package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
- package/cpp/ggml/src/ggml-backend.cpp +2270 -0
- package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
- package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
- package/cpp/ggml/src/ggml-common.h +1878 -0
- package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
- package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
- package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
- package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
- package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
- package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
- package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
- package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
- package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
- package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
- package/cpp/ggml/src/ggml-cpu/common.h +95 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
- package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
- package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
- package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
- package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
- package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
- package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
- package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
- package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
- package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
- package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
- package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
- package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
- package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
- package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
- package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
- package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
- package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
- package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
- package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
- package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
- package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
- package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
- package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
- package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
- package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
- package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
- package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
- package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
- package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
- package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
- package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
- package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
- package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
- package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
- package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
- package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
- package/cpp/ggml/src/ggml-impl.h +724 -0
- package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
- package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
- package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
- package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
- package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
- package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
- package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
- package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
- package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
- package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
- package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
- package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
- package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
- package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
- package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
- package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
- package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
- package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
- package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
- package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
- package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
- package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
- package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
- package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
- package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
- package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
- package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
- package/cpp/ggml/src/ggml-opt.cpp +1093 -0
- package/cpp/ggml/src/ggml-quants.c +5325 -0
- package/cpp/ggml/src/ggml-quants.h +106 -0
- package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
- package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
- package/cpp/ggml/src/ggml-threading.cpp +12 -0
- package/cpp/ggml/src/ggml-threading.h +14 -0
- package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
- package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
- package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
- package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
- package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
- package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
- package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
- package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
- package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
- package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
- package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
- package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
- package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
- package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
- package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
- package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
- package/cpp/ggml/src/ggml.c +7669 -0
- package/cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/ggml/src/gguf.cpp +1699 -0
- package/cpp/include/llama-cpp.h +32 -0
- package/cpp/include/llama.h +1568 -0
- package/cpp/mtmd/CMakeLists.txt +98 -0
- package/cpp/mtmd/README.md +63 -0
- package/cpp/mtmd/clip-graph.h +117 -0
- package/cpp/mtmd/clip-impl.h +586 -0
- package/cpp/mtmd/clip-model.h +390 -0
- package/cpp/mtmd/clip.cpp +4154 -0
- package/cpp/mtmd/clip.h +121 -0
- package/cpp/mtmd/deprecation-warning.cpp +22 -0
- package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
- package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
- package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
- package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
- package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
- package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
- package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
- package/cpp/mtmd/models/cogvlm.cpp +98 -0
- package/cpp/mtmd/models/conformer.cpp +216 -0
- package/cpp/mtmd/models/glm4v.cpp +122 -0
- package/cpp/mtmd/models/internvl.cpp +69 -0
- package/cpp/mtmd/models/kimik25.cpp +101 -0
- package/cpp/mtmd/models/kimivl.cpp +63 -0
- package/cpp/mtmd/models/llama4.cpp +96 -0
- package/cpp/mtmd/models/llava.cpp +374 -0
- package/cpp/mtmd/models/minicpmv.cpp +114 -0
- package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
- package/cpp/mtmd/models/models.h +128 -0
- package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
- package/cpp/mtmd/models/paddleocr.cpp +52 -0
- package/cpp/mtmd/models/pixtral.cpp +86 -0
- package/cpp/mtmd/models/qwen2vl.cpp +183 -0
- package/cpp/mtmd/models/qwen3vl.cpp +193 -0
- package/cpp/mtmd/models/siglip.cpp +86 -0
- package/cpp/mtmd/models/whisper-enc.cpp +115 -0
- package/cpp/mtmd/models/youtuvl.cpp +179 -0
- package/cpp/mtmd/mtmd-audio.cpp +730 -0
- package/cpp/mtmd/mtmd-audio.h +113 -0
- package/cpp/mtmd/mtmd-cli.cpp +437 -0
- package/cpp/mtmd/mtmd-helper.cpp +521 -0
- package/cpp/mtmd/mtmd-helper.h +96 -0
- package/cpp/mtmd/mtmd.cpp +1156 -0
- package/cpp/mtmd/mtmd.h +319 -0
- package/cpp/mtmd/requirements.txt +5 -0
- package/cpp/mtmd/test-1.jpeg +0 -0
- package/cpp/mtmd/test-2.mp3 +0 -0
- package/cpp/mtmd/tests.sh +192 -0
- package/cpp/src/CMakeLists.txt +169 -0
- package/cpp/src/llama-adapter.cpp +488 -0
- package/cpp/src/llama-adapter.h +89 -0
- package/cpp/src/llama-arch.cpp +2855 -0
- package/cpp/src/llama-arch.h +619 -0
- package/cpp/src/llama-batch.cpp +917 -0
- package/cpp/src/llama-batch.h +173 -0
- package/cpp/src/llama-chat.cpp +896 -0
- package/cpp/src/llama-chat.h +71 -0
- package/cpp/src/llama-context.cpp +3512 -0
- package/cpp/src/llama-context.h +359 -0
- package/cpp/src/llama-cparams.cpp +5 -0
- package/cpp/src/llama-cparams.h +44 -0
- package/cpp/src/llama-grammar.cpp +1464 -0
- package/cpp/src/llama-grammar.h +194 -0
- package/cpp/src/llama-graph.cpp +2685 -0
- package/cpp/src/llama-graph.h +1026 -0
- package/cpp/src/llama-hparams.cpp +234 -0
- package/cpp/src/llama-hparams.h +339 -0
- package/cpp/src/llama-impl.cpp +171 -0
- package/cpp/src/llama-impl.h +73 -0
- package/cpp/src/llama-io.cpp +15 -0
- package/cpp/src/llama-io.h +35 -0
- package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
- package/cpp/src/llama-kv-cache-iswa.h +137 -0
- package/cpp/src/llama-kv-cache.cpp +2271 -0
- package/cpp/src/llama-kv-cache.h +388 -0
- package/cpp/src/llama-kv-cells.h +533 -0
- package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
- package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
- package/cpp/src/llama-memory-hybrid.cpp +268 -0
- package/cpp/src/llama-memory-hybrid.h +139 -0
- package/cpp/src/llama-memory-recurrent.cpp +1165 -0
- package/cpp/src/llama-memory-recurrent.h +182 -0
- package/cpp/src/llama-memory.cpp +59 -0
- package/cpp/src/llama-memory.h +122 -0
- package/cpp/src/llama-mmap.cpp +785 -0
- package/cpp/src/llama-mmap.h +92 -0
- package/cpp/src/llama-model-loader.cpp +1414 -0
- package/cpp/src/llama-model-loader.h +203 -0
- package/cpp/src/llama-model-saver.cpp +286 -0
- package/cpp/src/llama-model-saver.h +37 -0
- package/cpp/src/llama-model.cpp +9253 -0
- package/cpp/src/llama-model.h +576 -0
- package/cpp/src/llama-quant.cpp +1119 -0
- package/cpp/src/llama-quant.h +1 -0
- package/cpp/src/llama-sampler.cpp +3885 -0
- package/cpp/src/llama-sampler.h +42 -0
- package/cpp/src/llama-vocab.cpp +3970 -0
- package/cpp/src/llama-vocab.h +187 -0
- package/cpp/src/llama.cpp +1313 -0
- package/cpp/src/models/afmoe.cpp +191 -0
- package/cpp/src/models/apertus.cpp +125 -0
- package/cpp/src/models/arcee.cpp +135 -0
- package/cpp/src/models/arctic.cpp +138 -0
- package/cpp/src/models/arwkv7.cpp +86 -0
- package/cpp/src/models/baichuan.cpp +122 -0
- package/cpp/src/models/bailingmoe.cpp +144 -0
- package/cpp/src/models/bailingmoe2.cpp +135 -0
- package/cpp/src/models/bert.cpp +178 -0
- package/cpp/src/models/bitnet.cpp +160 -0
- package/cpp/src/models/bloom.cpp +101 -0
- package/cpp/src/models/chameleon.cpp +178 -0
- package/cpp/src/models/chatglm.cpp +132 -0
- package/cpp/src/models/codeshell.cpp +111 -0
- package/cpp/src/models/cogvlm.cpp +102 -0
- package/cpp/src/models/cohere2-iswa.cpp +134 -0
- package/cpp/src/models/command-r.cpp +122 -0
- package/cpp/src/models/dbrx.cpp +123 -0
- package/cpp/src/models/deci.cpp +135 -0
- package/cpp/src/models/deepseek.cpp +144 -0
- package/cpp/src/models/deepseek2.cpp +262 -0
- package/cpp/src/models/delta-net-base.cpp +376 -0
- package/cpp/src/models/dots1.cpp +134 -0
- package/cpp/src/models/dream.cpp +105 -0
- package/cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/cpp/src/models/ernie4-5.cpp +110 -0
- package/cpp/src/models/eurobert.cpp +97 -0
- package/cpp/src/models/exaone-moe.cpp +146 -0
- package/cpp/src/models/exaone.cpp +114 -0
- package/cpp/src/models/exaone4.cpp +123 -0
- package/cpp/src/models/falcon-h1.cpp +111 -0
- package/cpp/src/models/falcon.cpp +120 -0
- package/cpp/src/models/gemma-embedding.cpp +116 -0
- package/cpp/src/models/gemma.cpp +112 -0
- package/cpp/src/models/gemma2-iswa.cpp +128 -0
- package/cpp/src/models/gemma3.cpp +155 -0
- package/cpp/src/models/gemma3n-iswa.cpp +384 -0
- package/cpp/src/models/glm4-moe.cpp +170 -0
- package/cpp/src/models/glm4.cpp +157 -0
- package/cpp/src/models/gpt2.cpp +105 -0
- package/cpp/src/models/gptneox.cpp +144 -0
- package/cpp/src/models/granite-hybrid.cpp +196 -0
- package/cpp/src/models/granite.cpp +211 -0
- package/cpp/src/models/grok.cpp +159 -0
- package/cpp/src/models/grovemoe.cpp +141 -0
- package/cpp/src/models/hunyuan-dense.cpp +132 -0
- package/cpp/src/models/hunyuan-moe.cpp +154 -0
- package/cpp/src/models/internlm2.cpp +120 -0
- package/cpp/src/models/jais.cpp +86 -0
- package/cpp/src/models/jais2.cpp +123 -0
- package/cpp/src/models/jamba.cpp +106 -0
- package/cpp/src/models/kimi-linear.cpp +392 -0
- package/cpp/src/models/lfm2.cpp +190 -0
- package/cpp/src/models/llada-moe.cpp +122 -0
- package/cpp/src/models/llada.cpp +99 -0
- package/cpp/src/models/llama-iswa.cpp +178 -0
- package/cpp/src/models/llama.cpp +168 -0
- package/cpp/src/models/maincoder.cpp +117 -0
- package/cpp/src/models/mamba-base.cpp +285 -0
- package/cpp/src/models/mamba.cpp +54 -0
- package/cpp/src/models/mimo2-iswa.cpp +123 -0
- package/cpp/src/models/minicpm3.cpp +200 -0
- package/cpp/src/models/minimax-m2.cpp +124 -0
- package/cpp/src/models/mistral3.cpp +160 -0
- package/cpp/src/models/models.h +684 -0
- package/cpp/src/models/modern-bert.cpp +109 -0
- package/cpp/src/models/mpt.cpp +126 -0
- package/cpp/src/models/nemotron-h.cpp +148 -0
- package/cpp/src/models/nemotron.cpp +122 -0
- package/cpp/src/models/neo-bert.cpp +104 -0
- package/cpp/src/models/olmo.cpp +121 -0
- package/cpp/src/models/olmo2.cpp +150 -0
- package/cpp/src/models/olmoe.cpp +124 -0
- package/cpp/src/models/openai-moe-iswa.cpp +127 -0
- package/cpp/src/models/openelm.cpp +124 -0
- package/cpp/src/models/orion.cpp +123 -0
- package/cpp/src/models/paddleocr.cpp +122 -0
- package/cpp/src/models/pangu-embedded.cpp +121 -0
- package/cpp/src/models/phi2.cpp +121 -0
- package/cpp/src/models/phi3.cpp +152 -0
- package/cpp/src/models/plamo.cpp +110 -0
- package/cpp/src/models/plamo2.cpp +318 -0
- package/cpp/src/models/plamo3.cpp +128 -0
- package/cpp/src/models/plm.cpp +169 -0
- package/cpp/src/models/qwen.cpp +108 -0
- package/cpp/src/models/qwen2.cpp +126 -0
- package/cpp/src/models/qwen2moe.cpp +151 -0
- package/cpp/src/models/qwen2vl.cpp +117 -0
- package/cpp/src/models/qwen3.cpp +117 -0
- package/cpp/src/models/qwen35.cpp +386 -0
- package/cpp/src/models/qwen35moe.cpp +420 -0
- package/cpp/src/models/qwen3moe.cpp +124 -0
- package/cpp/src/models/qwen3next.cpp +525 -0
- package/cpp/src/models/qwen3vl-moe.cpp +140 -0
- package/cpp/src/models/qwen3vl.cpp +132 -0
- package/cpp/src/models/refact.cpp +94 -0
- package/cpp/src/models/rnd1.cpp +126 -0
- package/cpp/src/models/rwkv6-base.cpp +164 -0
- package/cpp/src/models/rwkv6.cpp +94 -0
- package/cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/cpp/src/models/rwkv7-base.cpp +137 -0
- package/cpp/src/models/rwkv7.cpp +90 -0
- package/cpp/src/models/seed-oss.cpp +124 -0
- package/cpp/src/models/smallthinker.cpp +126 -0
- package/cpp/src/models/smollm3.cpp +128 -0
- package/cpp/src/models/stablelm.cpp +146 -0
- package/cpp/src/models/starcoder.cpp +100 -0
- package/cpp/src/models/starcoder2.cpp +121 -0
- package/cpp/src/models/step35-iswa.cpp +168 -0
- package/cpp/src/models/t5-dec.cpp +166 -0
- package/cpp/src/models/t5-enc.cpp +96 -0
- package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/cpp/src/models/xverse.cpp +108 -0
- package/cpp/src/unicode-data.cpp +7034 -0
- package/cpp/src/unicode-data.h +20 -0
- package/cpp/src/unicode.cpp +1103 -0
- package/cpp/src/unicode.h +111 -0
- package/cpp/vendor/nlohmann/json.hpp +25526 -0
- package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/vendor/stb/stb_image.h +7988 -0
- package/ios/LocalLLM-Bridging-Header.h +2 -0
- package/ios/LocalLLM.h +5 -0
- package/ios/LocalLLM.mm +1267 -0
- package/local-llm-rn.podspec +60 -0
- package/package.json +35 -0
- package/src/NativeLocalLLM.ts +73 -0
- package/src/device.ts +50 -0
- package/src/download-adapter.ts +17 -0
- package/src/index.ts +21 -0
- package/src/native-bridge.ts +142 -0
- package/src/rn-downloader.ts +37 -0
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
#include "arg.h"
|
|
2
|
+
#include "preset.h"
|
|
3
|
+
#include "peg-parser.h"
|
|
4
|
+
#include "log.h"
|
|
5
|
+
#include "download.h"
|
|
6
|
+
|
|
7
|
+
#include <fstream>
|
|
8
|
+
#include <sstream>
|
|
9
|
+
#include <filesystem>
|
|
10
|
+
|
|
11
|
+
// Strip every leading '-' from an argument name (e.g. "--hf-repo" -> "hf-repo").
// Dashes that are not at the start are kept; an empty or all-dash input
// yields an empty string.
static std::string rm_leading_dashes(const std::string & str) {
    const size_t first_kept = str.find_first_not_of('-');
    if (first_kept == std::string::npos) {
        // nothing but dashes (or empty input)
        return std::string();
    }
    return str.substr(first_kept);
}
|
|
18
|
+
|
|
19
|
+
// only allow a subset of args for remote presets for security reasons
|
|
20
|
+
// do not add more args unless absolutely necessary
|
|
21
|
+
// args that output to files are strictly prohibited
|
|
22
|
+
static std::set<std::string> get_remote_preset_whitelist(const std::map<std::string, common_arg> & key_to_opt) {
|
|
23
|
+
static const std::set<std::string> allowed_options = {
|
|
24
|
+
"model-url",
|
|
25
|
+
"hf-repo",
|
|
26
|
+
"hf-repo-draft",
|
|
27
|
+
"hf-repo-v", // vocoder
|
|
28
|
+
"hf-file-v", // vocoder
|
|
29
|
+
"mmproj-url",
|
|
30
|
+
"pooling",
|
|
31
|
+
"jinja",
|
|
32
|
+
"batch-size",
|
|
33
|
+
"ubatch-size",
|
|
34
|
+
"cache-reuse",
|
|
35
|
+
"chat-template-kwargs",
|
|
36
|
+
"mmap",
|
|
37
|
+
// note: sampling params are automatically allowed by default
|
|
38
|
+
// negated args will be added automatically if the positive arg is specified above
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
std::set<std::string> allowed_keys;
|
|
42
|
+
|
|
43
|
+
for (const auto & it : key_to_opt) {
|
|
44
|
+
const std::string & key = it.first;
|
|
45
|
+
const common_arg & opt = it.second;
|
|
46
|
+
if (allowed_options.find(key) != allowed_options.end() || opt.is_sparam) {
|
|
47
|
+
allowed_keys.insert(key);
|
|
48
|
+
// also add variant keys (args without leading dashes and env vars)
|
|
49
|
+
for (const auto & arg : opt.get_args()) {
|
|
50
|
+
allowed_keys.insert(rm_leading_dashes(arg));
|
|
51
|
+
}
|
|
52
|
+
for (const auto & env : opt.get_env()) {
|
|
53
|
+
allowed_keys.insert(env);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return allowed_keys;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
|
|
62
|
+
std::vector<std::string> args;
|
|
63
|
+
|
|
64
|
+
if (!bin_path.empty()) {
|
|
65
|
+
args.push_back(bin_path);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
for (const auto & [opt, value] : options) {
|
|
69
|
+
if (opt.is_preset_only) {
|
|
70
|
+
continue; // skip preset-only options (they are not CLI args)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// use the last arg as the main arg (i.e. --long-form)
|
|
74
|
+
args.push_back(opt.args.back());
|
|
75
|
+
|
|
76
|
+
// handle value(s)
|
|
77
|
+
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
|
|
78
|
+
// flag option, no value
|
|
79
|
+
if (common_arg_utils::is_falsey(value)) {
|
|
80
|
+
// use negative arg if available
|
|
81
|
+
if (!opt.args_neg.empty()) {
|
|
82
|
+
args.back() = opt.args_neg.back();
|
|
83
|
+
} else {
|
|
84
|
+
// otherwise, skip the flag
|
|
85
|
+
// TODO: maybe throw an error instead?
|
|
86
|
+
args.pop_back();
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
if (opt.value_hint != nullptr) {
|
|
91
|
+
// single value
|
|
92
|
+
args.push_back(value);
|
|
93
|
+
}
|
|
94
|
+
if (opt.value_hint != nullptr && opt.value_hint_2 != nullptr) {
|
|
95
|
+
throw std::runtime_error(string_format(
|
|
96
|
+
"common_preset::to_args(): option '%s' has two values, which is not supported yet",
|
|
97
|
+
opt.args.back()
|
|
98
|
+
));
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
return args;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
std::string common_preset::to_ini() const {
|
|
106
|
+
std::ostringstream ss;
|
|
107
|
+
|
|
108
|
+
ss << "[" << name << "]\n";
|
|
109
|
+
for (const auto & [opt, value] : options) {
|
|
110
|
+
auto espaced_value = value;
|
|
111
|
+
string_replace_all(espaced_value, "\n", "\\\n");
|
|
112
|
+
ss << rm_leading_dashes(opt.args.back()) << " = ";
|
|
113
|
+
ss << espaced_value << "\n";
|
|
114
|
+
}
|
|
115
|
+
ss << "\n";
|
|
116
|
+
|
|
117
|
+
return ss.str();
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) {
|
|
121
|
+
// try if option exists, update it
|
|
122
|
+
for (auto & [opt, val] : options) {
|
|
123
|
+
if (opt.env && env == opt.env) {
|
|
124
|
+
val = value;
|
|
125
|
+
return;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
// if option does not exist, we need to add it
|
|
129
|
+
if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) {
|
|
130
|
+
throw std::runtime_error(string_format(
|
|
131
|
+
"%s: option with env '%s' not found in ctx_params",
|
|
132
|
+
__func__, env.c_str()
|
|
133
|
+
));
|
|
134
|
+
}
|
|
135
|
+
options[ctx.key_to_opt.at(env)] = value;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
void common_preset::unset_option(const std::string & env) {
|
|
139
|
+
for (auto it = options.begin(); it != options.end(); ) {
|
|
140
|
+
const common_arg & opt = it->first;
|
|
141
|
+
if (opt.env && env == opt.env) {
|
|
142
|
+
it = options.erase(it);
|
|
143
|
+
return;
|
|
144
|
+
} else {
|
|
145
|
+
++it;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
bool common_preset::get_option(const std::string & env, std::string & value) const {
|
|
151
|
+
for (const auto & [opt, val] : options) {
|
|
152
|
+
if (opt.env && env == opt.env) {
|
|
153
|
+
value = val;
|
|
154
|
+
return true;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
return false;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
void common_preset::merge(const common_preset & other) {
|
|
161
|
+
for (const auto & [opt, val] : other.options) {
|
|
162
|
+
options[opt] = val; // overwrite existing options
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// Apply every stored option to `params` by invoking the option's handler.
//
// The first non-null handler is used, checked in this order:
// string, int (value parsed with std::stoi), bool (value interpreted with
// is_truthy), str_str (unsupported: throws std::runtime_error), void.
// Aborts via GGML_ABORT when an option has no handler at all.
//
// NOTE(review): a void handler is invoked regardless of the stored value,
// whereas to_args() omits falsey flags that have no negated form - confirm
// this difference is intended.
void common_preset::apply_to_params(common_params & params) const {
    for (const auto & entry : options) {
        const common_arg  & opt = entry.first;
        const std::string & val = entry.second;

        if (opt.handler_string) {
            opt.handler_string(params, val);
            continue;
        }
        if (opt.handler_int) {
            opt.handler_int(params, std::stoi(val));
            continue;
        }
        if (opt.handler_bool) {
            opt.handler_bool(params, common_arg_utils::is_truthy(val));
            continue;
        }
        if (opt.handler_str_str) {
            // not supported yet
            throw std::runtime_error(string_format(
                "%s: option with two values is not supported yet",
                __func__
            ));
        }
        if (opt.handler_void) {
            opt.handler_void(params);
            continue;
        }
        GGML_ABORT("unknown handler type");
    }
}
|
|
188
|
+
|
|
189
|
+
static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
|
|
190
|
+
std::map<std::string, std::map<std::string, std::string>> parsed;
|
|
191
|
+
|
|
192
|
+
if (!std::filesystem::exists(path)) {
|
|
193
|
+
throw std::runtime_error("preset file does not exist: " + path);
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
std::ifstream file(path);
|
|
197
|
+
if (!file.good()) {
|
|
198
|
+
throw std::runtime_error("failed to open server preset file: " + path);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
std::string contents((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
|
202
|
+
|
|
203
|
+
static const auto parser = build_peg_parser([](auto & p) {
|
|
204
|
+
// newline ::= "\r\n" / "\n" / "\r"
|
|
205
|
+
auto newline = p.rule("newline", p.literal("\r\n") | p.literal("\n") | p.literal("\r"));
|
|
206
|
+
|
|
207
|
+
// ws ::= [ \t]*
|
|
208
|
+
auto ws = p.rule("ws", p.chars("[ \t]", 0, -1));
|
|
209
|
+
|
|
210
|
+
// comment ::= [;#] (!newline .)*
|
|
211
|
+
auto comment = p.rule("comment", p.chars("[;#]", 1, 1) + p.zero_or_more(p.negate(newline) + p.any()));
|
|
212
|
+
|
|
213
|
+
// eol ::= ws comment? (newline / EOF)
|
|
214
|
+
auto eol = p.rule("eol", ws + p.optional(comment) + (newline | p.end()));
|
|
215
|
+
|
|
216
|
+
// ident ::= [a-zA-Z_] [a-zA-Z0-9_.-]*
|
|
217
|
+
auto ident = p.rule("ident", p.chars("[a-zA-Z_]", 1, 1) + p.chars("[a-zA-Z0-9_.-]", 0, -1));
|
|
218
|
+
|
|
219
|
+
// value ::= (!eol-start .)*
|
|
220
|
+
auto eol_start = p.rule("eol-start", ws + (p.chars("[;#]", 1, 1) | newline | p.end()));
|
|
221
|
+
auto value = p.rule("value", p.zero_or_more(p.negate(eol_start) + p.any()));
|
|
222
|
+
|
|
223
|
+
// header-line ::= "[" ws ident ws "]" eol
|
|
224
|
+
auto header_line = p.rule("header-line", "[" + ws + p.tag("section-name", p.chars("[^]]")) + ws + "]" + eol);
|
|
225
|
+
|
|
226
|
+
// kv-line ::= ident ws "=" ws value eol
|
|
227
|
+
auto kv_line = p.rule("kv-line", p.tag("key", ident) + ws + "=" + ws + p.tag("value", value) + eol);
|
|
228
|
+
|
|
229
|
+
// comment-line ::= ws comment (newline / EOF)
|
|
230
|
+
auto comment_line = p.rule("comment-line", ws + comment + (newline | p.end()));
|
|
231
|
+
|
|
232
|
+
// blank-line ::= ws (newline / EOF)
|
|
233
|
+
auto blank_line = p.rule("blank-line", ws + (newline | p.end()));
|
|
234
|
+
|
|
235
|
+
// line ::= header-line / kv-line / comment-line / blank-line
|
|
236
|
+
auto line = p.rule("line", header_line | kv_line | comment_line | blank_line);
|
|
237
|
+
|
|
238
|
+
// ini ::= line* EOF
|
|
239
|
+
auto ini = p.rule("ini", p.zero_or_more(line) + p.end());
|
|
240
|
+
|
|
241
|
+
return ini;
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
common_peg_parse_context ctx(contents);
|
|
245
|
+
const auto result = parser.parse(ctx);
|
|
246
|
+
if (!result.success()) {
|
|
247
|
+
throw std::runtime_error("failed to parse server config file: " + path);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
std::string current_section = COMMON_PRESET_DEFAULT_NAME;
|
|
251
|
+
std::string current_key;
|
|
252
|
+
|
|
253
|
+
ctx.ast.visit(result, [&](const auto & node) {
|
|
254
|
+
if (node.tag == "section-name") {
|
|
255
|
+
const std::string section = std::string(node.text);
|
|
256
|
+
current_section = section;
|
|
257
|
+
parsed[current_section] = {};
|
|
258
|
+
} else if (node.tag == "key") {
|
|
259
|
+
const std::string key = std::string(node.text);
|
|
260
|
+
current_key = key;
|
|
261
|
+
} else if (node.tag == "value" && !current_key.empty() && !current_section.empty()) {
|
|
262
|
+
parsed[current_section][current_key] = std::string(node.text);
|
|
263
|
+
current_key.clear();
|
|
264
|
+
}
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
return parsed;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Build a lookup table from every name an option can be referred to by
// (each get_env() name and each get_args() name without leading dashes)
// to a copy of the option. When two options share a name, the one processed
// later replaces the earlier entry.
static std::map<std::string, common_arg> get_map_key_opt(common_params_context & ctx_params) {
    std::map<std::string, common_arg> by_key;

    for (const auto & opt : ctx_params.options) {
        const auto env_names = opt.get_env();
        for (const auto & env_name : env_names) {
            by_key[env_name] = opt;
        }

        const auto arg_names = opt.get_args();
        for (const auto & arg_name : arg_names) {
            by_key[rm_leading_dashes(arg_name)] = opt;
        }
    }

    return by_key;
}
|
|
282
|
+
|
|
283
|
+
static bool is_bool_arg(const common_arg & arg) {
|
|
284
|
+
return !arg.args_neg.empty();
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
static std::string parse_bool_arg(const common_arg & arg, const std::string & key, const std::string & value) {
|
|
288
|
+
// if this is a negated arg, we need to reverse the value
|
|
289
|
+
for (const auto & neg_arg : arg.args_neg) {
|
|
290
|
+
if (rm_leading_dashes(neg_arg) == key) {
|
|
291
|
+
return common_arg_utils::is_truthy(value) ? "false" : "true";
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
// otherwise, not negated
|
|
295
|
+
return value;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
// Set up the option tables for the given example.
//
// The parser context is initialized from default_params and then extended with
// the preset options before the key -> option lookup table is built. When
// only_remote_allowed is true, key filtering is switched on and allowed_keys
// is filled with the remote-preset whitelist.
common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed)
        : ctx_params(common_params_parser_init(default_params, ex)) {
    common_params_add_preset_options(ctx_params.options);
    key_to_opt = get_map_key_opt(ctx_params);

    if (!only_remote_allowed) {
        return;
    }

    // restrict accepted keys to the remote whitelist
    allowed_keys = get_remote_preset_whitelist(key_to_opt);
    filter_allowed_keys = true;
}
|
|
309
|
+
|
|
310
|
+
// Load presets from an INI file.
//
// Every section becomes one preset named after the section (an empty section
// name maps to COMMON_PRESET_DEFAULT_NAME). The section named "*" is not
// returned; it is written to `global` instead. The key "version" is ignored
// (reserved for future use). Boolean options written through a negated key
// are stored with their value reversed.
//
// Throws std::runtime_error when
//   - the file cannot be read or parsed,
//   - key filtering is enabled and a key is not in allowed_keys,
//   - a key does not correspond to any known option.
common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
    common_presets out;
    const auto ini_data = parse_ini_from_file(path);

    // iterate by reference: copying each section would duplicate its whole key/value map
    for (const auto & [section_name, entries] : ini_data) {
        common_preset preset;
        if (section_name.empty()) {
            preset.name = COMMON_PRESET_DEFAULT_NAME;
        } else {
            preset.name = section_name;
        }
        LOG_DBG("loading preset: %s\n", preset.name.c_str());

        for (const auto & [key, value] : entries) {
            if (key == "version") {
                // skip version key (reserved for future use)
                continue;
            }

            LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
            if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
                throw std::runtime_error(string_format(
                    "option '%s' is not allowed in remote presets",
                    key.c_str()
                ));
            }

            // single lookup of the option definition
            const auto found = key_to_opt.find(key);
            if (found == key_to_opt.end()) {
                throw std::runtime_error(string_format(
                    "option '%s' not recognized in preset '%s'",
                    key.c_str(), preset.name.c_str()
                ));
            }

            const common_arg & opt = found->second;
            std::string & stored = preset.options[opt];
            if (is_bool_arg(opt)) {
                stored = parse_bool_arg(opt, key, value);
            } else {
                stored = value;
            }
            LOG_DBG("accepted option: %s = %s\n", key.c_str(), stored.c_str());
        }

        if (preset.name == "*") {
            // handle global preset
            global = std::move(preset);
        } else {
            // operator[] copies the key before the move-assignment consumes preset
            out[preset.name] = std::move(preset);
        }
    }

    return out;
}
|
|
361
|
+
|
|
362
|
+
// Create one preset per cached model.
// Each preset is named after the model's to_string() value and carries that
// same string as its LLAMA_ARG_HF_REPO option.
common_presets common_preset_context::load_from_cache() const {
    common_presets out;

    for (const auto & model : common_list_cached_models()) {
        const std::string model_id = model.to_string();

        common_preset preset;
        preset.name = model_id;
        preset.set_option(*this, "LLAMA_ARG_HF_REPO", model_id);
        out[preset.name] = preset;
    }

    return out;
}
|
|
375
|
+
|
|
376
|
+
// A model found while scanning a models directory.
struct local_model {
    std::string name;        // preset name: subdirectory name, or file name without extension
    std::string path;        // model file (first shard for a sharded model)
    std::string path_mmproj; // mmproj file found next to the model; can be empty
};
|
|
381
|
+
|
|
382
|
+
// Generate presets from a local models directory.
//
// Top level of models_dir:
//   - a "*.gguf" file becomes a model named after the file without its
//     ".gguf" extension;
//   - a subdirectory becomes a model named after the subdirectory, provided
//     it contains a usable ".gguf" file (see scan_subdir below).
// Every model yields a preset with LLAMA_ARG_MODEL set to its path and, when an
// mmproj file was found, LLAMA_ARG_MMPROJ set as well.
//
// Throws std::runtime_error when models_dir does not exist or is not a directory.
common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const {
    if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) {
        throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str()));
    }

    static const std::string gguf_ext = ".gguf";

    std::vector<local_model> models;

    // Classify the ".gguf" files of one subdirectory:
    //   name contains "mmproj"      -> multimodal projector
    //   name contains "-00001-of-"  -> first shard of a sharded model
    //   anything else               -> single-file model
    // When several files fall into the same category, the one listed last wins.
    auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
        // NOTE(review): the second fs_list argument presumably controls whether
        // directories are listed - confirm against fs_list
        auto files = fs_list(subdir_path, false);
        common_file_info model_file;
        common_file_info first_shard_file;
        common_file_info mmproj_file;
        for (const auto & file : files) {
            if (!string_ends_with(file.name, gguf_ext)) {
                continue;
            }
            if (file.name.find("mmproj") != std::string::npos) {
                mmproj_file = file;
            } else if (file.name.find("-00001-of-") != std::string::npos) {
                first_shard_file = file;
            } else {
                model_file = file;
            }
        }
        // prefer the first shard of a sharded model, fall back to the single file
        local_model model{
            /* name        */ name,
            /* path        */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
            /* path_mmproj */ mmproj_file.path // can be empty
        };
        if (!model.path.empty()) {
            models.push_back(model);
        }
    };

    auto files = fs_list(models_dir, true);
    for (const auto & file : files) {
        if (file.is_dir) {
            scan_subdir(file.path, file.name);
        } else if (string_ends_with(file.name, gguf_ext)) {
            // single file model
            std::string name = file.name;
            // drop only the trailing extension; ".gguf" elsewhere in the name is kept
            name.erase(name.size() - gguf_ext.size());
            local_model model{
                /* name        */ name,
                /* path        */ file.path,
                /* path_mmproj */ ""
            };
            models.push_back(model);
        }
    }

    // convert local models to presets
    common_presets out;
    for (const auto & model : models) {
        common_preset preset;
        preset.name = model.name;
        preset.set_option(*this, "LLAMA_ARG_MODEL", model.path);
        if (!model.path_mmproj.empty()) {
            preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj);
        }
        out[preset.name] = preset;
    }

    return out;
}
|
|
446
|
+
|
|
447
|
+
// Build a single preset, named COMMON_PRESET_DEFAULT_NAME, from CLI arguments.
// Throws std::runtime_error when common_params_to_map() reports failure.
common_preset common_preset_context::load_from_args(int argc, char ** argv) const {
    common_preset preset;
    preset.name = COMMON_PRESET_DEFAULT_NAME;

    if (!common_params_to_map(argc, argv, ctx_params.ex, preset.options)) {
        throw std::runtime_error("failed to parse CLI arguments into preset");
    }

    return preset;
}
|
|
458
|
+
|
|
459
|
+
// Combine two preset collections, with `added` taking precedence over `base`.
// A preset present in both is merged (options from `added` win); a preset
// present only in `added` is copied over unchanged.
common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const {
    common_presets out = base; // copy

    for (const auto & entry : added) {
        const auto existing = out.find(entry.first);
        if (existing == out.end()) {
            // not in base: add directly
            out[entry.first] = entry.second;
        } else {
            // in both: merge on top of the base preset
            existing->second.merge(entry.second);
        }
    }

    return out;
}
|
|
473
|
+
|
|
474
|
+
// Layer every preset of `presets` on top of a copy of `base` (same idea as
// CSS cascading): the result keeps each preset's name, starts from the options
// of `base`, and lets the preset's own options override them.
common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const {
    common_presets out;

    for (auto it = presets.begin(); it != presets.end(); ++it) {
        common_preset & layered = out[it->first];
        layered = base; // copy
        layered.name = it->first;
        layered.merge(it->second);
    }

    return out;
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "common.h"
|
|
4
|
+
#include "arg.h"
|
|
5
|
+
|
|
6
|
+
#include <string>
|
|
7
|
+
#include <vector>
|
|
8
|
+
#include <map>
|
|
9
|
+
#include <set>
|
|
10
|
+
|
|
11
|
+
//
|
|
12
|
+
// INI preset parser and writer
|
|
13
|
+
//
|
|
14
|
+
|
|
15
|
+
// name used for a preset that has no explicit name
// (INI keys placed before any section header, presets built from CLI args)
constexpr const char * COMMON_PRESET_DEFAULT_NAME = "default";
|
|
16
|
+
|
|
17
|
+
struct common_preset_context;
|
|
18
|
+
|
|
19
|
+
struct common_preset {
|
|
20
|
+
std::string name;
|
|
21
|
+
|
|
22
|
+
// options are stored as common_arg to string mapping, representing CLI arg and its value
|
|
23
|
+
std::map<common_arg, std::string> options;
|
|
24
|
+
|
|
25
|
+
// convert preset to CLI argument list
|
|
26
|
+
std::vector<std::string> to_args(const std::string & bin_path = "") const;
|
|
27
|
+
|
|
28
|
+
// convert preset to INI format string
|
|
29
|
+
std::string to_ini() const;
|
|
30
|
+
|
|
31
|
+
// TODO: maybe implement to_env() if needed
|
|
32
|
+
|
|
33
|
+
// modify preset options where argument is identified by its env variable
|
|
34
|
+
void set_option(const common_preset_context & ctx, const std::string & env, const std::string & value);
|
|
35
|
+
|
|
36
|
+
// unset option by its env variable
|
|
37
|
+
void unset_option(const std::string & env);
|
|
38
|
+
|
|
39
|
+
// get option value by its env variable, return false if not found
|
|
40
|
+
bool get_option(const std::string & env, std::string & value) const;
|
|
41
|
+
|
|
42
|
+
// merge another preset into this one, overwriting existing options
|
|
43
|
+
void merge(const common_preset & other);
|
|
44
|
+
|
|
45
|
+
// apply preset options to common_params
|
|
46
|
+
void apply_to_params(common_params & params) const;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
// interface for multiple presets in one file
|
|
50
|
+
using common_presets = std::map<std::string, common_preset>;
|
|
51
|
+
|
|
52
|
+
// context for loading and editing presets
|
|
53
|
+
struct common_preset_context {
|
|
54
|
+
common_params default_params; // unused for now
|
|
55
|
+
common_params_context ctx_params;
|
|
56
|
+
std::map<std::string, common_arg> key_to_opt;
|
|
57
|
+
|
|
58
|
+
bool filter_allowed_keys = false;
|
|
59
|
+
std::set<std::string> allowed_keys;
|
|
60
|
+
|
|
61
|
+
// if only_remote_allowed is true, only accept whitelisted keys
|
|
62
|
+
common_preset_context(llama_example ex, bool only_remote_allowed = false);
|
|
63
|
+
|
|
64
|
+
// load presets from INI file
|
|
65
|
+
common_presets load_from_ini(const std::string & path, common_preset & global) const;
|
|
66
|
+
|
|
67
|
+
// generate presets from cached models
|
|
68
|
+
common_presets load_from_cache() const;
|
|
69
|
+
|
|
70
|
+
// generate presets from local models directory
|
|
71
|
+
// for the directory structure, see "Using multiple models" in server/README.md
|
|
72
|
+
common_presets load_from_models_dir(const std::string & models_dir) const;
|
|
73
|
+
|
|
74
|
+
// generate one preset from CLI arguments
|
|
75
|
+
common_preset load_from_args(int argc, char ** argv) const;
|
|
76
|
+
|
|
77
|
+
// cascade multiple presets if exist on both: base < added
|
|
78
|
+
// if preset does not exist in base, it will be added without modification
|
|
79
|
+
common_presets cascade(const common_presets & base, const common_presets & added) const;
|
|
80
|
+
|
|
81
|
+
// apply presets over a base preset (same idea as CSS cascading)
|
|
82
|
+
common_presets cascade(const common_preset & base, const common_presets & presets) const;
|
|
83
|
+
};
|