npm - local-llm-rn - Versions diffs - 1.0.0 - Mend

local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (626) hide show

package/cpp/CMakeLists.txt +285 -0
package/cpp/common/CMakeLists.txt +149 -0
package/cpp/common/arg.cpp +3799 -0
package/cpp/common/arg.h +131 -0
package/cpp/common/base64.hpp +392 -0
package/cpp/common/build-info.cpp.in +4 -0
package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
package/cpp/common/chat-parser-xml-toolcall.h +45 -0
package/cpp/common/chat-parser.cpp +1649 -0
package/cpp/common/chat-parser.h +133 -0
package/cpp/common/chat-peg-parser.cpp +124 -0
package/cpp/common/chat-peg-parser.h +105 -0
package/cpp/common/chat.cpp +3355 -0
package/cpp/common/chat.h +252 -0
package/cpp/common/common.cpp +1824 -0
package/cpp/common/common.h +930 -0
package/cpp/common/console.cpp +1137 -0
package/cpp/common/console.h +41 -0
package/cpp/common/debug.cpp +167 -0
package/cpp/common/debug.h +43 -0
package/cpp/common/download.cpp +792 -0
package/cpp/common/download.h +84 -0
package/cpp/common/http.h +84 -0
package/cpp/common/jinja/README.md +88 -0
package/cpp/common/jinja/caps.cpp +285 -0
package/cpp/common/jinja/caps.h +30 -0
package/cpp/common/jinja/lexer.cpp +341 -0
package/cpp/common/jinja/lexer.h +157 -0
package/cpp/common/jinja/parser.cpp +591 -0
package/cpp/common/jinja/parser.h +21 -0
package/cpp/common/jinja/runtime.cpp +867 -0
package/cpp/common/jinja/runtime.h +638 -0
package/cpp/common/jinja/string.cpp +213 -0
package/cpp/common/jinja/string.h +61 -0
package/cpp/common/jinja/utils.h +149 -0
package/cpp/common/jinja/value.cpp +1393 -0
package/cpp/common/jinja/value.h +756 -0
package/cpp/common/json-partial.cpp +324 -0
package/cpp/common/json-partial.h +39 -0
package/cpp/common/json-schema-to-grammar.cpp +1153 -0
package/cpp/common/json-schema-to-grammar.h +43 -0
package/cpp/common/llguidance.cpp +258 -0
package/cpp/common/log.cpp +446 -0
package/cpp/common/log.h +119 -0
package/cpp/common/ngram-cache.cpp +285 -0
package/cpp/common/ngram-cache.h +101 -0
package/cpp/common/ngram-map.cpp +530 -0
package/cpp/common/ngram-map.h +115 -0
package/cpp/common/ngram-mod.cpp +60 -0
package/cpp/common/ngram-mod.h +38 -0
package/cpp/common/peg-parser.cpp +1712 -0
package/cpp/common/peg-parser.h +459 -0
package/cpp/common/preset.cpp +483 -0
package/cpp/common/preset.h +83 -0
package/cpp/common/regex-partial.cpp +204 -0
package/cpp/common/regex-partial.h +56 -0
package/cpp/common/sampling.cpp +745 -0
package/cpp/common/sampling.h +119 -0
package/cpp/common/speculative.cpp +1074 -0
package/cpp/common/speculative.h +41 -0
package/cpp/common/unicode.cpp +64 -0
package/cpp/common/unicode.h +22 -0
package/cpp/ggml/CMakeLists.txt +494 -0
package/cpp/ggml/cmake/GitVars.cmake +22 -0
package/cpp/ggml/cmake/common.cmake +50 -0
package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
package/cpp/ggml/include/ggml-alloc.h +85 -0
package/cpp/ggml/include/ggml-backend.h +373 -0
package/cpp/ggml/include/ggml-blas.h +25 -0
package/cpp/ggml/include/ggml-cann.h +123 -0
package/cpp/ggml/include/ggml-cpp.h +39 -0
package/cpp/ggml/include/ggml-cpu.h +151 -0
package/cpp/ggml/include/ggml-cuda.h +47 -0
package/cpp/ggml/include/ggml-hexagon.h +19 -0
package/cpp/ggml/include/ggml-metal.h +61 -0
package/cpp/ggml/include/ggml-opencl.h +26 -0
package/cpp/ggml/include/ggml-opt.h +256 -0
package/cpp/ggml/include/ggml-rpc.h +30 -0
package/cpp/ggml/include/ggml-sycl.h +49 -0
package/cpp/ggml/include/ggml-virtgpu.h +14 -0
package/cpp/ggml/include/ggml-vulkan.h +29 -0
package/cpp/ggml/include/ggml-webgpu.h +19 -0
package/cpp/ggml/include/ggml-zdnn.h +17 -0
package/cpp/ggml/include/ggml-zendnn.h +22 -0
package/cpp/ggml/include/ggml.h +2753 -0
package/cpp/ggml/include/gguf.h +204 -0
package/cpp/ggml/src/CMakeLists.txt +492 -0
package/cpp/ggml/src/ggml-alloc.c +1244 -0
package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
package/cpp/ggml/src/ggml-backend-dl.h +45 -0
package/cpp/ggml/src/ggml-backend-impl.h +255 -0
package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
package/cpp/ggml/src/ggml-backend.cpp +2270 -0
package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
package/cpp/ggml/src/ggml-common.h +1878 -0
package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
package/cpp/ggml/src/ggml-cpu/common.h +95 -0
package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
package/cpp/ggml/src/ggml-impl.h +724 -0
package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
package/cpp/ggml/src/ggml-opt.cpp +1093 -0
package/cpp/ggml/src/ggml-quants.c +5325 -0
package/cpp/ggml/src/ggml-quants.h +106 -0
package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
package/cpp/ggml/src/ggml-threading.cpp +12 -0
package/cpp/ggml/src/ggml-threading.h +14 -0
package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
package/cpp/ggml/src/ggml.c +7669 -0
package/cpp/ggml/src/ggml.cpp +26 -0
package/cpp/ggml/src/gguf.cpp +1699 -0
package/cpp/include/llama-cpp.h +32 -0
package/cpp/include/llama.h +1568 -0
package/cpp/mtmd/CMakeLists.txt +98 -0
package/cpp/mtmd/README.md +63 -0
package/cpp/mtmd/clip-graph.h +117 -0
package/cpp/mtmd/clip-impl.h +586 -0
package/cpp/mtmd/clip-model.h +390 -0
package/cpp/mtmd/clip.cpp +4154 -0
package/cpp/mtmd/clip.h +121 -0
package/cpp/mtmd/deprecation-warning.cpp +22 -0
package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
package/cpp/mtmd/models/cogvlm.cpp +98 -0
package/cpp/mtmd/models/conformer.cpp +216 -0
package/cpp/mtmd/models/glm4v.cpp +122 -0
package/cpp/mtmd/models/internvl.cpp +69 -0
package/cpp/mtmd/models/kimik25.cpp +101 -0
package/cpp/mtmd/models/kimivl.cpp +63 -0
package/cpp/mtmd/models/llama4.cpp +96 -0
package/cpp/mtmd/models/llava.cpp +374 -0
package/cpp/mtmd/models/minicpmv.cpp +114 -0
package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
package/cpp/mtmd/models/models.h +128 -0
package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
package/cpp/mtmd/models/paddleocr.cpp +52 -0
package/cpp/mtmd/models/pixtral.cpp +86 -0
package/cpp/mtmd/models/qwen2vl.cpp +183 -0
package/cpp/mtmd/models/qwen3vl.cpp +193 -0
package/cpp/mtmd/models/siglip.cpp +86 -0
package/cpp/mtmd/models/whisper-enc.cpp +115 -0
package/cpp/mtmd/models/youtuvl.cpp +179 -0
package/cpp/mtmd/mtmd-audio.cpp +730 -0
package/cpp/mtmd/mtmd-audio.h +113 -0
package/cpp/mtmd/mtmd-cli.cpp +437 -0
package/cpp/mtmd/mtmd-helper.cpp +521 -0
package/cpp/mtmd/mtmd-helper.h +96 -0
package/cpp/mtmd/mtmd.cpp +1156 -0
package/cpp/mtmd/mtmd.h +319 -0
package/cpp/mtmd/requirements.txt +5 -0
package/cpp/mtmd/test-1.jpeg +0 -0
package/cpp/mtmd/test-2.mp3 +0 -0
package/cpp/mtmd/tests.sh +192 -0
package/cpp/src/CMakeLists.txt +169 -0
package/cpp/src/llama-adapter.cpp +488 -0
package/cpp/src/llama-adapter.h +89 -0
package/cpp/src/llama-arch.cpp +2855 -0
package/cpp/src/llama-arch.h +619 -0
package/cpp/src/llama-batch.cpp +917 -0
package/cpp/src/llama-batch.h +173 -0
package/cpp/src/llama-chat.cpp +896 -0
package/cpp/src/llama-chat.h +71 -0
package/cpp/src/llama-context.cpp +3512 -0
package/cpp/src/llama-context.h +359 -0
package/cpp/src/llama-cparams.cpp +5 -0
package/cpp/src/llama-cparams.h +44 -0
package/cpp/src/llama-grammar.cpp +1464 -0
package/cpp/src/llama-grammar.h +194 -0
package/cpp/src/llama-graph.cpp +2685 -0
package/cpp/src/llama-graph.h +1026 -0
package/cpp/src/llama-hparams.cpp +234 -0
package/cpp/src/llama-hparams.h +339 -0
package/cpp/src/llama-impl.cpp +171 -0
package/cpp/src/llama-impl.h +73 -0
package/cpp/src/llama-io.cpp +15 -0
package/cpp/src/llama-io.h +35 -0
package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
package/cpp/src/llama-kv-cache-iswa.h +137 -0
package/cpp/src/llama-kv-cache.cpp +2271 -0
package/cpp/src/llama-kv-cache.h +388 -0
package/cpp/src/llama-kv-cells.h +533 -0
package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
package/cpp/src/llama-memory-hybrid.cpp +268 -0
package/cpp/src/llama-memory-hybrid.h +139 -0
package/cpp/src/llama-memory-recurrent.cpp +1165 -0
package/cpp/src/llama-memory-recurrent.h +182 -0
package/cpp/src/llama-memory.cpp +59 -0
package/cpp/src/llama-memory.h +122 -0
package/cpp/src/llama-mmap.cpp +785 -0
package/cpp/src/llama-mmap.h +92 -0
package/cpp/src/llama-model-loader.cpp +1414 -0
package/cpp/src/llama-model-loader.h +203 -0
package/cpp/src/llama-model-saver.cpp +286 -0
package/cpp/src/llama-model-saver.h +37 -0
package/cpp/src/llama-model.cpp +9253 -0
package/cpp/src/llama-model.h +576 -0
package/cpp/src/llama-quant.cpp +1119 -0
package/cpp/src/llama-quant.h +1 -0
package/cpp/src/llama-sampler.cpp +3885 -0
package/cpp/src/llama-sampler.h +42 -0
package/cpp/src/llama-vocab.cpp +3970 -0
package/cpp/src/llama-vocab.h +187 -0
package/cpp/src/llama.cpp +1313 -0
package/cpp/src/models/afmoe.cpp +191 -0
package/cpp/src/models/apertus.cpp +125 -0
package/cpp/src/models/arcee.cpp +135 -0
package/cpp/src/models/arctic.cpp +138 -0
package/cpp/src/models/arwkv7.cpp +86 -0
package/cpp/src/models/baichuan.cpp +122 -0
package/cpp/src/models/bailingmoe.cpp +144 -0
package/cpp/src/models/bailingmoe2.cpp +135 -0
package/cpp/src/models/bert.cpp +178 -0
package/cpp/src/models/bitnet.cpp +160 -0
package/cpp/src/models/bloom.cpp +101 -0
package/cpp/src/models/chameleon.cpp +178 -0
package/cpp/src/models/chatglm.cpp +132 -0
package/cpp/src/models/codeshell.cpp +111 -0
package/cpp/src/models/cogvlm.cpp +102 -0
package/cpp/src/models/cohere2-iswa.cpp +134 -0
package/cpp/src/models/command-r.cpp +122 -0
package/cpp/src/models/dbrx.cpp +123 -0
package/cpp/src/models/deci.cpp +135 -0
package/cpp/src/models/deepseek.cpp +144 -0
package/cpp/src/models/deepseek2.cpp +262 -0
package/cpp/src/models/delta-net-base.cpp +376 -0
package/cpp/src/models/dots1.cpp +134 -0
package/cpp/src/models/dream.cpp +105 -0
package/cpp/src/models/ernie4-5-moe.cpp +150 -0
package/cpp/src/models/ernie4-5.cpp +110 -0
package/cpp/src/models/eurobert.cpp +97 -0
package/cpp/src/models/exaone-moe.cpp +146 -0
package/cpp/src/models/exaone.cpp +114 -0
package/cpp/src/models/exaone4.cpp +123 -0
package/cpp/src/models/falcon-h1.cpp +111 -0
package/cpp/src/models/falcon.cpp +120 -0
package/cpp/src/models/gemma-embedding.cpp +116 -0
package/cpp/src/models/gemma.cpp +112 -0
package/cpp/src/models/gemma2-iswa.cpp +128 -0
package/cpp/src/models/gemma3.cpp +155 -0
package/cpp/src/models/gemma3n-iswa.cpp +384 -0
package/cpp/src/models/glm4-moe.cpp +170 -0
package/cpp/src/models/glm4.cpp +157 -0
package/cpp/src/models/gpt2.cpp +105 -0
package/cpp/src/models/gptneox.cpp +144 -0
package/cpp/src/models/granite-hybrid.cpp +196 -0
package/cpp/src/models/granite.cpp +211 -0
package/cpp/src/models/grok.cpp +159 -0
package/cpp/src/models/grovemoe.cpp +141 -0
package/cpp/src/models/hunyuan-dense.cpp +132 -0
package/cpp/src/models/hunyuan-moe.cpp +154 -0
package/cpp/src/models/internlm2.cpp +120 -0
package/cpp/src/models/jais.cpp +86 -0
package/cpp/src/models/jais2.cpp +123 -0
package/cpp/src/models/jamba.cpp +106 -0
package/cpp/src/models/kimi-linear.cpp +392 -0
package/cpp/src/models/lfm2.cpp +190 -0
package/cpp/src/models/llada-moe.cpp +122 -0
package/cpp/src/models/llada.cpp +99 -0
package/cpp/src/models/llama-iswa.cpp +178 -0
package/cpp/src/models/llama.cpp +168 -0
package/cpp/src/models/maincoder.cpp +117 -0
package/cpp/src/models/mamba-base.cpp +285 -0
package/cpp/src/models/mamba.cpp +54 -0
package/cpp/src/models/mimo2-iswa.cpp +123 -0
package/cpp/src/models/minicpm3.cpp +200 -0
package/cpp/src/models/minimax-m2.cpp +124 -0
package/cpp/src/models/mistral3.cpp +160 -0
package/cpp/src/models/models.h +684 -0
package/cpp/src/models/modern-bert.cpp +109 -0
package/cpp/src/models/mpt.cpp +126 -0
package/cpp/src/models/nemotron-h.cpp +148 -0
package/cpp/src/models/nemotron.cpp +122 -0
package/cpp/src/models/neo-bert.cpp +104 -0
package/cpp/src/models/olmo.cpp +121 -0
package/cpp/src/models/olmo2.cpp +150 -0
package/cpp/src/models/olmoe.cpp +124 -0
package/cpp/src/models/openai-moe-iswa.cpp +127 -0
package/cpp/src/models/openelm.cpp +124 -0
package/cpp/src/models/orion.cpp +123 -0
package/cpp/src/models/paddleocr.cpp +122 -0
package/cpp/src/models/pangu-embedded.cpp +121 -0
package/cpp/src/models/phi2.cpp +121 -0
package/cpp/src/models/phi3.cpp +152 -0
package/cpp/src/models/plamo.cpp +110 -0
package/cpp/src/models/plamo2.cpp +318 -0
package/cpp/src/models/plamo3.cpp +128 -0
package/cpp/src/models/plm.cpp +169 -0
package/cpp/src/models/qwen.cpp +108 -0
package/cpp/src/models/qwen2.cpp +126 -0
package/cpp/src/models/qwen2moe.cpp +151 -0
package/cpp/src/models/qwen2vl.cpp +117 -0
package/cpp/src/models/qwen3.cpp +117 -0
package/cpp/src/models/qwen35.cpp +386 -0
package/cpp/src/models/qwen35moe.cpp +420 -0
package/cpp/src/models/qwen3moe.cpp +124 -0
package/cpp/src/models/qwen3next.cpp +525 -0
package/cpp/src/models/qwen3vl-moe.cpp +140 -0
package/cpp/src/models/qwen3vl.cpp +132 -0
package/cpp/src/models/refact.cpp +94 -0
package/cpp/src/models/rnd1.cpp +126 -0
package/cpp/src/models/rwkv6-base.cpp +164 -0
package/cpp/src/models/rwkv6.cpp +94 -0
package/cpp/src/models/rwkv6qwen2.cpp +86 -0
package/cpp/src/models/rwkv7-base.cpp +137 -0
package/cpp/src/models/rwkv7.cpp +90 -0
package/cpp/src/models/seed-oss.cpp +124 -0
package/cpp/src/models/smallthinker.cpp +126 -0
package/cpp/src/models/smollm3.cpp +128 -0
package/cpp/src/models/stablelm.cpp +146 -0
package/cpp/src/models/starcoder.cpp +100 -0
package/cpp/src/models/starcoder2.cpp +121 -0
package/cpp/src/models/step35-iswa.cpp +168 -0
package/cpp/src/models/t5-dec.cpp +166 -0
package/cpp/src/models/t5-enc.cpp +96 -0
package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
package/cpp/src/models/xverse.cpp +108 -0
package/cpp/src/unicode-data.cpp +7034 -0
package/cpp/src/unicode-data.h +20 -0
package/cpp/src/unicode.cpp +1103 -0
package/cpp/src/unicode.h +111 -0
package/cpp/vendor/nlohmann/json.hpp +25526 -0
package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
package/cpp/vendor/stb/stb_image.h +7988 -0
package/ios/LocalLLM-Bridging-Header.h +2 -0
package/ios/LocalLLM.h +5 -0
package/ios/LocalLLM.mm +1267 -0
package/local-llm-rn.podspec +60 -0
package/package.json +35 -0
package/src/NativeLocalLLM.ts +73 -0
package/src/device.ts +50 -0
package/src/download-adapter.ts +17 -0
package/src/index.ts +21 -0
package/src/native-bridge.ts +142 -0
package/src/rn-downloader.ts +37 -0

package/cpp/ggml/src/ggml-blas/CMakeLists.txt ADDED Viewed

@@ -0,0 +1,101 @@
+# Builds the ggml-blas backend against the BLAS implementation selected by
+# GGML_BLAS_VENDOR (forwarded to FindBLAS through BLA_VENDOR).
+#
+# GGML_BLAS_VENDOR is always quoted inside if(): an unquoted empty value would
+# vanish from the argument list and abort configuration with a syntax error
+# (before the "GGML_BLAS_VENDOR is not set" warning below could be reached),
+# and an unquoted value that happens to name another variable would be
+# dereferenced a second time.
+if (GGML_STATIC)
+    set(BLA_STATIC ON)
+endif()
+#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
+#    set(BLA_SIZEOF_INTEGER 8)
+#endif()
+set(BLA_VENDOR ${GGML_BLAS_VENDOR})
+find_package(BLAS)
+if (BLAS_FOUND)
+    message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
+    ggml_add_backend_library(ggml-blas
+                             ggml-blas.cpp
+                            )
+    if ("${GGML_BLAS_VENDOR}" MATCHES "Apple")
+        add_compile_definitions(ACCELERATE_NEW_LAPACK)
+        add_compile_definitions(ACCELERATE_LAPACK_ILP64)
+        add_compile_definitions(GGML_BLAS_USE_ACCELERATE)
+    elseif ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
+        # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
+        # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
+        # Ask pkg-config for the include directory of the selected vendor.
+        find_package(PkgConfig REQUIRED)
+        if ("${GGML_BLAS_VENDOR}" MATCHES "Generic")
+            pkg_check_modules(DepBLAS blas)
+        elseif ("${GGML_BLAS_VENDOR}" MATCHES "OpenBLAS")
+            # As of openblas v0.3.22, the 64-bit is named openblas64.pc
+            pkg_check_modules(DepBLAS openblas64)
+            if (NOT DepBLAS_FOUND)
+                pkg_check_modules(DepBLAS openblas)
+            endif()
+        elseif ("${GGML_BLAS_VENDOR}" MATCHES "FLAME")
+            pkg_check_modules(DepBLAS blis)
+        elseif ("${GGML_BLAS_VENDOR}" MATCHES "ATLAS")
+            pkg_check_modules(DepBLAS blas-atlas)
+        elseif ("${GGML_BLAS_VENDOR}" MATCHES "FlexiBLAS")
+            pkg_check_modules(DepBLAS flexiblas_api)
+        elseif ("${GGML_BLAS_VENDOR}" MATCHES "Intel")
+            # all Intel* libraries share the same include path
+            pkg_check_modules(DepBLAS mkl-sdl)
+        elseif ("${GGML_BLAS_VENDOR}" MATCHES "NVHPC")
+            # this doesn't provide pkg-config
+            # suggest to assign BLAS_INCLUDE_DIRS on your own
+            if ("${NVHPC_VERSION}" STREQUAL "")
+                message(WARNING "Better to set NVHPC_VERSION")
+            else()
+                set(DepBLAS_FOUND ON)
+                set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
+            endif()
+        endif()
+        if (DepBLAS_FOUND)
+            set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
+        else()
+            # Last resort: probe a few well-known locations for cblas.h.
+            message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
+            " detected by pkgconfig, trying to find cblas.h from possible paths...")
+            find_path(BLAS_INCLUDE_DIRS
+                NAMES cblas.h
+                HINTS
+                    /usr/include
+                    /usr/local/include
+                    /usr/include/openblas
+                    /opt/homebrew/opt/openblas/include
+                    /usr/local/opt/openblas/include
+                    /usr/include/x86_64-linux-gnu/openblas/include
+            )
+        endif()
+    endif()
+    message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
+    # NOTE(review): BLAS_LINKER_FLAGS are linker flags but are passed as compile
+    # options here; kept unchanged - confirm whether target_link_options is intended.
+    target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
+    if ("${GGML_BLAS_VENDOR}" STREQUAL "")
+        message(WARNING "GGML_BLAS_VENDOR is not set; some methods may not link properly.")
+    endif()
+    # Vendor-specific macros select the header that ggml-blas.cpp includes.
+    if ("${GGML_BLAS_VENDOR}" MATCHES "Intel" OR ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND "${GGML_BLAS_VENDOR}" MATCHES "Generic"))
+        add_compile_definitions(GGML_BLAS_USE_MKL)
+    endif()
+    if ("${GGML_BLAS_VENDOR}" MATCHES "OpenBLAS")
+        add_compile_definitions(GGML_BLAS_USE_OPENBLAS)
+    endif()
+    # MATCHES is a regex search, so "AOCL" also covers "AOCL_mt".
+    if ("${GGML_BLAS_VENDOR}" MATCHES "FLAME" OR "${GGML_BLAS_VENDOR}" MATCHES "AOCL")
+        add_compile_definitions(GGML_BLAS_USE_BLIS)
+    endif()
+    if ("${GGML_BLAS_VENDOR}" MATCHES "NVPL")
+        add_compile_definitions(GGML_BLAS_USE_NVPL)
+    endif()
+    target_link_libraries     (ggml-blas PRIVATE ${BLAS_LIBRARIES})
+    target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS})
+else()
+    message(FATAL_ERROR "BLAS not found, please refer to "
+                        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
+                        " to set correct GGML_BLAS_VENDOR")
+endif()

package/cpp/ggml/src/ggml-blas/ggml-blas.cpp ADDED Viewed

@@ -0,0 +1,518 @@
+#include "ggml-impl.h"
+#include "ggml-blas.h"
+#include "ggml-backend-impl.h"
+#include <future>
+#include <vector>
+#include <cstring>
+#if defined(GGML_BLAS_USE_ACCELERATE)
+#   include <Accelerate/Accelerate.h>
+#elif defined(GGML_BLAS_USE_MKL)
+#   include <mkl.h>
+#elif defined(GGML_BLAS_USE_BLIS)
+#   include <blis.h>
+#elif defined(GGML_BLAS_USE_NVPL)
+#   include <nvpl_blas.h>
+#else
+#   include <cblas.h>
+#endif
+struct ggml_backend_blas_context { // Per-backend state of the BLAS backend; allocated in ggml_backend_blas_init() and deleted in ggml_backend_blas_free().
+    int n_threads = GGML_DEFAULT_N_THREADS; // thread count used for the src0 conversion and forwarded to the BLAS library in ggml_backend_blas_mul_mat()
+    std::unique_ptr<char[]> work_data; // scratch buffer that receives src0 converted to float; only ever grown
+    size_t work_size = 0; // current capacity of work_data, in bytes
+#ifndef GGML_USE_OPENMP
+    std::vector<std::future<void>> tasks; // pending std::async conversion jobs; only present when OpenMP is not used
+#endif
+};
+static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) { // Computes the node dst from dst->src[0] and dst->src[1] with one cblas_sgemm call per (i12, i13) plane; a non-F32 src0 is first converted to float into ctx->work_data.
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+    GGML_TENSOR_BINARY_OP_LOCALS
+    const enum ggml_type type = src0->type;
+    GGML_ASSERT(ne0 == ne01);
+    GGML_ASSERT(ne1 == ne11);
+    GGML_ASSERT(ne2 == ne12);
+    GGML_ASSERT(ne3 == ne13);
+    // we don't support permuted src0 or src1: the innermost stride must equal ggml_type_size() of the tensor's type
+    GGML_ASSERT(nb00 == ggml_type_size(type));
+    GGML_ASSERT(nb10 == ggml_type_size(src1->type));
+    // dst cannot be transposed or permuted: float elements with non-decreasing strides
+    GGML_ASSERT(nb0 == sizeof(float));
+    GGML_ASSERT(nb0 <= nb1);
+    GGML_ASSERT(nb1 <= nb2);
+    GGML_ASSERT(nb2 <= nb3);
+    // broadcast factors: each src0 plane is reused for r2 (dim 2) / r3 (dim 3) consecutive src1 planes
+    const int64_t r2 = ne12/ne02;
+    const int64_t r3 = ne13/ne03;
+    const int64_t ne_plane      = ne01*ne00; // number of elements in one src0 plane
+    const size_t  desired_wsize = type == GGML_TYPE_F32 ? 0 : ne03*ne02*ne_plane*sizeof(float); // scratch bytes needed to hold all of src0 as float (none when src0 is already F32)
+    if (ctx->work_size < desired_wsize) { // grow-only: the previous buffer is released and its contents are not preserved
+        ctx->work_data.reset(new char[desired_wsize]);
+        ctx->work_size = desired_wsize;
+    }
+    void * wdata = ctx->work_data.get();
+    // convert src0 to float, one (i02, i03) plane at a time, with the rows of each plane split across threads
+    if (type != GGML_TYPE_F32) {
+        const auto * type_traits = ggml_get_type_traits(type);
+        ggml_to_float_t const to_float = type_traits->to_float;
+        for (int64_t i03 = 0; i03 < ne03; i03++) {
+            for (int64_t i02 = 0; i02 < ne02; i02++) {
+                const void  *       x      = (char *)  src0->data + i02*nb02          + i03*nb03;
+                      float * const wplane = (float *) wdata      + i02*ne_plane      + i03*ne02*ne_plane;
+                const int min_cols_per_thread = 4096; // target minimum number of elements converted per thread
+                const int min_rows_per_thread = std::max((int)(min_cols_per_thread/ne00), 1); // rows needed to reach that element count, at least 1
+                const int n_threads = std::max(std::min(ctx->n_threads, (int)(ne01/min_rows_per_thread)), 1); // clamped to the range [1, ctx->n_threads]
+#ifdef GGML_USE_OPENMP
+                #pragma omp parallel for num_threads(n_threads)
+                for (int64_t i01 = 0; i01 < ne01; i01++) {
+                    to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
+                }
+#else // no OpenMP: rows are split into n_threads chunks; chunks 1..n_threads-1 are launched with std::async
+                for (int i = 1; i < n_threads; i++) {
+                    const int64_t start =       i*ne01/n_threads;
+                    const int64_t end   = (i + 1)*ne01/n_threads;
+                    if (start < end) {
+                        ctx->tasks.push_back(std::async(std::launch::async, [=]() {
+                            for (int64_t i01 = start; i01 < end; i01++) {
+                                to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
+                            }
+                        }));
+                    }
+                }
+                {
+                    // reuse the current thread for the first task
+                    const int64_t start = 0;
+                    const int64_t end   = ne01/n_threads; // chunk 0 covers rows [0, ne01/n_threads)
+                    for (int64_t i01 = start; i01 < end; i01++) {
+                        to_float((const char *) x + i01*nb01, wplane + i01*ne00, ne00);
+                    }
+                }
+#endif
+            }
+        }
+#ifndef GGML_USE_OPENMP
+        // wait for all tasks to finish (jobs from every plane were queued above) before the converted data is read
+        for (auto & task : ctx->tasks) {
+            task.get();
+        }
+        ctx->tasks.clear();
+#endif
+    }
+#if defined(GGML_BLAS_USE_OPENBLAS) // forward the configured thread count to the BLAS library for the vendors handled here
+    openblas_set_num_threads(ctx->n_threads);
+#elif defined(GGML_BLAS_USE_BLIS)
+    bli_thread_set_num_threads(ctx->n_threads);
+#elif defined(GGML_BLAS_USE_NVPL)
+    nvpl_blas_set_num_threads(ctx->n_threads);
+#endif
+    for (int64_t i13 = 0; i13 < ne13; i13++) {
+        for (int64_t i12 = 0; i12 < ne12; i12++) {
+            const int64_t i03 = i13/r3; // src0 plane that is broadcast to this src1/dst plane
+            const int64_t i02 = i12/r2;
+            const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
+            const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
+                  float * d = (float *) ((char *)  dst->data + i12*nb2  + i13*nb3);
+            if (type != GGML_TYPE_F32) { // use the converted float copy instead of the raw src0 data
+                x = (float *) wdata + i02*ne_plane + i03*ne02*ne_plane;
+            }
+            cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, // d (ne1 x ne01) = y (ne1 x ne10) * transpose(x), x being ne01 rows with leading dimension ne00
+                        ne1, ne01, ne10,
+                        1.0f,   y, ne10,
+                                x, ne00,
+                        0.0f,   d, ne01);
+        }
+    }
+}
+// Computes the GGML_OP_OUT_PROD node dst from dst->src[0] and dst->src[1] with
+// a single cblas_sgemm call over the tensors' data pointers.
+//
+// ctx is not used by this operation.
+static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
+    GGML_UNUSED(ctx);
+
+    const struct ggml_tensor * src0 = dst->src[0];
+    const struct ggml_tensor * src1 = dst->src[1];
+
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    // shape agreement between dst, src0 and src1
+    GGML_ASSERT(ne0  == ne00);
+    GGML_ASSERT(ne1  == ne10);
+    GGML_ASSERT(ne2  == ne02);
+    GGML_ASSERT(ne02 == ne12);
+    GGML_ASSERT(ne3  == ne13);
+    GGML_ASSERT(ne03 == ne13);
+
+    // src0 must hold densely packed floats along its innermost dimension
+    GGML_ASSERT(nb00 == sizeof(float));
+
+    // the same holds for dst; unlike in ggml_backend_blas_mul_mat, no ordering
+    // of nb1, nb2 and nb3 is asserted here
+    GGML_ASSERT(nb0 == sizeof(float));
+
+    // Operand layout, written as (major, minor):
+    //
+    //   ggml_compute_forward_out_prod        sgemm
+    //     src0: (k, n)                         a: (m, k)  -> src1, transposed
+    //     src1: (k, m)                         b: (k, n)  -> src0
+    //     dst:  (m, n)                         c: (m, n)
+    //
+    // (sgemm reference: https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
+    //
+    // When ggml_is_transposed(src1) holds, src1->data already contains the
+    // transposed layout, so sgemm must not transpose it a second time.
+    const int m = (int) src1->ne[0];
+    const int n = (int) src0->ne[0];
+    const int k = (int) src0->ne[1];
+
+    const bool src1_is_transposed = ggml_is_transposed(src1);
+
+    const CBLAS_TRANSPOSE transposeA = src1_is_transposed ? CblasNoTrans : CblasTrans;
+    const int             lda        = src1_is_transposed ? k            : m;
+
+    float * a = (float *) src1->data;
+    float * b = (float *) src0->data;
+    float * c = (float *) dst->data;
+
+    cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans,
+                m, n, k,
+                1.0, a, lda,
+                     b, n,
+                0.0, c, n);
+}
+// backend interface
+// Reports the fixed name of this backend; the backend argument is ignored.
+static const char * ggml_backend_blas_get_name(ggml_backend_t backend) {
+    GGML_UNUSED(backend);
+    return "BLAS";
+}
+// Destroys a backend created by ggml_backend_blas_init(): the BLAS context
+// stored in backend->context is deleted first, then the backend object.
+static void ggml_backend_blas_free(ggml_backend_t backend) {
+    delete (ggml_backend_blas_context *) backend->context;
+    delete backend;
+}
+// Walks the nodes of cgraph in order and executes each one that carries
+// GGML_TENSOR_FLAG_COMPUTE. MUL_MAT and OUT_PROD are dispatched to the BLAS
+// implementations of this file, the listed layout-only ops need no work, and
+// any other op aborts. Returns GGML_STATUS_SUCCESS once the loop completes.
+static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    ggml_backend_blas_context * blas_ctx = (ggml_backend_blas_context *) backend->context;
+
+    for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) {
+        struct ggml_tensor * cur = cgraph->nodes[node_idx];
+
+        // nodes that are not flagged for computation are skipped
+        const bool wants_compute = (cur->flags & GGML_TENSOR_FLAG_COMPUTE) != 0;
+        if (!wants_compute) {
+            continue;
+        }
+
+        switch (cur->op) {
+            case GGML_OP_MUL_MAT:
+                ggml_backend_blas_mul_mat(blas_ctx, cur);
+                break;
+
+            case GGML_OP_OUT_PROD:
+                ggml_backend_blas_out_prod(blas_ctx, cur);
+                break;
+
+            // nothing to compute for these ops
+            case GGML_OP_NONE:
+            case GGML_OP_RESHAPE:
+            case GGML_OP_VIEW:
+            case GGML_OP_PERMUTE:
+            case GGML_OP_TRANSPOSE:
+                break;
+
+            default:
+                GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(cur));
+        }
+    }
+
+    return GGML_STATUS_SUCCESS;
+}
+static struct ggml_backend_i blas_backend_i = { // Backend function table: only get_name, free and graph_compute are provided; every other entry is NULL. Entries are positional, so their order must follow the struct's field order.
+    /* .get_name                = */ ggml_backend_blas_get_name,
+    /* .free                    = */ ggml_backend_blas_free,
+    /* .set_tensor_async        = */ NULL,
+    /* .get_tensor_async        = */ NULL,
+    /* .cpy_tensor_async        = */ NULL,
+    /* .synchronize             = */ NULL,
+    /* .graph_plan_create       = */ NULL,
+    /* .graph_plan_free         = */ NULL,
+    /* .graph_plan_update       = */ NULL,
+    /* .graph_plan_compute      = */ NULL,
+    /* .graph_compute           = */ ggml_backend_blas_graph_compute,
+    /* .event_record            = */ NULL,
+    /* .event_wait              = */ NULL,
+    /* .graph_optimize          = */ NULL,
+};
+// Returns the address of the function-local static GUID that identifies
+// backends created by this file (compared in ggml_backend_is_blas()).
+static ggml_guid_t ggml_backend_blas_guid(void) {
+    static ggml_guid blas_guid = {
+        0x12, 0xa8, 0xae, 0xf4, 0xc0, 0x1e, 0x61, 0x97,
+        0x8f, 0xeb, 0x33, 0x04, 0xa1, 0x33, 0x51, 0x2d,
+    };
+    return &blas_guid;
+}
+// Creates a BLAS backend: a default-initialized ggml_backend_blas_context plus
+// a ggml_backend object wired to the BLAS GUID, the backend function table and
+// device 0 of the BLAS registry. The result is released through the table's
+// free entry (ggml_backend_blas_free).
+//
+// When ggml uses OpenMP, a debug message is logged if OpenBLAS / BLIS were
+// built without OpenMP support.
+ggml_backend_t ggml_backend_blas_init(void) {
+    auto * blas_ctx = new ggml_backend_blas_context;
+
+    ggml_backend_t blas_backend = new ggml_backend {
+        /* .guid    = */ ggml_backend_blas_guid(),
+        /* .iface   = */ blas_backend_i,
+        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
+        /* .context = */ blas_ctx,
+    };
+
+#if defined(GGML_BLAS_USE_OPENBLAS) && defined(GGML_USE_OPENMP)
+    const bool openblas_uses_openmp = openblas_get_parallel() == OPENBLAS_OPENMP;
+    if (!openblas_uses_openmp) {
+        GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but OpenBLAS was compiled without OpenMP support\n", __func__);
+    }
+#endif
+
+#if defined(BLIS_ENABLE_CBLAS) && defined(GGML_USE_OPENMP) && !defined(BLIS_ENABLE_OPENMP)
+    GGML_LOG_DEBUG("%s: warning: ggml is using OpenMP, but BLIS was compiled without OpenMP support\n", __func__);
+#endif
+
+    return blas_backend;
+}
+// Returns true when backend is non-NULL and its GUID matches the BLAS GUID.
+bool ggml_backend_is_blas(ggml_backend_t backend) {
+    if (backend == NULL) {
+        return false;
+    }
+    return ggml_guid_matches(backend->guid, ggml_backend_blas_guid());
+}
+// Stores n_threads in the context of a BLAS backend. Asserts that backend_blas
+// really is a BLAS backend; the value itself is stored without validation.
+void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads) {
+    GGML_ASSERT(ggml_backend_is_blas(backend_blas));
+
+    ((ggml_backend_blas_context *) backend_blas->context)->n_threads = n_threads;
+}
+// device interface
+// Reports the fixed name of the single BLAS device; dev is ignored.
+static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return "BLAS";
+}
+// Describes the device by the BLAS implementation chosen at compile time
+// (first matching GGML_BLAS_USE_* macro wins, generic "BLAS" otherwise).
+static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
+#if defined(GGML_BLAS_USE_ACCELERATE)
+    const char * description = "Accelerate";
+#elif defined(GGML_BLAS_USE_MKL)
+    const char * description = "MKL";
+#elif defined(GGML_BLAS_USE_BLIS)
+    const char * description = "BLIS";
+#elif defined(GGML_BLAS_USE_NVPL)
+    const char * description = "NVPL";
+#elif defined(GGML_BLAS_USE_OPENBLAS)
+    const char * description = "OpenBLAS";
+#else
+    const char * description = "BLAS";
+#endif
+    GGML_UNUSED(dev);
+    return description;
+}
+// Reports the device memory figures; both outputs are currently always zero.
+static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    GGML_UNUSED(dev);
+    // TODO
+    *total = 0;
+    *free  = 0;
+}
+// Classifies the BLAS device as GGML_BACKEND_DEVICE_TYPE_ACCEL; dev is ignored.
+static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return GGML_BACKEND_DEVICE_TYPE_ACCEL;
+}
+// Fills props with the device's name, description, type, memory figures and
+// capability flags, using the other device getters of this file.
+static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    // the memory figures are written straight into the props fields
+    ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
+
+    props->type        = ggml_backend_blas_device_get_type(dev);
+    props->description = ggml_backend_blas_device_get_description(dev);
+    props->name        = ggml_backend_blas_device_get_name(dev);
+
+    // only buffer_from_host_ptr is advertised
+    props->caps = {
+        /* .async                 = */ false,
+        /* .host_buffer           = */ false,
+        /* .buffer_from_host_ptr  = */ true,
+        /* .events                = */ false,
+    };
+}
+// Device hook that creates a backend; simply forwards to
+// ggml_backend_blas_init() and ignores both dev and params.
+static ggml_backend_t ggml_backend_blas_device_init_backend(ggml_backend_dev_t dev, const char * params) {
+    GGML_UNUSED(dev);
+    GGML_UNUSED(params);
+    return ggml_backend_blas_init();
+}
+// The BLAS device uses the CPU buffer type; dev is ignored.
+static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
+    GGML_UNUSED(dev);
+    return ggml_backend_cpu_buffer_type();
+}
+// Wraps the host memory [ptr, ptr + size) via ggml_backend_cpu_buffer_from_ptr();
+// dev and max_tensor_size are ignored.
+static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
+    GGML_UNUSED(dev);
+    GGML_UNUSED(max_tensor_size);
+    return ggml_backend_cpu_buffer_from_ptr(ptr, size);
+}
+// Tells whether this backend can execute op.
+//
+//  - NONE / RESHAPE / VIEW / PERMUTE / TRANSPOSE: always supported.
+//  - MUL_MAT: both sources contiguous, src1 of type F32, src0 either F32 or
+//    convertible through its type traits' to_float, and op->ne[0], op->ne[1]
+//    and src1->ne[0] all at least min_batch.
+//  - OUT_PROD: both sources are F32 matrices, src0 contiguous, src1 contiguous
+//    or transposed.
+//  - anything else: not supported.
+//
+// dev is ignored.
+static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
+    GGML_UNUSED(dev);
+
+    const struct ggml_tensor * src0 = op->src[0];
+    const struct ggml_tensor * src1 = op->src[1];
+
+    switch (op->op) {
+        case GGML_OP_NONE:
+        case GGML_OP_RESHAPE:
+        case GGML_OP_VIEW:
+        case GGML_OP_PERMUTE:
+        case GGML_OP_TRANSPOSE:
+            return true;
+        case GGML_OP_MUL_MAT:
+        {
+            // BLAS usually is only faster for large matrices
+            const int64_t ne10 = src1->ne[0];
+            const int64_t ne0 = op->ne[0];
+            const int64_t ne1 = op->ne[1];
+            // TODO: find the optimal value
+            const int64_t min_batch = 32;
+            return ggml_is_contiguous(src0) &&
+                   ggml_is_contiguous(src1) &&
+                   src1->type == GGML_TYPE_F32 &&
+                   (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
+                   (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
+        }
+        case GGML_OP_OUT_PROD:
+            // src0 is required to be F32 here, so no to_float fallback check is needed
+            return src0->type == GGML_TYPE_F32 &&
+                   src1->type == GGML_TYPE_F32 &&
+                   ggml_is_matrix(src0) &&
+                   ggml_is_matrix(src1) &&
+                   ggml_is_contiguous(src0) &&
+                   (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
+        default:
+            return false;
+    }
+}
+// A buffer type is supported exactly when ggml_backend_buft_is_host() accepts
+// it; dev is ignored.
+static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    GGML_UNUSED(dev);
+    return ggml_backend_buft_is_host(buft);
+}
+static const struct ggml_backend_device_i ggml_backend_blas_device_i = { // Device function table: get_host_buffer_type, offload_op and the event hooks are left NULL. Entries are positional, so their order must follow the struct's field order.
+    /* .get_name             = */ ggml_backend_blas_device_get_name,
+    /* .get_description      = */ ggml_backend_blas_device_get_description,
+    /* .get_memory           = */ ggml_backend_blas_device_get_memory,
+    /* .get_type             = */ ggml_backend_blas_device_get_type,
+    /* .get_props            = */ ggml_backend_blas_device_get_props,
+    /* .init_backend         = */ ggml_backend_blas_device_init_backend,
+    /* .get_buffer_type      = */ ggml_backend_blas_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_host_ptr,
+    /* .supports_op          = */ ggml_backend_blas_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_blas_device_supports_buft,
+    /* .offload_op           = */ NULL,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
+// backend reg interface
+// Reports the fixed name of the BLAS backend registry; reg is ignored.
+static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return "BLAS";
+}
+// The BLAS registry always exposes exactly one device; reg is ignored.
+static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    return 1;
+}
+// Returns the single BLAS device; index must be 0 (asserted). The device is a
+// function-local static, so its reg field keeps the value passed on the first
+// call.
+static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
+    GGML_ASSERT(index == 0);
+
+    static ggml_backend_device blas_device = {
+        /* .iface   = */ ggml_backend_blas_device_i,
+        /* .reg     = */ reg,
+        /* .context = */ nullptr,
+    };
+
+    return &blas_device;
+}
+// Looks up an optional entry point by name. Only "ggml_backend_set_n_threads"
+// is known and maps to ggml_backend_blas_set_n_threads; any other name yields
+// NULL. reg is ignored.
+static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) {
+    GGML_UNUSED(reg);
+
+    if (std::strcmp(name, "ggml_backend_set_n_threads") != 0) {
+        return NULL;
+    }
+    return (void *)ggml_backend_blas_set_n_threads;
+}
+static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = { // Registry function table; entries are positional, so their order must follow the struct's field order.
+    /* .get_name         = */ ggml_backend_blas_reg_get_name,
+    /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_blas_reg_get_device,
+    /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
+};
+// Returns the BLAS backend registry: a function-local static object carrying
+// the backend API version and the registry function table, with no context.
+ggml_backend_reg_t ggml_backend_blas_reg(void) {
+    static struct ggml_backend_reg blas_reg = {
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
+        /* .iface       = */ ggml_backend_blas_reg_i,
+        /* .context     = */ NULL,
+    };
+
+    return &blas_reg;
+}
+GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg) // NOTE(review): presumably emits the entry point used when this backend is built as a dynamically loadable module - confirm against the macro's definition