local-llm-rn 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +321 -0
- package/android/build.gradle.kts +169 -0
- package/android/proguard-rules.pro +14 -0
- package/android/src/main/AndroidManifest.xml +2 -0
- package/android/src/main/cpp/CMakeLists.txt +71 -0
- package/android/src/main/cpp/LocalLLM.cpp +1342 -0
- package/android/src/main/java/com/hilum/localllm/LocalLLMModule.kt +627 -0
- package/android/src/main/java/com/hilum/localllm/LocalLLMPackage.kt +26 -0
- package/cpp/CMakeLists.txt +10 -0
- package/cpp/LICENSE +21 -0
- package/cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/cpp/cmake/arm64-windows-llvm.cmake +16 -0
- package/cpp/cmake/build-info.cmake +48 -0
- package/cpp/cmake/common.cmake +58 -0
- package/cpp/cmake/download-models.cmake +21 -0
- package/cpp/cmake/git-vars.cmake +22 -0
- package/cpp/cmake/license.cmake +40 -0
- package/cpp/cmake/llama-config.cmake.in +30 -0
- package/cpp/cmake/llama.pc.in +10 -0
- package/cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- package/cpp/cmake/x64-windows-llvm.cmake +5 -0
- package/cpp/common/CMakeLists.txt +1 -1
- package/cpp/common/build-info.cpp +4 -0
- package/cpp/common/jinja/README.md +1 -1
- package/cpp/common/jinja/string.cpp +1 -1
- package/cpp/common/jinja/value.h +1 -1
- package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +49 -39
- package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
- package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
- package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +15 -1
- package/cpp/ggml/src/ggml-vulkan/CMakeLists.txt +265 -0
- package/cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +16 -0
- package/cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +16886 -0
- package/cpp/ggml/src/ggml-vulkan/vma/vk_mem_alloc.h +19530 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +35 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +296 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +610 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +734 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +608 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +264 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +642 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +390 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +66 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +19 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +29 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +116 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +161 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +137 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +606 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +195 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +145 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +33 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +5 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1784 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1318 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
- package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
- package/cpp/hilum/CMakeLists.txt +58 -0
- package/cpp/hilum/hilum_llm.cpp +2151 -0
- package/cpp/hilum/hilum_llm.h +505 -0
- package/cpp/licenses/LICENSE-jsonhpp +21 -0
- package/cpp/mtmd/CMakeLists.txt +21 -12
- package/cpp/vendor/cpp-httplib/CMakeLists.txt +182 -0
- package/cpp/vendor/cpp-httplib/LICENSE +22 -0
- package/cpp/vendor/cpp-httplib/httplib.cpp +16164 -0
- package/cpp/vendor/cpp-httplib/httplib.h +3797 -0
- package/cpp/vendor/miniaudio/miniaudio.h +95747 -0
- package/ios/LocalLLM.h +5 -0
- package/ios/LocalLLM.mm +538 -612
- package/local-llm-rn.podspec +33 -7
- package/package.json +45 -6
- package/src/NativeLocalLLM.ts +31 -20
- package/src/cache.ts +129 -0
- package/src/device.ts +36 -10
- package/src/errors.ts +28 -0
- package/src/index.ts +10 -7
- package/src/local-llm.ts +147 -0
- package/src/model-manager.ts +82 -0
- package/src/native-bridge.ts +112 -9
- package/src/rn-downloader.ts +18 -4
- package/src/vision.ts +60 -0
- package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +0 -117
- package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +0 -3232
- package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +0 -45
- package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +0 -815
- package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +0 -281
- package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +0 -827
- package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +0 -157
- package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +0 -251
- package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +0 -666
- package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +0 -111
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +0 -63
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +0 -182
- package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +0 -77
- package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +0 -37
- package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +0 -51
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +0 -35
- package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +0 -154
- package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +0 -65
- package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +0 -16
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +0 -470
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +0 -173
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +0 -245
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +0 -116
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +0 -129
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +0 -215
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +0 -100
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +0 -176
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +0 -266
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +0 -133
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +0 -141
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +0 -126
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +0 -36
- package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +0 -18
- package/cpp/ggml/src/ggml-hexagon/htp/main.c +0 -1150
- package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +0 -2595
- package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +0 -498
- package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +0 -167
- package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +0 -421
- package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +0 -130
- package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +0 -384
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +0 -293
- package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +0 -57
- package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +0 -418
- package/cpp/ggml/src/ggml-hexagon/htp-drv.h +0 -121
- package/cpp/ggml/src/ggml-hexagon/libdl.h +0 -79
- package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +0 -38
- package/cpp/ggml/src/ggml-hexagon/op-desc.h +0 -153
- package/cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -125
- package/cpp/ggml/src/ggml-musa/mudnn.cu +0 -112
- package/cpp/ggml/src/ggml-musa/mudnn.cuh +0 -12
- package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -150
- package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -11553
- package/cpp/ggml/src/ggml-opencl/kernels/add.cl +0 -190
- package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +0 -42
- package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +0 -86
- package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +0 -20
- package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +0 -51
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +0 -185
- package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +0 -176
- package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +0 -184
- package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +0 -417
- package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +0 -58
- package/cpp/ggml/src/ggml-opencl/kernels/div.cl +0 -138
- package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +0 -26
- package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +0 -113
- package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +0 -17
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +0 -370
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +0 -371
- package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +0 -373
- package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +0 -89
- package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +0 -162
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +0 -156
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +0 -268
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +0 -274
- package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +0 -195
- package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +0 -187
- package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +0 -378
- package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +0 -121
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +0 -57
- package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +0 -57
- package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +0 -140
- package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +0 -152
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +0 -139
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +0 -130
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +0 -273
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +0 -146
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +0 -147
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +0 -163
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +0 -165
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +0 -158
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +0 -129
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +0 -154
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +0 -118
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +0 -118
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +0 -94
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +0 -84
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +0 -118
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +0 -189
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +0 -176
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +0 -283
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +0 -140
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +0 -222
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +0 -144
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +0 -167
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +0 -192
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +0 -307
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +0 -265
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +0 -272
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +0 -254
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +0 -219
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +0 -229
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +0 -180
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +0 -194
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +0 -194
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +0 -125
- package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +0 -202
- package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +0 -161
- package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +0 -39
- package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +0 -16
- package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +0 -38
- package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +0 -190
- package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +0 -747
- package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +0 -27
- package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +0 -208
- package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +0 -29
- package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +0 -30
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +0 -108
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +0 -108
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +0 -107
- package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +0 -107
- package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +0 -116
- package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +0 -51
- package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +0 -53
- package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +0 -53
- package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +0 -77
- package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +0 -138
- package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +0 -140
- package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +0 -109
- package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +0 -117
- package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +0 -32
- package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +0 -48
- package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +0 -120
- package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
- package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -2118
- package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +0 -70
- package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +0 -87
- package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +0 -21
- package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +0 -115
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +0 -13
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +0 -102
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +0 -105
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +0 -179
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +0 -148
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +0 -51
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +0 -73
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +0 -27
- package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +0 -32
- package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +0 -144
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +0 -95
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +0 -94
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +0 -50
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +0 -378
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +0 -232
- package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +0 -58
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +0 -81
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +0 -119
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +0 -158
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +0 -213
- package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +0 -69
- package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +0 -71
- package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +0 -166
- package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +0 -9
- package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +0 -333
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +0 -15
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +0 -58
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +0 -110
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +0 -173
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +0 -192
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +0 -36
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +0 -53
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +0 -98
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +0 -23
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +0 -179
- package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +0 -86
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +0 -544
- package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +0 -117
- package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +0 -80
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +0 -1231
- package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +0 -3150
- package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +0 -778
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +0 -72
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +0 -106
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +0 -134
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +0 -107
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +0 -923
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +0 -107
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +0 -66
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +0 -182
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +0 -636
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +0 -668
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +0 -323
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +0 -40
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +0 -713
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +0 -103
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +0 -138
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +0 -188
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +0 -194
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +0 -86
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +0 -123
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +0 -295
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +0 -63
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +0 -109
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +0 -345
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +0 -55
- package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +0 -193
- package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +0 -36
- package/cpp/ggml/src/ggml-zdnn/common.hpp +0 -59
- package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +0 -633
- package/cpp/ggml/src/ggml-zdnn/mmf.cpp +0 -80
- package/cpp/ggml/src/ggml-zdnn/mmf.hpp +0 -12
- package/cpp/ggml/src/ggml-zdnn/utils.cpp +0 -79
- package/cpp/ggml/src/ggml-zdnn/utils.hpp +0 -19
- package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +0 -92
- package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +0 -469
- package/cpp/mtmd/README.md +0 -63
- package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +0 -412
- package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +0 -280
- package/cpp/mtmd/legacy-models/glmedge-surgery.py +0 -33
- package/cpp/mtmd/legacy-models/llava_surgery.py +0 -38
- package/cpp/mtmd/legacy-models/llava_surgery_v2.py +0 -180
- package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +0 -892
- package/cpp/mtmd/legacy-models/minicpmv-surgery.py +0 -47
- package/cpp/mtmd/mtmd-cli.cpp +0 -437
- package/cpp/mtmd/requirements.txt +0 -5
- package/cpp/mtmd/test-1.jpeg +0 -0
- package/cpp/mtmd/test-2.mp3 +0 -0
- package/cpp/mtmd/tests.sh +0 -192
- package/src/download-adapter.ts +0 -17
- /package/cpp/common/jinja/{string.h → jinja_string.h} +0 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025-2026 Hilum Labs
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
# local-llm-rn
|
|
2
|
+
|
|
3
|
+
Run LLMs on-device in React Native with Metal (iOS) and Vulkan (Android) GPU acceleration. Same OpenAI-compatible API as [`local-llm`](https://www.npmjs.com/package/local-llm).
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/local-llm-rn)
|
|
6
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install local-llm-rn
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
import { LocalLLM } from 'local-llm-rn';
|
|
15
|
+
|
|
16
|
+
const ai = await LocalLLM.create({
|
|
17
|
+
model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
|
|
18
|
+
compute: 'gpu',
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
const response = await ai.chat.completions.create({
|
|
22
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
23
|
+
stream: true,
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
for await (const chunk of response) {
|
|
27
|
+
console.log(chunk.choices[0]?.delta?.content ?? '');
|
|
28
|
+
}
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
> Need to run on **Node.js** instead? Check out [`local-llm`](https://www.npmjs.com/package/local-llm) for macOS, Linux, and Windows.
|
|
32
|
+
|
|
33
|
+
## Why local-llm-rn?
|
|
34
|
+
|
|
35
|
+
- **On-device.** Models run entirely on the phone. No server, no API keys, no data leaves the device.
|
|
36
|
+
- **GPU accelerated.** Metal on iOS, Vulkan on Android. Not just CPU inference.
|
|
37
|
+
- **OpenAI-compatible API.** Same `chat.completions.create()` you already know from `local-llm` and OpenAI.
|
|
38
|
+
- **Device-aware.** Built-in helpers to check RAM, recommend quantization, and prevent OOM crashes.
|
|
39
|
+
- **Auto download.** Pass a HuggingFace model reference (`user/repo/file.gguf`) and the model is downloaded and cached on-device automatically.
|
|
40
|
+
- **Speculative decoding.** Use a small draft model for 2-3x faster generation with zero quality loss.
|
|
41
|
+
|
|
42
|
+
## Platform Support
|
|
43
|
+
|
|
44
|
+
| Platform | GPU Backend | Min Version | Notes |
|
|
45
|
+
|---|---|---|---|
|
|
46
|
+
| iOS | Metal | iOS 16+ | BF16 + Accelerate BLAS |
|
|
47
|
+
| Android | Vulkan | Android 10+ (API 29) | CPU fallback on devices without Vulkan |
|
|
48
|
+
|
|
49
|
+
### Tested Compatibility
|
|
50
|
+
|
|
51
|
+
| | Versions |
|
|
52
|
+
|---|---|
|
|
53
|
+
| React Native | 0.76 - 0.83 |
|
|
54
|
+
| Expo SDK | 53 - 55 |
|
|
55
|
+
| Xcode | 15+ |
|
|
56
|
+
| NDK | 27.x |
|
|
57
|
+
| CMake | 3.22.1+ (Android builds pin 4.1.2, auto-installed via sdkmanager when missing) |
|
|
58
|
+
|
|
59
|
+
## Setup
|
|
60
|
+
|
|
61
|
+
### Expo (recommended)
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
npm install local-llm-rn
|
|
65
|
+
npx expo prebuild
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Bare React Native
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
npm install local-llm-rn
|
|
72
|
+
cd ios && pod install
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Requires React Native 0.76+ (New Architecture / Turbo Modules).
|
|
76
|
+
Examples and CI are pinned to React Native 0.83 / Expo SDK 55.
|
|
77
|
+
Examples target iOS 16.0 and Android SDK levels compatible with the native module.
|
|
78
|
+
|
|
79
|
+
> **Note:** `local-llm-rn` ships raw TypeScript source (`src/index.ts`) — no pre-compiled JS. This is intentional: Metro (the React Native bundler) handles TypeScript natively, and shipping `.ts` gives consumers full source maps, accurate go-to-definition, and smaller npm tarballs. This package is designed exclusively for the React Native / Metro ecosystem.
|
|
80
|
+
|
|
81
|
+
## Quick Start
|
|
82
|
+
|
|
83
|
+
### 1. Check device capabilities
|
|
84
|
+
|
|
85
|
+
Before loading a model, check if the device can handle it:
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
import { canRunModel, getDeviceCapabilities, recommendQuantization } from 'local-llm-rn';
|
|
89
|
+
|
|
90
|
+
const caps = getDeviceCapabilities();
|
|
91
|
+
console.log(caps.gpuName); // "Apple A16 GPU"
|
|
92
|
+
console.log(caps.totalRAM); // 6442450944 (6 GB)
|
|
93
|
+
console.log(caps.metalFamily); // 8 (A15/A16)
|
|
94
|
+
|
|
95
|
+
const quant = recommendQuantization();
|
|
96
|
+
console.log(quant); // "Q6_K"
|
|
97
|
+
|
|
98
|
+
const check = canRunModel(1_800_000_000); // 1.8 GB model
|
|
99
|
+
if (!check.canRun) {
|
|
100
|
+
console.warn(check.reason); // "Model needs ~2160 MB but only 1500 MB available"
|
|
101
|
+
console.warn(check.suggestion); // "Try a Q4_K_M quantized variant or a smaller model"
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### 2. Load a model
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
import { LocalLLM } from 'local-llm-rn';
|
|
109
|
+
|
|
110
|
+
const ai = await LocalLLM.create({
|
|
111
|
+
model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
|
|
112
|
+
compute: 'gpu',
|
|
113
|
+
contextSize: 2048,
|
|
114
|
+
onProgress: (pct) => console.log(`Downloading: ${pct.toFixed(1)}%`),
|
|
115
|
+
});
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### 3. Chat with streaming
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
const response = await ai.chat.completions.create({
|
|
122
|
+
messages: [
|
|
123
|
+
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
124
|
+
{ role: 'user', content: 'What is the capital of France?' },
|
|
125
|
+
],
|
|
126
|
+
stream: true,
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
let text = '';
|
|
130
|
+
for await (const chunk of response) {
|
|
131
|
+
text += chunk.choices[0]?.delta?.content ?? '';
|
|
132
|
+
// Update your UI here
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### 4. Check performance
|
|
137
|
+
|
|
138
|
+
Every response includes inference speed metrics:
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
console.log(`Speed: ${response._timing?.generatedTokensPerSec.toFixed(1)} tok/s`);
|
|
142
|
+
console.log(`TTFT: ${response._timing?.promptEvalMs.toFixed(0)} ms`);
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
When streaming, `_timing` is on the final chunk:
|
|
146
|
+
|
|
147
|
+
```typescript
|
|
148
|
+
for await (const chunk of response) {
|
|
149
|
+
const content = chunk.choices[0]?.delta?.content;
|
|
150
|
+
if (content) setText((t) => t + content);
|
|
151
|
+
if (chunk._timing) {
|
|
152
|
+
console.log(`Generation: ${chunk._timing.generatedTokensPerSec.toFixed(1)} tok/s`);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### 5. Clean up
|
|
158
|
+
|
|
159
|
+
```typescript
|
|
160
|
+
ai.dispose();
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Recommended Models
|
|
164
|
+
|
|
165
|
+
| Model | Quant | Size | Good for |
|
|
166
|
+
|---|---|---|---|
|
|
167
|
+
| [SmolLM2 1.7B](https://huggingface.co/bartowski/SmolLM2-1.7B-Instruct-GGUF) | Q4_K_M | ~1.0 GB | Fast, works on all devices |
|
|
168
|
+
| [TinyLlama 1.1B](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) | Q4_K_M | ~636 MB | Testing, development |
|
|
169
|
+
| [Llama 3.2 3B](https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF) | Q4_K_M | ~1.8 GB | Best quality for flagship phones |
|
|
170
|
+
| [Phi-3 Mini](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) | Q4_K_M | ~2.2 GB | Great balance of speed and quality |
|
|
171
|
+
|
|
172
|
+
**Quantization guide by device RAM:**
|
|
173
|
+
|
|
174
|
+
| Device RAM | Recommended | Examples |
|
|
175
|
+
|---|---|---|
|
|
176
|
+
| 8 GB | Q8_0 | iPhone 15 Pro, iPhone 16 Pro |
|
|
177
|
+
| 6 GB | Q6_K | iPhone 14 Pro, iPhone 14/15 base |
|
|
178
|
+
| 4 GB | Q4_K_M | iPhone 11-13 |
|
|
179
|
+
| 3 GB | Q3_K_S | iPhone X, older devices |
|
|
180
|
+
|
|
181
|
+
## Device Helpers API
|
|
182
|
+
|
|
183
|
+
```typescript
|
|
184
|
+
import { getDeviceCapabilities, canRunModel, recommendQuantization } from 'local-llm-rn';
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### `getDeviceCapabilities()`
|
|
188
|
+
|
|
189
|
+
Returns device hardware info:
|
|
190
|
+
|
|
191
|
+
```typescript
|
|
192
|
+
{
|
|
193
|
+
totalRAM: number; // Total RAM in bytes
|
|
194
|
+
availableRAM: number; // Available RAM (respects iOS jetsam limits)
|
|
195
|
+
gpuName: string; // e.g. "Apple A16 GPU"
|
|
196
|
+
metalFamily: number; // Apple GPU family (5=A12+, 7=A14+, 9=A17+)
|
|
197
|
+
metalVersion: number; // Metal version (1, 2, or 3)
|
|
198
|
+
iosVersion: string; // e.g. "17.2.1"
|
|
199
|
+
isLowPowerMode: boolean;
|
|
200
|
+
}
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### `canRunModel(modelSizeBytes)`
|
|
204
|
+
|
|
205
|
+
Checks if the device has enough RAM to run a model:
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
const result = canRunModel(1_800_000_000);
|
|
209
|
+
// { canRun: true }
|
|
210
|
+
// or { canRun: false, reason: "...", suggestion: "..." }
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### `recommendQuantization()`
|
|
214
|
+
|
|
215
|
+
Suggests the best quantization level based on device RAM:
|
|
216
|
+
|
|
217
|
+
```typescript
|
|
218
|
+
const quant = recommendQuantization();
|
|
219
|
+
// "Q8_0" | "Q6_K" | "Q4_K_M" | "Q3_K_S"
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Configuration
|
|
223
|
+
|
|
224
|
+
```typescript
|
|
225
|
+
const ai = await LocalLLM.create({
|
|
226
|
+
model: 'user/repo/file.gguf', // HuggingFace shorthand or local path
|
|
227
|
+
|
|
228
|
+
compute: 'gpu', // 'gpu' | 'cpu' | 'auto'
|
|
229
|
+
contextSize: 2048, // Context window size
|
|
230
|
+
batchSize: 512, // Batch size for prompt processing
|
|
231
|
+
|
|
232
|
+
warmup: true, // Warmup on load — eliminates cold-start (default: true)
|
|
233
|
+
|
|
234
|
+
// Speculative decoding (optional — 2-3x faster generation)
|
|
235
|
+
// draftModel: 'user/repo/small-model.gguf', // Small model from same family
|
|
236
|
+
// draftNMax: 16, // Max draft tokens per step
|
|
237
|
+
|
|
238
|
+
onProgress: (pct) => {}, // Download progress callback (0-100)
|
|
239
|
+
});
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Error Handling
|
|
243
|
+
|
|
244
|
+
All errors thrown by `local-llm-rn` are instances of `LocalLLMError` with a typed `code` property:
|
|
245
|
+
|
|
246
|
+
```typescript
|
|
247
|
+
import { LocalLLMError, LocalLLMErrorCode } from 'local-llm-rn';
|
|
248
|
+
|
|
249
|
+
try {
|
|
250
|
+
const ai = await LocalLLM.create({ model: 'user/repo/model.gguf' });
|
|
251
|
+
} catch (e) {
|
|
252
|
+
if (e instanceof LocalLLMError) {
|
|
253
|
+
switch (e.code) {
|
|
254
|
+
case LocalLLMErrorCode.MODEL_LOAD_FAILED:
|
|
255
|
+
// Handle model loading failure
|
|
256
|
+
break;
|
|
257
|
+
case LocalLLMErrorCode.DOWNLOAD_FAILED:
|
|
258
|
+
// Handle download failure
|
|
259
|
+
break;
|
|
260
|
+
case LocalLLMErrorCode.INSUFFICIENT_MEMORY:
|
|
261
|
+
// Suggest a smaller model
|
|
262
|
+
break;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
Available error codes: `MODEL_LOAD_FAILED`, `MODEL_TOO_LARGE`, `CONTEXT_CREATE_FAILED`, `CONTEXT_EXHAUSTED`, `INFERENCE_FAILED`, `STREAM_FAILED`, `DOWNLOAD_FAILED`, `DOWNLOAD_INTEGRITY_MISMATCH`, `VISION_FAILED`, `VISION_FETCH_FAILED`, `EMBEDDING_FAILED`, `NOT_INITIALIZED`, `INVALID_PATH`, `CACHE_CORRUPT`, `QUANTIZE_FAILED`, `INSUFFICIENT_MEMORY`.
|
|
269
|
+
|
|
270
|
+
## Device + Performance Combo
|
|
271
|
+
|
|
272
|
+
Combine device capabilities with inference metrics:
|
|
273
|
+
|
|
274
|
+
```typescript
|
|
275
|
+
import { LocalLLM, getDeviceCapabilities } from 'local-llm-rn';
|
|
276
|
+
|
|
277
|
+
const caps = getDeviceCapabilities();
|
|
278
|
+
console.log(`Device: ${caps.gpuName}, ${(caps.totalRAM / 1e9).toFixed(1)} GB RAM`);
|
|
279
|
+
|
|
280
|
+
const ai = await LocalLLM.create({
|
|
281
|
+
model: modelPath,
|
|
282
|
+
compute: 'gpu',
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
const response = await ai.chat.completions.create({
|
|
286
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
console.log(response.choices[0].message.content);
|
|
290
|
+
console.log(`Speed: ${response._timing?.generatedTokensPerSec.toFixed(1)} tok/s on ${caps.gpuName}`);
|
|
291
|
+
|
|
292
|
+
ai.dispose();
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
## Examples
|
|
296
|
+
|
|
297
|
+
- **[Expo example](./examples/expo-test/)** — Complete chat UI with device detection, model downloading, and streaming responses
|
|
298
|
+
- **[Bare RN example](./examples/react-native-test/)** — Minimal bare React Native test app
|
|
299
|
+
|
|
300
|
+
## Ecosystem
|
|
301
|
+
|
|
302
|
+
| Package | Description | Install |
|
|
303
|
+
|---|---|---|
|
|
304
|
+
| [`local-llm`](https://www.npmjs.com/package/local-llm) | Node.js / Bun / Electron | `npm install local-llm` |
|
|
305
|
+
| [`local-llm-rn`](https://www.npmjs.com/package/local-llm-rn) | React Native / Expo (this package) | `npm install local-llm-rn` |
|
|
306
|
+
| [`local_llm`](https://pub.dev/packages/local_llm) | Flutter | `flutter pub add local_llm` |
|
|
307
|
+
| [`hilum-local-llm-engine`](https://github.com/hilum-labs/hilum-local-llm-engine) | Core C++ engine | Vendored automatically |
|
|
308
|
+
|
|
309
|
+
## Contributing
|
|
310
|
+
|
|
311
|
+
We welcome contributions! See [CONTRIBUTING.md](./CONTRIBUTING.md) for setup instructions.
|
|
312
|
+
|
|
313
|
+
## Contact
|
|
314
|
+
|
|
315
|
+
Questions, feedback, or partnership inquiries: [info@hilumlabs.com](mailto:info@hilumlabs.com)
|
|
316
|
+
|
|
317
|
+
## License
|
|
318
|
+
|
|
319
|
+
MIT — See [LICENSE](./LICENSE) for details.
|
|
320
|
+
|
|
321
|
+
Made by [Hilum Labs](https://github.com/hilum-labs).
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import org.jetbrains.kotlin.gradle.dsl.KotlinAndroidProjectExtension
|
|
2
|
+
import java.util.Properties
|
|
3
|
+
|
|
4
|
+
// Repositories used to resolve this script's own build-time classpath.
buildscript {
    repositories {
        mavenCentral()
        google()
    }
}
|
|
7
|
+
|
|
8
|
+
/**
 * Locates the Android SDK directory.
 *
 * Lookup order:
 *  1. `sdk.dir` from the root project's `local.properties` (returned as-is,
 *     without checking that the directory exists);
 *  2. the first of `ANDROID_SDK_ROOT` / `ANDROID_HOME` that points at an
 *     existing path.
 *
 * @param project any project of the build; only its root project is consulted
 * @return the SDK directory, or `null` when none of the sources yields one
 */
fun resolveAndroidSdkDir(project: Project): File? {
    val propsFile = project.rootProject.file("local.properties")
    if (propsFile.exists()) {
        val props = Properties()
        propsFile.inputStream().use { stream -> props.load(stream) }
        val configured = props.getProperty("sdk.dir")
        if (configured != null) {
            return File(configured)
        }
    }

    for (envVar in listOf("ANDROID_SDK_ROOT", "ANDROID_HOME")) {
        val value = System.getenv(envVar) ?: continue
        val dir = File(value)
        if (dir.exists()) {
            return dir
        }
    }
    return null
}
|
|
21
|
+
|
|
22
|
+
/**
 * Best-effort installer for the CMake version this module builds with.
 *
 * If `<sdk>/cmake/<version>` is missing, runs the SDK's `sdkmanager` to
 * install the `cmake;<version>` package. The function never fails the build:
 * it returns silently when the SDK or sdkmanager cannot be located and only
 * prints a warning when the installation itself does not succeed.
 *
 * @param project project used to locate the Android SDK
 * @param version exact CMake package version, e.g. "4.1.2"
 */
fun ensureCmake(project: Project, version: String) {
    val sdkDir = resolveAndroidSdkDir(project) ?: return
    val cmakeDir = sdkDir.resolve("cmake/$version")
    if (cmakeDir.exists()) {
        println("local-llm-rn: CMake $version found at ${cmakeDir.absolutePath}")
        return
    }

    // Locate sdkmanager. On Windows the launcher is a .bat wrapper, so pick
    // the file name per host OS (same OS check resolveGlslc uses).
    val isWindows = System.getProperty("os.name").startsWith("Windows")
    val sdkmanagerName = if (isWindows) "sdkmanager.bat" else "sdkmanager"
    val sdkmanager = sequenceOf(
        "cmdline-tools/latest/bin",
        "cmdline-tools/bin",
        "tools/bin",
    ).map { sdkDir.resolve(it).resolve(sdkmanagerName) }
        .firstOrNull { it.exists() } ?: return

    println("local-llm-rn: Installing CMake $version via sdkmanager…")
    val exitCode = try {
        val process = ProcessBuilder(sdkmanager.absolutePath, "cmake;$version")
            .redirectErrorStream(true)
            .start()
        // Close the child's stdin so an interactive prompt (e.g. a licence
        // question) sees EOF instead of blocking Gradle configuration forever.
        process.outputStream.close()
        process.inputStream.bufferedReader().forEachLine { println(it) }
        process.waitFor()
    } catch (e: java.io.IOException) {
        // Launch failure is not fatal here; the native build will report a
        // missing CMake on its own if it really is unavailable.
        println("WARNING: could not run sdkmanager (${e.message}) — CMake $version may not be installed")
        return
    }
    if (exitCode != 0) {
        println("WARNING: sdkmanager exited with code $exitCode — CMake $version may not be installed")
    }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Searches the installed Android NDKs for the `glslc` shader compiler.
 *
 * Candidate NDK roots, in priority order:
 *  1. `ANDROID_NDK_ROOT`, then `ANDROID_NDK_HOME` (when the path exists);
 *  2. every entry of `<sdk>/ndk`, sorted by directory name, descending;
 *  3. `<sdk>/ndk-bundle` (when it exists).
 *
 * For each root, every child of `shader-tools/` is probed for the executable.
 *
 * @param project project used to locate the Android SDK
 * @return absolute path of the first `glslc` found, or `null` if there is none
 */
fun resolveGlslc(project: Project): String? {
    val isWindows = System.getProperty("os.name").startsWith("Windows")
    val binaryName = if (isWindows) "glslc.exe" else "glslc"
    val sdkRoot = resolveAndroidSdkDir(project)

    val candidates = mutableListOf<File>()
    for (envVar in listOf("ANDROID_NDK_ROOT", "ANDROID_NDK_HOME")) {
        val value = System.getenv(envVar) ?: continue
        val dir = File(value)
        if (dir.exists()) {
            candidates.add(dir)
        }
    }
    if (sdkRoot != null) {
        val sideBySide = sdkRoot.resolve("ndk").listFiles()
        if (sideBySide != null) {
            candidates.addAll(sideBySide.sortedByDescending { it.name })
        }
        val legacyBundle = sdkRoot.resolve("ndk-bundle")
        if (legacyBundle.exists()) {
            candidates.add(legacyBundle)
        }
    }

    for (ndkRoot in candidates) {
        val shaderTools = ndkRoot.resolve("shader-tools")
        if (!shaderTools.exists()) continue
        // shader-tools contains one sub-directory per host platform.
        val hostDirs = shaderTools.listFiles() ?: continue
        for (hostDir in hostDirs) {
            val glslc = hostDir.resolve(binaryName)
            if (glslc.exists()) {
                return glslc.absolutePath
            }
        }
    }
    return null
}
|
|
80
|
+
|
|
81
|
+
plugins {
    id("com.android.library")
    id("org.jetbrains.kotlin.android")
    id("com.facebook.react")
}

// React Native Gradle plugin settings. All paths are relative to this
// android/ directory; the ../../ entries point at sibling packages of this
// module's root (presumably node_modules — verify for non-hoisted installs).
react {
    libraryName = "LocalLLMSpec"
    codegenJavaPackageName = "com.hilum.localllm"
    jsRootDir = file("../src")
    root = file("..")
    reactNativeDir = file("../../react-native")
    codegenDir = file("../../@react-native/codegen")
    cliFile = file("../../react-native/cli.js")
}

// Install CMake 4.1.2 through sdkmanager when the SDK does not have it yet,
// so consumers do not need a manual setup step. Runs while this script is
// being evaluated.
ensureCmake(project, "4.1.2")
|
|
99
|
+
|
|
100
|
+
// Android library configuration: SDK levels, ABI filter, the CMake-driven
// native build and JVM compatibility.
android {
    namespace = "com.hilum.localllm"
    compileSdk = 35

    defaultConfig {
        // API 29 = Android 10+. Vulkan 1.1 (required by the engine) is available
        // from API 29. Devices without Vulkan fall back to CPU inference.
        minSdk = 29
        ndk { abiFilters += listOf("arm64-v8a") }

        // Ship the keep rules inside the AAR so that the *consuming app's*
        // shrinker applies them. This module never minifies itself, so rules
        // registered only via proguardFiles would not protect the classes and
        // callbacks listed in proguard-rules.pro in an app release build.
        consumerProguardFiles("proguard-rules.pro")

        externalNativeBuild {
            cmake {
                // Resolve glslc from the Android SDK/NDK installation without
                // relying on AGP's ndkDirectory during library configuration.
                val glslc = resolveGlslc(project)

                // listOfNotNull drops the glslc entry when no compiler was found.
                arguments += listOfNotNull(
                    "-Wno-dev",
                    "-DCMAKE_BUILD_TYPE=Release",
                    "-DBUILD_SHARED_LIBS=ON",
                    "-DLLAMA_BUILD_COMMON=ON",
                    "-DLLAMA_OPENSSL=OFF",
                    // CPU variant dispatch (2-4x speedup on modern ARM)
                    "-DGGML_NATIVE=OFF",
                    "-DGGML_BACKEND_DL=ON",
                    "-DGGML_CPU_ALL_VARIANTS=ON",
                    "-DGGML_LLAMAFILE=OFF",
                    // Vulkan GPU + Adreno optimizations
                    glslc?.let { "-DVulkan_GLSLC_EXECUTABLE=$it" },
                    "-DGGML_VULKAN=ON",
                    "-DGGML_VULKAN_VMA=ON",
                    "-DGGML_VULKAN_BUILD_ADRENO_SHADERS=ON",
                    // Disable unneeded targets
                    "-DLLAMA_BUILD_TOOLS=OFF",
                    "-DLLAMA_BUILD_TESTS=OFF",
                    "-DLLAMA_BUILD_EXAMPLES=OFF",
                    "-DLLAMA_BUILD_SERVER=OFF",
                )
            }
        }
    }

    externalNativeBuild {
        cmake {
            path("src/main/cpp/CMakeLists.txt")
            // Must match the version passed to ensureCmake().
            version = "4.1.2"
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(getDefaultProguardFile("proguard-android-optimize.txt"), "proguard-rules.pro")
        }
    }

    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_17
        targetCompatibility = JavaVersion.VERSION_17
    }

}
|
|
162
|
+
|
|
163
|
+
// Compile Kotlin sources with a Java 17 toolchain.
configure<KotlinAndroidProjectExtension> { jvmToolchain(17) }

// React Native runtime; no version is given here.
dependencies { implementation("com.facebook.react:react-android") }
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Preserve every class in the module's package, including all members.
-keep class com.hilum.localllm.** { *; }

# Explicitly retain LocalLLMModule's native methods and its emit* callbacks
# so they are neither stripped nor renamed.
-keepclassmembers class com.hilum.localllm.LocalLLMModule {
    void emitToken(...);
    void emitBatchToken(...);
    void emitDownloadProgress(...);
    void emitDownloadComplete(...);
    void emitDownloadError(...);
    void emitQuantizeComplete(...);
    void emitLog(...);
    native <methods>;
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Requires CMake 3.22.1 or newer; policies are set to NEW up to 4.1.
cmake_minimum_required(VERSION 3.22.1...4.1)
project("local-llm-rn" LANGUAGES C CXX)

# Build all C++ sources as C++17, with no fallback to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
|
|
6
|
+
|
|
7
|
+
# Vendor code produces a large volume of conversion/sign warnings under MSVC.
# Silence them for everything configured from this directory downward:
#   C4018  signed/unsigned mismatch
#   C4101  unreferenced local variable
#   C4244  narrowing conversion
#   C4267  size_t -> smaller type
#   C4305  truncation to smaller floating type
if(MSVC)
  add_compile_options(/wd4018 /wd4101 /wd4244 /wd4267 /wd4305)
endif()
|
|
17
|
+
|
|
18
|
+
# Per-ABI engine settings (taken from the engine's llama.android example).
# Only arm64-v8a and x86_64 are handled; any other ABI, or a build without
# ANDROID_ABI, leaves these variables untouched.
if(DEFINED ANDROID_ABI AND ANDROID_ABI STREQUAL "arm64-v8a")
  set(GGML_SYSTEM_ARCH "ARM")
  set(GGML_CPU_KLEIDIAI ON)
  set(GGML_OPENMP ON)
elseif(DEFINED ANDROID_ABI AND ANDROID_ABI STREQUAL "x86_64")
  set(GGML_SYSTEM_ARCH "x86")
  set(GGML_CPU_KLEIDIAI OFF)
  set(GGML_OPENMP OFF)
endif()
|
|
30
|
+
|
|
31
|
+
# Locate the engine sources. Two layouts are supported, checked in this order:
#   1. <repo root>/cpp                           (published package / prepared repo)
#   2. <repo root>/vendor/hilum-local-llm-engine (local development fallback)
# A candidate counts only if it contains a CMakeLists.txt.
set(LLAMA_SRC_PACKAGE ${CMAKE_CURRENT_LIST_DIR}/../../../../cpp)
set(LLAMA_SRC_VENDOR ${CMAKE_CURRENT_LIST_DIR}/../../../../vendor/hilum-local-llm-engine)

# Fail early when neither layout is present.
if(NOT EXISTS ${LLAMA_SRC_PACKAGE}/CMakeLists.txt AND NOT EXISTS ${LLAMA_SRC_VENDOR}/CMakeLists.txt)
  message(FATAL_ERROR
    "Engine source not found. Expected either ${LLAMA_SRC_PACKAGE} or ${LLAMA_SRC_VENDOR} "
    "to contain CMakeLists.txt. Run scripts/prepare.sh before building from the repo root.")
endif()

# At least one candidate exists; the packaged copy wins over the vendored one.
if(EXISTS ${LLAMA_SRC_PACKAGE}/CMakeLists.txt)
  set(LLAMA_SRC ${LLAMA_SRC_PACKAGE})
else()
  set(LLAMA_SRC ${LLAMA_SRC_VENDOR})
endif()
|
|
46
|
+
|
|
47
|
+
# Configure and build the engine from source inside this build tree.
# HILUM_BUILD_LIB is forced ON before entering the engine directory;
# presumably it enables the `hilum` library target linked below (the option
# is defined in the engine's CMakeLists, not in this file — confirm there).
set(HILUM_BUILD_LIB ON CACHE BOOL "" FORCE)
add_subdirectory(${LLAMA_SRC} build-llama)

# The JNI shared library, named after the top-level project.
add_library(${CMAKE_PROJECT_NAME} SHARED LocalLLM.cpp)

# Include both dev paths (tools/mtmd) and published paths (mtmd)
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE
  ${LLAMA_SRC}/include
  ${LLAMA_SRC}/ggml/include
  ${LLAMA_SRC}/ggml/src
  ${LLAMA_SRC}/src
  ${LLAMA_SRC}/common
  ${LLAMA_SRC}/hilum
  ${LLAMA_SRC}/tools/mtmd
  ${LLAMA_SRC}/mtmd
  ${LLAMA_SRC}/vendor
)

# PRIVATE: this is a leaf shared library, so none of its dependencies need
# to appear in its link interface. `android` and `log` are NDK system
# libraries; the rest are targets expected from the engine build.
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE
  hilum
  llama
  common
  android
  log
)
|