local-llm-rn 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (477)
  1. package/LICENSE +21 -0
  2. package/README.md +321 -0
  3. package/android/build.gradle.kts +169 -0
  4. package/android/proguard-rules.pro +14 -0
  5. package/android/src/main/AndroidManifest.xml +2 -0
  6. package/android/src/main/cpp/CMakeLists.txt +71 -0
  7. package/android/src/main/cpp/LocalLLM.cpp +1342 -0
  8. package/android/src/main/java/com/hilum/localllm/LocalLLMModule.kt +627 -0
  9. package/android/src/main/java/com/hilum/localllm/LocalLLMPackage.kt +26 -0
  10. package/cpp/CMakeLists.txt +10 -0
  11. package/cpp/LICENSE +21 -0
  12. package/cpp/cmake/arm64-apple-clang.cmake +16 -0
  13. package/cpp/cmake/arm64-windows-llvm.cmake +16 -0
  14. package/cpp/cmake/build-info.cmake +48 -0
  15. package/cpp/cmake/common.cmake +58 -0
  16. package/cpp/cmake/download-models.cmake +21 -0
  17. package/cpp/cmake/git-vars.cmake +22 -0
  18. package/cpp/cmake/license.cmake +40 -0
  19. package/cpp/cmake/llama-config.cmake.in +30 -0
  20. package/cpp/cmake/llama.pc.in +10 -0
  21. package/cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
  22. package/cpp/cmake/x64-windows-llvm.cmake +5 -0
  23. package/cpp/common/CMakeLists.txt +1 -1
  24. package/cpp/common/build-info.cpp +4 -0
  25. package/cpp/common/jinja/README.md +1 -1
  26. package/cpp/common/jinja/string.cpp +1 -1
  27. package/cpp/common/jinja/value.h +1 -1
  28. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +49 -39
  29. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +21 -20
  30. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +965 -252
  31. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +15 -1
  32. package/cpp/ggml/src/ggml-vulkan/CMakeLists.txt +265 -0
  33. package/cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +16 -0
  34. package/cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +16886 -0
  35. package/cpp/ggml/src/ggml-vulkan/vma/vk_mem_alloc.h +19530 -0
  36. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +35 -0
  37. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/abs.comp +21 -0
  38. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +37 -0
  39. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +69 -0
  40. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/add1.comp +28 -0
  41. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +42 -0
  42. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/arange.comp +20 -0
  43. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +60 -0
  44. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +86 -0
  45. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort_large.comp +114 -0
  46. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/ceil.comp +22 -0
  47. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  48. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  49. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  50. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  51. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +347 -0
  52. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  53. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  54. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  55. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +296 -0
  56. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_transpose.comp +67 -0
  57. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  58. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  59. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_experts.comp +51 -0
  60. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cumsum.comp +83 -0
  61. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass1.comp +60 -0
  62. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/cumsum_multipass2.comp +66 -0
  63. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  64. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl +610 -0
  65. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl +734 -0
  66. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl +13 -0
  67. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  68. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  69. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  70. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  71. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +49 -0
  72. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +40 -0
  73. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +51 -0
  74. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  75. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  76. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +32 -0
  77. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  78. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  79. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  80. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  81. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  82. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  83. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  84. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  85. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  86. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  87. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag.comp +29 -0
  88. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  89. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  90. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +21 -0
  91. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp +7 -0
  92. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp +7 -0
  93. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp +7 -0
  94. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp +7 -0
  95. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/fill.comp +19 -0
  96. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +608 -0
  97. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl +264 -0
  98. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +642 -0
  99. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +390 -0
  100. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_mask_opt.comp +162 -0
  101. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +121 -0
  102. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/floor.comp +22 -0
  103. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  104. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  105. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  106. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  107. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  108. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  109. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl +66 -0
  110. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl +11 -0
  111. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl +83 -0
  112. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +42 -0
  113. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +51 -0
  114. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl +19 -0
  115. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl +29 -0
  116. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  117. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +22 -0
  118. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +22 -0
  119. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +116 -0
  120. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +125 -0
  121. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +44 -0
  122. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  123. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/log.comp +18 -0
  124. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  125. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  126. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  127. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl +230 -0
  128. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iface.glsl +35 -0
  129. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +132 -0
  130. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +95 -0
  131. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  132. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +105 -0
  133. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  134. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  135. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  136. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +124 -0
  137. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +156 -0
  138. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +128 -0
  139. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  140. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +161 -0
  141. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +165 -0
  142. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +137 -0
  143. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +143 -0
  144. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq_funcs.glsl +494 -0
  145. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +464 -0
  146. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +624 -0
  147. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl +606 -0
  148. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_id_funcs.glsl +74 -0
  149. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +311 -0
  150. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl +454 -0
  151. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_shmem_types.glsl +78 -0
  152. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +195 -0
  153. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/neg.comp +20 -0
  154. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  155. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  156. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +22 -0
  157. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +64 -0
  158. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  159. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +145 -0
  160. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  161. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  162. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  163. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  164. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +150 -0
  165. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  166. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +65 -0
  167. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  168. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +207 -0
  169. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl +20 -0
  170. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +17 -0
  171. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +17 -0
  172. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +17 -0
  173. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +33 -0
  174. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +17 -0
  175. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/round.comp +29 -0
  176. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +5 -0
  177. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  178. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sgn.comp +21 -0
  179. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  180. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  181. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  182. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  183. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +195 -0
  184. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +54 -0
  185. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large1.comp +62 -0
  186. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large2.comp +79 -0
  187. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large3.comp +65 -0
  188. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_large_common.glsl +53 -0
  189. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/softplus.comp +23 -0
  190. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/solve_tri.comp +81 -0
  191. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +17 -0
  192. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  193. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_conv.comp +50 -0
  194. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/ssm_scan.comp +124 -0
  195. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/step.comp +22 -0
  196. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  197. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +47 -0
  198. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.glsl +25 -0
  199. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  200. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +14 -0
  201. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  202. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +42 -0
  203. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_argsort.comp +118 -0
  204. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_moe.comp +213 -0
  205. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/topk_nary_search.comp +246 -0
  206. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/tri.comp +43 -0
  207. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/trunc.comp +22 -0
  208. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl +1784 -0
  209. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +178 -0
  210. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl +25 -0
  211. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +1318 -0
  212. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  213. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  214. package/cpp/ggml/src/ggml-vulkan/vulkan-shaders/xielu.comp +35 -0
  215. package/cpp/hilum/CMakeLists.txt +58 -0
  216. package/cpp/hilum/hilum_llm.cpp +2151 -0
  217. package/cpp/hilum/hilum_llm.h +505 -0
  218. package/cpp/licenses/LICENSE-jsonhpp +21 -0
  219. package/cpp/mtmd/CMakeLists.txt +21 -12
  220. package/cpp/vendor/cpp-httplib/CMakeLists.txt +182 -0
  221. package/cpp/vendor/cpp-httplib/LICENSE +22 -0
  222. package/cpp/vendor/cpp-httplib/httplib.cpp +16164 -0
  223. package/cpp/vendor/cpp-httplib/httplib.h +3797 -0
  224. package/cpp/vendor/miniaudio/miniaudio.h +95747 -0
  225. package/ios/LocalLLM.h +5 -0
  226. package/ios/LocalLLM.mm +538 -612
  227. package/local-llm-rn.podspec +33 -7
  228. package/package.json +45 -6
  229. package/src/NativeLocalLLM.ts +31 -20
  230. package/src/cache.ts +129 -0
  231. package/src/device.ts +36 -10
  232. package/src/errors.ts +28 -0
  233. package/src/index.ts +10 -7
  234. package/src/local-llm.ts +147 -0
  235. package/src/model-manager.ts +82 -0
  236. package/src/native-bridge.ts +112 -9
  237. package/src/rn-downloader.ts +18 -4
  238. package/src/vision.ts +60 -0
  239. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +0 -117
  240. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +0 -3232
  241. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +0 -45
  242. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +0 -815
  243. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +0 -281
  244. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +0 -827
  245. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +0 -157
  246. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +0 -251
  247. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +0 -666
  248. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +0 -111
  249. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +0 -63
  250. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +0 -182
  251. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +0 -77
  252. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +0 -37
  253. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +0 -51
  254. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +0 -35
  255. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +0 -154
  256. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +0 -65
  257. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +0 -16
  258. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +0 -470
  259. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +0 -173
  260. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +0 -245
  261. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +0 -116
  262. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +0 -129
  263. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +0 -215
  264. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +0 -100
  265. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +0 -176
  266. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +0 -266
  267. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +0 -133
  268. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +0 -141
  269. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +0 -126
  270. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +0 -36
  271. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +0 -18
  272. package/cpp/ggml/src/ggml-hexagon/htp/main.c +0 -1150
  273. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +0 -2595
  274. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +0 -498
  275. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +0 -167
  276. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +0 -421
  277. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +0 -130
  278. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +0 -384
  279. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +0 -293
  280. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +0 -57
  281. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +0 -418
  282. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +0 -121
  283. package/cpp/ggml/src/ggml-hexagon/libdl.h +0 -79
  284. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +0 -38
  285. package/cpp/ggml/src/ggml-hexagon/op-desc.h +0 -153
  286. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -125
  287. package/cpp/ggml/src/ggml-musa/mudnn.cu +0 -112
  288. package/cpp/ggml/src/ggml-musa/mudnn.cuh +0 -12
  289. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -150
  290. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -11553
  291. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +0 -190
  292. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +0 -42
  293. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +0 -86
  294. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +0 -20
  295. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +0 -51
  296. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +0 -185
  297. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +0 -176
  298. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +0 -184
  299. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +0 -417
  300. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +0 -58
  301. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +0 -138
  302. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +0 -26
  303. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +0 -113
  304. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +0 -17
  305. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +0 -370
  306. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +0 -371
  307. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +0 -373
  308. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +0 -89
  309. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +0 -162
  310. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +0 -156
  311. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +0 -268
  312. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +0 -274
  313. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +0 -195
  314. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +0 -187
  315. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +0 -378
  316. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +0 -121
  317. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +0 -57
  318. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +0 -57
  319. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +0 -140
  320. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +0 -152
  321. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +0 -139
  322. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +0 -130
  323. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +0 -273
  324. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +0 -146
  325. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +0 -147
  326. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +0 -163
  327. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +0 -165
  328. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +0 -158
  329. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +0 -129
  330. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +0 -154
  331. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +0 -118
  332. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +0 -118
  333. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +0 -94
  334. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +0 -84
  335. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +0 -118
  336. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +0 -189
  337. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +0 -176
  338. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +0 -283
  339. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +0 -140
  340. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +0 -222
  341. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +0 -144
  342. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +0 -167
  343. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +0 -192
  344. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +0 -307
  345. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +0 -265
  346. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +0 -272
  347. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +0 -254
  348. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +0 -219
  349. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +0 -229
  350. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +0 -180
  351. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +0 -194
  352. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +0 -194
  353. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +0 -125
  354. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +0 -202
  355. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +0 -161
  356. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +0 -39
  357. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +0 -16
  358. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +0 -38
  359. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +0 -190
  360. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +0 -747
  361. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +0 -27
  362. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +0 -208
  363. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +0 -29
  364. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +0 -30
  365. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +0 -108
  366. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +0 -108
  367. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +0 -107
  368. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +0 -107
  369. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +0 -116
  370. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +0 -51
  371. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +0 -53
  372. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +0 -53
  373. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +0 -77
  374. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +0 -138
  375. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +0 -140
  376. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +0 -109
  377. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +0 -117
  378. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +0 -32
  379. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +0 -48
  380. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +0 -120
  381. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  382. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -2118
  383. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +0 -70
  384. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +0 -87
  385. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +0 -21
  386. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +0 -115
  387. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +0 -13
  388. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +0 -102
  389. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +0 -105
  390. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +0 -179
  391. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +0 -148
  392. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +0 -51
  393. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +0 -73
  394. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +0 -27
  395. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +0 -32
  396. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +0 -144
  397. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +0 -95
  398. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +0 -94
  399. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +0 -50
  400. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +0 -378
  401. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +0 -232
  402. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +0 -58
  403. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +0 -81
  404. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +0 -119
  405. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +0 -158
  406. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +0 -213
  407. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +0 -69
  408. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +0 -71
  409. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +0 -166
  410. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +0 -9
  411. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +0 -333
  412. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +0 -15
  413. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +0 -58
  414. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +0 -110
  415. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +0 -173
  416. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +0 -192
  417. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +0 -36
  418. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +0 -53
  419. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +0 -98
  420. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +0 -23
  421. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +0 -179
  422. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +0 -86
  423. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +0 -544
  424. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +0 -117
  425. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +0 -80
  426. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +0 -1231
  427. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +0 -3150
  428. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +0 -778
  429. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +0 -72
  430. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +0 -106
  431. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +0 -134
  432. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +0 -107
  433. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +0 -923
  434. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +0 -107
  435. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +0 -66
  436. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +0 -182
  437. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +0 -636
  438. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +0 -668
  439. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +0 -323
  440. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +0 -40
  441. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +0 -713
  442. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +0 -103
  443. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +0 -138
  444. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +0 -188
  445. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +0 -194
  446. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +0 -86
  447. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +0 -123
  448. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +0 -295
  449. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +0 -63
  450. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +0 -109
  451. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +0 -345
  452. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +0 -55
  453. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +0 -193
  454. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +0 -36
  455. package/cpp/ggml/src/ggml-zdnn/common.hpp +0 -59
  456. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +0 -633
  457. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +0 -80
  458. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +0 -12
  459. package/cpp/ggml/src/ggml-zdnn/utils.cpp +0 -79
  460. package/cpp/ggml/src/ggml-zdnn/utils.hpp +0 -19
  461. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +0 -92
  462. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +0 -469
  463. package/cpp/mtmd/README.md +0 -63
  464. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +0 -412
  465. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +0 -280
  466. package/cpp/mtmd/legacy-models/glmedge-surgery.py +0 -33
  467. package/cpp/mtmd/legacy-models/llava_surgery.py +0 -38
  468. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +0 -180
  469. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +0 -892
  470. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +0 -47
  471. package/cpp/mtmd/mtmd-cli.cpp +0 -437
  472. package/cpp/mtmd/requirements.txt +0 -5
  473. package/cpp/mtmd/test-1.jpeg +0 -0
  474. package/cpp/mtmd/test-2.mp3 +0 -0
  475. package/cpp/mtmd/tests.sh +0 -192
  476. package/src/download-adapter.ts +0 -17
  477. /package/cpp/common/jinja/{string.h → jinja_string.h} +0 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 Hilum Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,321 @@
1
+ # local-llm-rn
2
+
3
+ Run LLMs on-device in React Native with Metal (iOS) and Vulkan (Android) GPU acceleration. Same OpenAI-compatible API as [`local-llm`](https://www.npmjs.com/package/local-llm).
4
+
5
+ [![npm](https://img.shields.io/npm/v/local-llm-rn)](https://www.npmjs.com/package/local-llm-rn)
6
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
7
+ ![Platform: iOS | Android](https://img.shields.io/badge/platform-iOS%20%7C%20Android-lightgrey)
8
+
9
+ ```bash
10
+ npm install local-llm-rn
11
+ ```
12
+
13
+ ```typescript
14
+ import { LocalLLM } from 'local-llm-rn';
15
+
16
+ const ai = await LocalLLM.create({
17
+ model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
18
+ compute: 'gpu',
19
+ });
20
+
21
+ const response = await ai.chat.completions.create({
22
+ messages: [{ role: 'user', content: 'Hello!' }],
23
+ stream: true,
24
+ });
25
+
26
+ for await (const chunk of response) {
27
+ console.log(chunk.choices[0]?.delta?.content ?? '');
28
+ }
29
+ ```
30
+
31
+ > Need to run on **Node.js** instead? Check out [`local-llm`](https://www.npmjs.com/package/local-llm) for macOS, Linux, and Windows.
32
+
33
+ ## Why local-llm-rn?
34
+
35
+ - **On-device.** Models run entirely on the phone. No server, no API keys, no data leaves the device.
36
+ - **GPU accelerated.** Metal on iOS, Vulkan on Android. Not just CPU inference.
37
+ - **OpenAI-compatible API.** Same `chat.completions.create()` you already know from `local-llm` and OpenAI.
38
+ - **Device-aware.** Built-in helpers to check RAM, recommend quantization, and prevent OOM crashes.
39
+ - **Auto download.** Pass a HuggingFace URL, models are downloaded and cached on-device automatically.
40
+ - **Speculative decoding.** Use a small draft model for 2-3x faster generation with zero quality loss.
41
+
42
+ ## Platform Support
43
+
44
+ | Platform | GPU Backend | Min Version | Notes |
45
+ |---|---|---|---|
46
+ | iOS | Metal | iOS 16+ | BF16 + Accelerate BLAS |
47
+ | Android | Vulkan | Android 10+ (API 29) | CPU fallback on devices without Vulkan |
48
+
49
+ ### Tested Compatibility
50
+
51
+ | | Versions |
52
+ |---|---|
53
+ | React Native | 0.76 - 0.83 |
54
+ | Expo SDK | 53 - 55 |
55
+ | Xcode | 15+ |
56
+ | NDK | 27.x |
57
+ | CMake | 4.1.2 (installed automatically via sdkmanager when missing) |
58
+
59
+ ## Setup
60
+
61
+ ### Expo (recommended)
62
+
63
+ ```bash
64
+ npm install local-llm-rn
65
+ npx expo prebuild
66
+ ```
67
+
68
+ ### Bare React Native
69
+
70
+ ```bash
71
+ npm install local-llm-rn
72
+ cd ios && pod install
73
+ ```
74
+
75
+ Requires React Native 0.76+ (New Architecture / Turbo Modules).
76
+ Examples and CI are pinned to React Native 0.83 / Expo SDK 55.
77
+ Examples target iOS 16.0 and Android SDK levels compatible with the native module.
78
+
79
+ > **Note:** `local-llm-rn` ships raw TypeScript source (`src/index.ts`) — no pre-compiled JS. This is intentional: Metro (the React Native bundler) handles TypeScript natively, and shipping `.ts` gives consumers full source maps, accurate go-to-definition, and smaller npm tarballs. This package is designed exclusively for the React Native / Metro ecosystem.
80
+
81
+ ## Quick Start
82
+
83
+ ### 1. Check device capabilities
84
+
85
+ Before loading a model, check if the device can handle it:
86
+
87
+ ```typescript
88
+ import { canRunModel, getDeviceCapabilities, recommendQuantization } from 'local-llm-rn';
89
+
90
+ const caps = getDeviceCapabilities();
91
+ console.log(caps.gpuName); // "Apple A16 GPU"
92
+ console.log(caps.totalRAM); // 6442450944 (6 GB)
93
+ console.log(caps.metalFamily); // 8 (A15/A16)
94
+
95
+ const quant = recommendQuantization();
96
+ console.log(quant); // "Q6_K"
97
+
98
+ const check = canRunModel(1_800_000_000); // 1.8 GB model
99
+ if (!check.canRun) {
100
+ console.warn(check.reason); // "Model needs ~2160 MB but only 1500 MB available"
101
+ console.warn(check.suggestion); // "Try a Q4_K_M quantized variant or a smaller model"
102
+ }
103
+ ```
104
+
105
+ ### 2. Load a model
106
+
107
+ ```typescript
108
+ import { LocalLLM } from 'local-llm-rn';
109
+
110
+ const ai = await LocalLLM.create({
111
+ model: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
112
+ compute: 'gpu',
113
+ contextSize: 2048,
114
+ onProgress: (pct) => console.log(`Downloading: ${pct.toFixed(1)}%`),
115
+ });
116
+ ```
117
+
118
+ ### 3. Chat with streaming
119
+
120
+ ```typescript
121
+ const response = await ai.chat.completions.create({
122
+ messages: [
123
+ { role: 'system', content: 'You are a helpful assistant.' },
124
+ { role: 'user', content: 'What is the capital of France?' },
125
+ ],
126
+ stream: true,
127
+ });
128
+
129
+ let text = '';
130
+ for await (const chunk of response) {
131
+ text += chunk.choices[0]?.delta?.content ?? '';
132
+ // Update your UI here
133
+ }
134
+ ```
135
+
136
+ ### 4. Check performance
137
+
138
+ Every response includes inference speed metrics:
139
+
140
+ ```typescript
141
+ console.log(`Speed: ${response._timing?.generatedTokensPerSec.toFixed(1)} tok/s`);
142
+ console.log(`TTFT: ${response._timing?.promptEvalMs.toFixed(0)} ms`);
143
+ ```
144
+
145
+ When streaming, `_timing` is on the final chunk:
146
+
147
+ ```typescript
148
+ for await (const chunk of response) {
149
+ const content = chunk.choices[0]?.delta?.content;
150
+ if (content) setText((t) => t + content);
151
+ if (chunk._timing) {
152
+ console.log(`Generation: ${chunk._timing.generatedTokensPerSec.toFixed(1)} tok/s`);
153
+ }
154
+ }
155
+ ```
156
+
157
+ ### 5. Clean up
158
+
159
+ ```typescript
160
+ ai.dispose();
161
+ ```
162
+
163
+ ## Recommended Models
164
+
165
+ | Model | Quant | Size | Good for |
166
+ |---|---|---|---|
167
+ | [SmolLM2 1.7B](https://huggingface.co/bartowski/SmolLM2-1.7B-Instruct-GGUF) | Q4_K_M | ~1.0 GB | Fast, works on all devices |
168
+ | [TinyLlama 1.1B](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) | Q4_K_M | ~636 MB | Testing, development |
169
+ | [Llama 3.2 3B](https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF) | Q4_K_M | ~1.8 GB | Best quality for flagship phones |
170
+ | [Phi-3 Mini](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) | Q4_K_M | ~2.2 GB | Great balance of speed and quality |
171
+
172
+ **Quantization guide by device RAM:**
173
+
174
+ | Device RAM | Recommended | Examples |
175
+ |---|---|---|
176
+ | 8 GB | Q8_0 | iPhone 16 Pro |
177
+ | 6 GB | Q6_K | iPhone 14/15 Pro |
178
+ | 4 GB | Q4_K_M | iPhone 11-13, iPhone 14/15 base |
179
+ | 3 GB | Q3_K_S | iPhone X, older devices |
180
+
181
+ ## Device Helpers API
182
+
183
+ ```typescript
184
+ import { getDeviceCapabilities, canRunModel, recommendQuantization } from 'local-llm-rn';
185
+ ```
186
+
187
+ ### `getDeviceCapabilities()`
188
+
189
+ Returns device hardware info:
190
+
191
+ ```typescript
192
+ {
193
+ totalRAM: number; // Total RAM in bytes
194
+ availableRAM: number; // Available RAM (respects iOS jetsam limits)
195
+ gpuName: string; // e.g. "Apple A16 GPU"
196
+ metalFamily: number; // Apple GPU family (5=A12+, 7=A14+, 9=A17+)
197
+ metalVersion: number; // Metal version (1, 2, or 3)
198
+ iosVersion: string; // e.g. "17.2.1"
199
+ isLowPowerMode: boolean;
200
+ }
201
+ ```
202
+
203
+ ### `canRunModel(modelSizeBytes)`
204
+
205
+ Checks if the device has enough RAM to run a model:
206
+
207
+ ```typescript
208
+ const result = canRunModel(1_800_000_000);
209
+ // { canRun: true }
210
+ // or { canRun: false, reason: "...", suggestion: "..." }
211
+ ```
212
+
213
+ ### `recommendQuantization()`
214
+
215
+ Suggests the best quantization level based on device RAM:
216
+
217
+ ```typescript
218
+ const quant = recommendQuantization();
219
+ // "Q8_0" | "Q6_K" | "Q4_K_M" | "Q3_K_S"
220
+ ```
221
+
222
+ ## Configuration
223
+
224
+ ```typescript
225
+ const ai = await LocalLLM.create({
226
+ model: 'user/repo/file.gguf', // HuggingFace shorthand or local path
227
+
228
+ compute: 'gpu', // 'gpu' | 'cpu' | 'auto'
229
+ contextSize: 2048, // Context window size
230
+ batchSize: 512, // Batch size for prompt processing
231
+
232
+ warmup: true, // Warmup on load — eliminates cold-start (default: true)
233
+
234
+ // Speculative decoding (optional — 2-3x faster generation)
235
+ // draftModel: 'user/repo/small-model.gguf', // Small model from same family
236
+ // draftNMax: 16, // Max draft tokens per step
237
+
238
+ onProgress: (pct) => {}, // Download progress callback (0-100)
239
+ });
240
+ ```
241
+
242
+ ## Error Handling
243
+
244
+ All errors thrown by `local-llm-rn` are instances of `LocalLLMError` with a typed `code` property:
245
+
246
+ ```typescript
247
+ import { LocalLLMError, LocalLLMErrorCode } from 'local-llm-rn';
248
+
249
+ try {
250
+ const ai = await LocalLLM.create({ model: 'user/repo/model.gguf' });
251
+ } catch (e) {
252
+ if (e instanceof LocalLLMError) {
253
+ switch (e.code) {
254
+ case LocalLLMErrorCode.MODEL_LOAD_FAILED:
255
+ // Handle model loading failure
256
+ break;
257
+ case LocalLLMErrorCode.DOWNLOAD_FAILED:
258
+ // Handle download failure
259
+ break;
260
+ case LocalLLMErrorCode.INSUFFICIENT_MEMORY:
261
+ // Suggest a smaller model
262
+ break;
263
+ }
264
+ }
265
+ }
266
+ ```
267
+
268
+ Available error codes: `MODEL_LOAD_FAILED`, `MODEL_TOO_LARGE`, `CONTEXT_CREATE_FAILED`, `CONTEXT_EXHAUSTED`, `INFERENCE_FAILED`, `STREAM_FAILED`, `DOWNLOAD_FAILED`, `DOWNLOAD_INTEGRITY_MISMATCH`, `VISION_FAILED`, `VISION_FETCH_FAILED`, `EMBEDDING_FAILED`, `NOT_INITIALIZED`, `INVALID_PATH`, `CACHE_CORRUPT`, `QUANTIZE_FAILED`, `INSUFFICIENT_MEMORY`.
269
+
270
+ ## Device + Performance Combo
271
+
272
+ Combine device capabilities with inference metrics:
273
+
274
+ ```typescript
275
+ import { LocalLLM, getDeviceCapabilities } from 'local-llm-rn';
276
+
277
+ const caps = getDeviceCapabilities();
278
+ console.log(`Device: ${caps.gpuName}, ${(caps.totalRAM / 1e9).toFixed(1)} GB RAM`);
279
+
280
+ const ai = await LocalLLM.create({
281
+ model: modelPath,
282
+ compute: 'gpu',
283
+ });
284
+
285
+ const response = await ai.chat.completions.create({
286
+ messages: [{ role: 'user', content: 'Hello!' }],
287
+ });
288
+
289
+ console.log(response.choices[0].message.content);
290
+ console.log(`Speed: ${response._timing?.generatedTokensPerSec.toFixed(1)} tok/s on ${caps.gpuName}`);
291
+
292
+ ai.dispose();
293
+ ```
294
+
295
+ ## Examples
296
+
297
+ - **[Expo example](./examples/expo-test/)** — Complete chat UI with device detection, model downloading, and streaming responses
298
+ - **[Bare RN example](./examples/react-native-test/)** — Minimal bare React Native test app
299
+
300
+ ## Ecosystem
301
+
302
+ | Package | Description | Install |
303
+ |---|---|---|
304
+ | [`local-llm`](https://www.npmjs.com/package/local-llm) | Node.js / Bun / Electron | `npm install local-llm` |
305
+ | [`local-llm-rn`](https://www.npmjs.com/package/local-llm-rn) | React Native / Expo (this package) | `npm install local-llm-rn` |
306
+ | [`local_llm`](https://pub.dev/packages/local_llm) | Flutter | `flutter pub add local_llm` |
307
+ | [`hilum-local-llm-engine`](https://github.com/hilum-labs/hilum-local-llm-engine) | Core C++ engine | Vendored automatically |
308
+
309
+ ## Contributing
310
+
311
+ We welcome contributions! See [CONTRIBUTING.md](./CONTRIBUTING.md) for setup instructions.
312
+
313
+ ## Contact
314
+
315
+ Questions, feedback, or partnership inquiries: [info@hilumlabs.com](mailto:info@hilumlabs.com)
316
+
317
+ ## License
318
+
319
+ MIT — See [LICENSE](./LICENSE) for details.
320
+
321
+ Made by [Hilum Labs](https://github.com/hilum-labs).
package/android/build.gradle.kts ADDED
@@ -0,0 +1,169 @@
1
+ import org.jetbrains.kotlin.gradle.dsl.KotlinAndroidProjectExtension
2
+ import java.util.Properties
3
+
4
+ buildscript {
5
+ repositories { mavenCentral(); google() }
6
+ }
7
+
/**
 * Locates the Android SDK directory for this build.
 *
 * The `sdk.dir` entry of the root project's `local.properties` wins when it is
 * present (it is returned as-is, without an existence check). Otherwise the
 * first of `ANDROID_SDK_ROOT` / `ANDROID_HOME` that points at an existing path
 * is used.
 *
 * @param project any project of the build; only its root project is consulted.
 * @return the SDK directory, or `null` when none of the sources yields one.
 */
fun resolveAndroidSdkDir(project: Project): File? {
    val propsFile = project.rootProject.file("local.properties")
    if (propsFile.exists()) {
        val props = Properties().apply {
            propsFile.inputStream().use { stream -> load(stream) }
        }
        val configured = props.getProperty("sdk.dir")
        if (configured != null) {
            return File(configured)
        }
    }

    // Fall back to the conventional environment variables, in priority order.
    for (variable in listOf("ANDROID_SDK_ROOT", "ANDROID_HOME")) {
        val value = System.getenv(variable) ?: continue
        val candidate = File(value)
        if (candidate.exists()) {
            return candidate
        }
    }
    return null
}
21
+
/**
 * Best-effort installation of CMake [version] into the Android SDK.
 *
 * Does nothing when the SDK directory cannot be resolved, when
 * `<sdk>/cmake/<version>` already exists, or when no `sdkmanager` launcher is
 * found. Otherwise runs `sdkmanager "cmake;<version>"` and echoes its output.
 * Failures are reported as warnings and never abort the build configuration.
 *
 * @param project project used to resolve the Android SDK location.
 * @param version exact CMake version to install, e.g. "4.1.2".
 */
fun ensureCmake(project: Project, version: String) {
    val sdkDir = resolveAndroidSdkDir(project) ?: return
    val cmakeDir = sdkDir.resolve("cmake/$version")
    if (cmakeDir.exists()) {
        println("local-llm-rn: CMake $version found at ${cmakeDir.absolutePath}")
        return
    }

    // The command-line tools ship a .bat launcher on Windows (same OS check
    // that resolveGlslc uses for glslc.exe).
    val launcherName =
        if (System.getProperty("os.name").startsWith("Windows")) "sdkmanager.bat" else "sdkmanager"

    // Locate sdkmanager
    val sdkmanager = sequenceOf(
        sdkDir.resolve("cmdline-tools/latest/bin/$launcherName"),
        sdkDir.resolve("cmdline-tools/bin/$launcherName"),
        sdkDir.resolve("tools/bin/$launcherName"),
    ).firstOrNull { it.exists() } ?: return

    println("local-llm-rn: Installing CMake $version via sdkmanager…")
    val process = try {
        ProcessBuilder(sdkmanager.absolutePath, "cmake;$version")
            .redirectErrorStream(true)
            .start()
    } catch (e: java.io.IOException) {
        // Keep this helper best-effort: a launcher that cannot be started must
        // not fail project configuration.
        println("WARNING: could not launch sdkmanager (${e.message}) — CMake $version may not be installed")
        return
    }

    // Close the child's stdin so an interactive prompt (e.g. licence
    // acceptance) reads EOF instead of blocking the build forever.
    process.outputStream.close()

    process.inputStream.bufferedReader().forEachLine { println(it) }
    val exitCode = process.waitFor()
    if (exitCode != 0) {
        println("WARNING: sdkmanager exited with code $exitCode — CMake $version may not be installed")
    }
}
47
+
/**
 * Finds the `glslc` shader compiler shipped inside an installed Android NDK.
 *
 * NDK roots are probed in this order: existing directories named by
 * `ANDROID_NDK_ROOT` / `ANDROID_NDK_HOME`, every entry of `<sdk>/ndk` sorted by
 * name in descending order, then `<sdk>/ndk-bundle` if it exists. Within each
 * root, every child of `shader-tools/` is checked for the executable.
 *
 * @param project project used to resolve the Android SDK location.
 * @return absolute path of the first `glslc` found, or `null` if there is none.
 */
fun resolveGlslc(project: Project): String? {
    val onWindows = System.getProperty("os.name").startsWith("Windows")
    val binaryName = if (onWindows) "glslc.exe" else "glslc"
    val sdkDir = resolveAndroidSdkDir(project)

    val ndkCandidates = mutableListOf<File>()
    for (variable in listOf("ANDROID_NDK_ROOT", "ANDROID_NDK_HOME")) {
        val root = System.getenv(variable)?.let(::File) ?: continue
        if (root.exists()) {
            ndkCandidates += root
        }
    }
    if (sdkDir != null) {
        val sideBySide = sdkDir.resolve("ndk").listFiles()
        if (sideBySide != null) {
            ndkCandidates += sideBySide.sortedByDescending { it.name }
        }
        val legacyBundle = sdkDir.resolve("ndk-bundle")
        if (legacyBundle.exists()) {
            ndkCandidates += legacyBundle
        }
    }

    for (ndkRoot in ndkCandidates) {
        val shaderTools = ndkRoot.resolve("shader-tools")
        if (!shaderTools.exists()) continue
        // listFiles() is null when shader-tools is not a readable directory.
        val hostDirs = shaderTools.listFiles() ?: continue
        for (hostDir in hostDirs) {
            val glslc = hostDir.resolve(binaryName)
            if (glslc.exists()) {
                return glslc.absolutePath
            }
        }
    }
    return null
}
80
+
81
+ plugins {
82
+ id("com.android.library")
83
+ id("org.jetbrains.kotlin.android")
84
+ id("com.facebook.react")
85
+ }
86
+
87
+ react {
88
+ root = file("..")
89
+ reactNativeDir = file("../../react-native")
90
+ codegenDir = file("../../@react-native/codegen")
91
+ cliFile = file("../../react-native/cli.js")
92
+ jsRootDir = file("../src")
93
+ libraryName = "LocalLLMSpec"
94
+ codegenJavaPackageName = "com.hilum.localllm"
95
+ }
96
+
// Single source of truth for the CMake version: the auto-install step and
// externalNativeBuild below must always agree on it.
val cmakeVersion = "4.1.2"

// Auto-install CMake if missing (plug-and-play for consumers).
ensureCmake(project, cmakeVersion)

android {
    namespace = "com.hilum.localllm"
    compileSdk = 35

    defaultConfig {
        // API 29 = Android 10+. Vulkan 1.1 (required by the engine) is available
        // from API 29. Devices without Vulkan fall back to CPU inference.
        minSdk = 29
        ndk { abiFilters += listOf("arm64-v8a") }

        externalNativeBuild {
            cmake {
                // Resolve glslc from the Android SDK/NDK installation without
                // relying on AGP's ndkDirectory during library configuration.
                val glslc = resolveGlslc(project)

                // listOfNotNull drops the glslc override when no compiler was found.
                arguments += listOfNotNull(
                    "-Wno-dev",
                    "-DCMAKE_BUILD_TYPE=Release",
                    "-DBUILD_SHARED_LIBS=ON",
                    "-DLLAMA_BUILD_COMMON=ON",
                    "-DLLAMA_OPENSSL=OFF",
                    // CPU variant dispatch (2-4x speedup on modern ARM)
                    "-DGGML_NATIVE=OFF",
                    "-DGGML_BACKEND_DL=ON",
                    "-DGGML_CPU_ALL_VARIANTS=ON",
                    "-DGGML_LLAMAFILE=OFF",
                    // Vulkan GPU + Adreno optimizations
                    glslc?.let { "-DVulkan_GLSLC_EXECUTABLE=$it" },
                    "-DGGML_VULKAN=ON",
                    "-DGGML_VULKAN_VMA=ON",
                    "-DGGML_VULKAN_BUILD_ADRENO_SHADERS=ON",
                    // Disable unneeded targets
                    "-DLLAMA_BUILD_TOOLS=OFF",
                    "-DLLAMA_BUILD_TESTS=OFF",
                    "-DLLAMA_BUILD_EXAMPLES=OFF",
                    "-DLLAMA_BUILD_SERVER=OFF",
                )
            }
        }
    }

    externalNativeBuild {
        cmake {
            path("src/main/cpp/CMakeLists.txt")
            version = cmakeVersion
        }
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(getDefaultProguardFile("proguard-android-optimize.txt"), "proguard-rules.pro")
        }
    }

    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_17
        targetCompatibility = JavaVersion.VERSION_17
    }

}
162
+
163
+ configure<KotlinAndroidProjectExtension> {
164
+ jvmToolchain(17)
165
+ }
166
+
167
+ dependencies {
168
+ implementation("com.facebook.react:react-android")
169
+ }
package/android/proguard-rules.pro ADDED
@@ -0,0 +1,14 @@
1
+ # Keep all JNI-facing classes and methods
2
+ -keep class com.hilum.localllm.** { *; }
3
+
4
+ # Keep native methods from being stripped
5
+ -keepclassmembers class com.hilum.localllm.LocalLLMModule {
6
+ native <methods>;
7
+ void emitToken(...);
8
+ void emitBatchToken(...);
9
+ void emitDownloadProgress(...);
10
+ void emitDownloadComplete(...);
11
+ void emitDownloadError(...);
12
+ void emitQuantizeComplete(...);
13
+ void emitLog(...);
14
+ }
package/android/src/main/AndroidManifest.xml ADDED
@@ -0,0 +1,2 @@
1
+ <manifest xmlns:android="http://schemas.android.com/apk/res/android">
2
+ </manifest>
package/android/src/main/cpp/CMakeLists.txt ADDED
@@ -0,0 +1,71 @@
# Native build for the local-llm-rn Android module: configures the bundled
# engine sources as a subproject and builds LocalLLM.cpp into a shared library
# linked against it.
cmake_minimum_required(VERSION 3.22.1...4.1)
project("local-llm-rn" LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Windows/MSVC: suppress high-volume conversion/sign warnings from vendor code.
if(MSVC)
  add_compile_options(
    /wd4018 # signed/unsigned mismatch
    /wd4101 # unreferenced local variable
    /wd4244 # narrowing conversion
    /wd4267 # size_t -> smaller type
    /wd4305 # truncation to smaller floating type
  )
endif()

# ABI-specific settings (from engine's llama.android example).
# Set before add_subdirectory() so the engine's configuration can see them.
if(DEFINED ANDROID_ABI)
  if(ANDROID_ABI STREQUAL "arm64-v8a")
    set(GGML_SYSTEM_ARCH "ARM")
    set(GGML_CPU_KLEIDIAI ON)
    set(GGML_OPENMP ON)
  elseif(ANDROID_ABI STREQUAL "x86_64")
    set(GGML_SYSTEM_ARCH "x86")
    set(GGML_CPU_KLEIDIAI OFF)
    set(GGML_OPENMP OFF)
  endif()
endif()

# Engine source resolution:
#   - published package / prepared repo: repo-root cpp/
#   - local development fallback:        vendor/hilum-local-llm-engine/
# Paths are quoted so a checkout location containing ';' is not split into
# several arguments.
set(LLAMA_SRC_PACKAGE "${CMAKE_CURRENT_LIST_DIR}/../../../../cpp")
set(LLAMA_SRC_VENDOR "${CMAKE_CURRENT_LIST_DIR}/../../../../vendor/hilum-local-llm-engine")

if(EXISTS "${LLAMA_SRC_PACKAGE}/CMakeLists.txt")
  set(LLAMA_SRC "${LLAMA_SRC_PACKAGE}")
elseif(EXISTS "${LLAMA_SRC_VENDOR}/CMakeLists.txt")
  set(LLAMA_SRC "${LLAMA_SRC_VENDOR}")
else()
  message(FATAL_ERROR
    "Engine source not found. Expected either ${LLAMA_SRC_PACKAGE} or ${LLAMA_SRC_VENDOR} "
    "to contain CMakeLists.txt. Run scripts/prepare.sh before building from the repo root.")
endif()

# FORCE overrides any previously cached value of HILUM_BUILD_LIB.
set(HILUM_BUILD_LIB ON CACHE BOOL "" FORCE)
add_subdirectory("${LLAMA_SRC}" build-llama)

add_library(${CMAKE_PROJECT_NAME} SHARED LocalLLM.cpp)

# Include both dev paths (tools/mtmd) and published paths (mtmd)
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE
  "${LLAMA_SRC}/include"
  "${LLAMA_SRC}/ggml/include"
  "${LLAMA_SRC}/ggml/src"
  "${LLAMA_SRC}/src"
  "${LLAMA_SRC}/common"
  "${LLAMA_SRC}/hilum"
  "${LLAMA_SRC}/tools/mtmd"
  "${LLAMA_SRC}/mtmd"
  "${LLAMA_SRC}/vendor"
)

# PRIVATE: use the keyword signature instead of the legacy keyword-less form;
# nothing in this file links against this target.
target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE
  hilum
  llama
  common
  android
  log
)