local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626) hide show
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,785 @@
1
+ #include "llama-mmap.h"
2
+
3
+ #include "llama-impl.h"
4
+
5
+ #include "ggml.h"
6
+
7
+ #include <cstring>
8
+ #include <climits>
9
+ #include <stdexcept>
10
+ #include <cerrno>
11
+ #include <algorithm>
12
+
13
+ #ifdef __has_include
14
+ #if __has_include(<unistd.h>)
15
+ #include <unistd.h>
16
+ #include <fcntl.h>
17
+ #include <sys/stat.h>
18
+ #if defined(_POSIX_MAPPED_FILES)
19
+ #include <sys/mman.h>
20
+ #endif
21
+ #if defined(_POSIX_MEMLOCK_RANGE)
22
+ #include <sys/resource.h>
23
+ #endif
24
+ #endif
25
+ #endif
26
+
27
+ #if defined(_WIN32)
28
+ #define WIN32_LEAN_AND_MEAN
29
+ #ifndef NOMINMAX
30
+ #define NOMINMAX
31
+ #endif
32
+ #include <windows.h>
33
+ #ifndef PATH_MAX
34
+ #define PATH_MAX MAX_PATH
35
+ #endif
36
+ #include <io.h>
37
+ #endif
38
+
39
+ #if defined(__APPLE__)
40
+ #include <TargetConditionals.h>
41
+ #endif
42
+
43
+ // TODO: consider moving to llama-impl.h if needed in more places
44
+ #if defined(_WIN32)
45
+ static std::string llama_format_win_err(DWORD err) {
46
+ LPSTR buf;
47
+ size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
48
+ NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
49
+ if (!size) {
50
+ return "FormatMessageA failed";
51
+ }
52
+ std::string ret(buf, size);
53
+ LocalFree(buf);
54
+ return ret;
55
+ }
56
+ #endif
57
+
58
+ // llama_file
59
+
60
+ struct llama_file::impl {
61
+ #if defined(_WIN32)
62
+ HANDLE fp_win32;
63
+ std::string GetErrorMessageWin32(DWORD error_code) const {
64
+ std::string ret;
65
+ LPSTR lpMsgBuf = NULL;
66
+ DWORD bufLen = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
67
+ NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL);
68
+ if (!bufLen) {
69
+ ret = format("Win32 error code: %lx", error_code);
70
+ } else {
71
+ ret = lpMsgBuf;
72
+ LocalFree(lpMsgBuf);
73
+ }
74
+
75
+ return ret;
76
+ }
77
+
78
+ impl(const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false) {
79
+ fp = ggml_fopen(fname, mode);
80
+ if (fp == NULL) {
81
+ throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
82
+ }
83
+ fp_win32 = (HANDLE) _get_osfhandle(_fileno(fp));
84
+ seek(0, SEEK_END);
85
+ size = tell();
86
+ seek(0, SEEK_SET);
87
+ }
88
+
89
+ size_t tell() const {
90
+ LARGE_INTEGER li;
91
+ li.QuadPart = 0;
92
+ BOOL ret = SetFilePointerEx(fp_win32, li, &li, FILE_CURRENT);
93
+ if (!ret) {
94
+ throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
95
+ }
96
+
97
+ return li.QuadPart;
98
+ }
99
+
100
+ void seek(size_t offset, int whence) const {
101
+ static_assert(SEEK_SET == FILE_BEGIN, "SEEK_SET != FILE_BEGIN");
102
+ static_assert(SEEK_CUR == FILE_CURRENT, "SEEK_CUR != FILE_CURRENT");
103
+ static_assert(SEEK_END == FILE_END, "SEEK_END != FILE_END");
104
+
105
+ LARGE_INTEGER li;
106
+ li.QuadPart = offset;
107
+ BOOL ret = SetFilePointerEx(fp_win32, li, NULL, whence);
108
+ if (!ret) {
109
+ throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
110
+ }
111
+ }
112
+
113
+ void read_raw(void * ptr, size_t len) {
114
+ size_t bytes_read = 0;
115
+ while (bytes_read < len) {
116
+ size_t chunk_size = std::min<size_t>(len - bytes_read, 64*1024*1024);
117
+ DWORD chunk_read = 0;
118
+ BOOL result = ReadFile(fp_win32, reinterpret_cast<char*>(ptr) + bytes_read, chunk_size, &chunk_read, NULL);
119
+ if (!result) {
120
+ throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
121
+ }
122
+ if (chunk_read < chunk_size || chunk_read == 0) {
123
+ throw std::runtime_error("unexpectedly reached end of file");
124
+ }
125
+
126
+ bytes_read += chunk_read;
127
+ }
128
+ }
129
+
130
+ uint32_t read_u32() {
131
+ uint32_t val;
132
+ read_raw(&val, sizeof(val));
133
+ return val;
134
+ }
135
+
136
+ void write_raw(const void * ptr, size_t len) const {
137
+ size_t bytes_written = 0;
138
+ while (bytes_written < len) {
139
+ size_t chunk_size = std::min<size_t>(len - bytes_written, 64*1024*1024);
140
+ DWORD chunk_written = 0;
141
+ BOOL result = WriteFile(fp_win32, reinterpret_cast<char const*>(ptr) + bytes_written, chunk_size, &chunk_written, NULL);
142
+ if (!result) {
143
+ throw std::runtime_error(format("write error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
144
+ }
145
+ if (chunk_written < chunk_size || chunk_written == 0) {
146
+ throw std::runtime_error("unexpectedly failed to write bytes");
147
+ }
148
+
149
+ bytes_written += chunk_written;
150
+ }
151
+ }
152
+
153
+ void write_u32(uint32_t val) const {
154
+ write_raw(&val, sizeof(val));
155
+ }
156
+
157
+ bool has_direct_io() const {
158
+ return true;
159
+ }
160
+
161
+ ~impl() {
162
+ if (fp) {
163
+ std::fclose(fp);
164
+ }
165
+ }
166
+ #else
167
+ impl(const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false) : fname(fname) {
168
+ #ifdef __linux__
169
+ // Try unbuffered I/O for read only
170
+ if (use_direct_io && std::strcmp(mode, "rb") == 0) {
171
+ if (init_fd()) {
172
+ return;
173
+ }
174
+ LLAMA_LOG_WARN("Failed to open file '%s' with error: %s. Falling back to buffered I/O",
175
+ fname, strerror(errno));
176
+ }
177
+ #endif
178
+ init_fp(mode);
179
+ }
180
+
181
+ #ifdef __linux__
182
+ bool init_fd() {
183
+ fd = open(fname.c_str(), O_RDONLY | O_DIRECT);
184
+
185
+ if (fd != -1) {
186
+ struct stat file_stats{};
187
+ fstat(fd, &file_stats);
188
+
189
+ size = file_stats.st_size;
190
+ alignment = file_stats.st_blksize;
191
+
192
+ off_t ret = lseek(fd, 0, SEEK_SET);
193
+ if (ret == -1) {
194
+ throw std::runtime_error(format("seek error: %s", strerror(errno)));
195
+ }
196
+ return true;
197
+ }
198
+ return false;
199
+ }
200
+ #endif
201
+
202
+ void init_fp(const char * mode) {
203
+ fp = ggml_fopen(fname.c_str(), mode);
204
+ if (fp == NULL) {
205
+ throw std::runtime_error(format("failed to open %s: %s", fname.c_str(), strerror(errno)));
206
+ }
207
+ seek(0, SEEK_END);
208
+ size = tell();
209
+ seek(0, SEEK_SET);
210
+ }
211
+
212
+ size_t tell() const {
213
+ if (fd == -1) {
214
+ long ret = std::ftell(fp);
215
+ if (ret == -1) {
216
+ throw std::runtime_error(format("ftell error: %s", strerror(errno)));
217
+ }
218
+
219
+ return (size_t) ret;
220
+ }
221
+
222
+ off_t pos = lseek(fd, 0, SEEK_CUR);
223
+ if (pos == -1) {
224
+ throw std::runtime_error(format("lseek error: %s", strerror(errno)));
225
+ }
226
+ return (size_t) pos;
227
+ }
228
+
229
+ void seek(size_t offset, int whence) const {
230
+ off_t ret = 0;
231
+ if (fd == -1) {
232
+ ret = std::fseek(fp, (long) offset, whence);
233
+ } else {
234
+ ret = lseek(fd, offset, whence);
235
+ }
236
+ if (ret == -1) {
237
+ throw std::runtime_error(format("seek error: %s", strerror(errno)));
238
+ }
239
+ }
240
+
241
+ void read_raw_unsafe(void * ptr, size_t len) {
242
+ if (len == 0) {
243
+ return;
244
+ }
245
+ errno = 0;
246
+ if (fd == -1) {
247
+ const size_t curr_off = tell();
248
+ const size_t to_read = std::min(len, size - curr_off);
249
+
250
+ std::size_t ret = std::fread(ptr, to_read, 1, fp);
251
+ if (ferror(fp)) {
252
+ throw std::runtime_error(format("read error: %s", strerror(errno)));
253
+ }
254
+ if (to_read > 0 && ret != 1) {
255
+ throw std::runtime_error("unexpectedly reached end of file");
256
+ }
257
+ } else {
258
+ size_t bytes_read = 0;
259
+ while (bytes_read < len) {
260
+ const size_t to_read = len - bytes_read;
261
+ ssize_t ret = ::read(fd, reinterpret_cast<char *>(ptr) + bytes_read, to_read);
262
+
263
+ if (ret == -1) {
264
+ if (errno == EINTR) {
265
+ continue; // Interrupted by signal, retry
266
+ }
267
+ // Fallback to std::fread in case the DMA controller cannot access the buffer
268
+ if (errno == EFAULT || errno == EINVAL) {
269
+ LLAMA_LOG_WARN("%s: Falling back to buffered IO due to %s\n", __func__, strerror(errno));
270
+ auto curr_off = tell();
271
+ close(fd);
272
+ fd = -1;
273
+ alignment = 1;
274
+ init_fp("rb");
275
+ seek(curr_off, SEEK_SET);
276
+ read_raw_unsafe(ptr, len);
277
+ return;
278
+ }
279
+ throw std::runtime_error(format("read error: %s", strerror(errno)));
280
+ }
281
+ if (ret == 0) {
282
+ // EOF: allow if this read was only pulling alignment padding past file end
283
+ off_t pos = lseek(fd, 0, SEEK_CUR);
284
+ if (pos != -1 && (size_t) pos == size) {
285
+ std::memset(reinterpret_cast<char *>(ptr) + bytes_read, 0, len - bytes_read);
286
+ return;
287
+ }
288
+ throw std::runtime_error("unexpectedly reached end of file");
289
+ }
290
+
291
+ bytes_read += (size_t) ret;
292
+ }
293
+ }
294
+ }
295
+
296
+ void read_aligned_chunk(void * dest, size_t size) {
297
+ size_t offset = tell();
298
+ off_t aligned_offset = offset & ~(alignment - 1);
299
+ off_t offset_from_alignment = offset - aligned_offset;
300
+ size_t bytes_to_read = (offset_from_alignment + size + alignment - 1) & ~(alignment - 1);
301
+
302
+ void * raw_buffer = nullptr;
303
+ int ret = posix_memalign(&raw_buffer, alignment, bytes_to_read);
304
+ if (ret != 0) {
305
+ throw std::runtime_error(format("posix_memalign failed with error %d", ret));
306
+ }
307
+
308
+ struct aligned_buffer_deleter {
309
+ void operator()(void * p) const { free(p); }
310
+ };
311
+ std::unique_ptr<void, aligned_buffer_deleter> buffer(raw_buffer);
312
+
313
+ seek(aligned_offset, SEEK_SET);
314
+ read_raw_unsafe(buffer.get(), bytes_to_read);
315
+
316
+ uintptr_t actual_data = reinterpret_cast<uintptr_t>(buffer.get()) + offset_from_alignment;
317
+ memcpy(dest, reinterpret_cast<void *>(actual_data), size);
318
+ }
319
+
320
+ void read_raw(void * ptr, size_t len) {
321
+ if (has_direct_io()) {
322
+ read_aligned_chunk(ptr, len);
323
+ } else {
324
+ read_raw_unsafe(ptr, len);
325
+ }
326
+ }
327
+
328
+ uint32_t read_u32() {
329
+ uint32_t ret;
330
+ read_raw(&ret, sizeof(ret));
331
+ return ret;
332
+ }
333
+
334
+ void write_raw(const void * ptr, size_t len) const {
335
+ if (len == 0) {
336
+ return;
337
+ }
338
+ errno = 0;
339
+ size_t ret = std::fwrite(ptr, len, 1, fp);
340
+ if (ret != 1) {
341
+ throw std::runtime_error(format("write error: %s", strerror(errno)));
342
+ }
343
+ }
344
+
345
+ void write_u32(uint32_t val) const {
346
+ write_raw(&val, sizeof(val));
347
+ }
348
+
349
+ bool has_direct_io() const {
350
+ return fd != -1 && alignment > 1;
351
+ }
352
+
353
+ ~impl() {
354
+ if (fd != -1) {
355
+ close(fd);
356
+ } else {
357
+ std::fclose(fp);
358
+ }
359
+ }
360
+ int fd = -1;
361
+ std::string fname;
362
+ #endif
363
+
364
+ size_t read_alignment() const {
365
+ return alignment;
366
+ }
367
+
368
+ size_t alignment = 1;
369
+
370
+ FILE * fp{};
371
+ size_t size{};
372
+ };
373
+
374
+ llama_file::llama_file(const char * fname, const char * mode, const bool use_direct_io) :
375
+ pimpl(std::make_unique<impl>(fname, mode, use_direct_io)) {}
376
+ llama_file::~llama_file() = default;
377
+
378
+ size_t llama_file::tell() const { return pimpl->tell(); }
379
+ size_t llama_file::size() const { return pimpl->size; }
380
+
381
+ size_t llama_file::read_alignment() const { return pimpl->read_alignment(); }
382
+ bool llama_file::has_direct_io() const { return pimpl->has_direct_io(); }
383
+
384
+ int llama_file::file_id() const {
385
+ #ifdef _WIN32
386
+ return _fileno(pimpl->fp);
387
+ #else
388
+ if (pimpl->fd != -1) {
389
+ return pimpl->fd;
390
+ }
391
+ #if defined(fileno)
392
+ return fileno(pimpl->fp);
393
+ #else
394
+ return ::fileno(pimpl->fp);
395
+ #endif
396
+ #endif
397
+ }
398
+
399
+ void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
400
+ void llama_file::read_raw(void * ptr, size_t len) { pimpl->read_raw(ptr, len); }
401
+ #ifdef _WIN32
402
+ void llama_file::read_raw_unsafe(void * ptr, size_t len) { pimpl->read_raw(ptr, len); }
403
+ #else
404
+ void llama_file::read_raw_unsafe(void * ptr, size_t len) { pimpl->read_raw_unsafe(ptr, len); }
405
+ #endif
406
+
407
+ uint32_t llama_file::read_u32() { return pimpl->read_u32(); }
408
+
409
+ void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
410
+ void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
411
+
412
+ // llama_buf_file
413
+
414
+ llama_buf_file::llama_buf_file(const uint8_t * data, size_t size)
415
+ : data(data), buf_size(size), offset(0) {}
416
+
417
+ size_t llama_buf_file::tell() const { return offset; }
418
+ size_t llama_buf_file::size() const { return buf_size; }
419
+
420
+ void llama_buf_file::seek(size_t off, int whence) const {
421
+ switch (whence) {
422
+ case SEEK_SET: offset = off; break;
423
+ case SEEK_CUR: offset += off; break;
424
+ case SEEK_END: offset = buf_size + off; break;
425
+ }
426
+ if (offset > buf_size) {
427
+ offset = buf_size;
428
+ }
429
+ }
430
+
431
+ void llama_buf_file::read_raw(void * ptr, size_t len) {
432
+ if (offset + len > buf_size) {
433
+ throw std::runtime_error("llama_buf_file: read past end of buffer");
434
+ }
435
+ memcpy(ptr, data + offset, len);
436
+ offset += len;
437
+ }
438
+
439
+ uint32_t llama_buf_file::read_u32() {
440
+ uint32_t val;
441
+ read_raw(&val, sizeof(val));
442
+ return val;
443
+ }
444
+
445
+ // llama_mmap
446
+
447
+ struct llama_mmap::impl {
448
+ #ifdef _POSIX_MAPPED_FILES
449
+ std::vector<std::pair<size_t, size_t>> mapped_fragments;
450
+
451
+ impl(struct llama_file * file, size_t prefetch, bool numa) {
452
+ size = file->size();
453
+ int fd = file->file_id();
454
+ int flags = MAP_SHARED;
455
+ if (numa) { prefetch = 0; }
456
+ #ifdef __linux__
457
+ if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
458
+ LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
459
+ strerror(errno));
460
+ }
461
+ if (prefetch) { flags |= MAP_POPULATE; }
462
+ #endif
463
+ addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
464
+ if (addr == MAP_FAILED) {
465
+ throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
466
+ }
467
+
468
+ if (prefetch > 0) {
469
+ if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) {
470
+ LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
471
+ strerror(errno));
472
+ }
473
+ }
474
+ if (numa) {
475
+ if (posix_madvise(addr, file->size(), POSIX_MADV_RANDOM)) {
476
+ LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
477
+ strerror(errno));
478
+ }
479
+ }
480
+
481
+ mapped_fragments.emplace_back(0, file->size());
482
+ }
483
+
484
+ static void align_range(size_t * first, size_t * last, size_t page_size) {
485
+ size_t offset_in_page = *first & (page_size - 1);
486
+ size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
487
+ *first += offset_to_page;
488
+
489
+ *last = *last & ~(page_size - 1);
490
+
491
+ if (*last <= *first) {
492
+ *last = *first;
493
+ }
494
+ }
495
+
496
+ void unmap_fragment(size_t first, size_t last) {
497
+ int page_size = sysconf(_SC_PAGESIZE);
498
+ align_range(&first, &last, page_size);
499
+ size_t len = last - first;
500
+
501
+ if (len == 0) {
502
+ return;
503
+ }
504
+
505
+ GGML_ASSERT(first % page_size == 0);
506
+ GGML_ASSERT(last % page_size == 0);
507
+ GGML_ASSERT(last > first);
508
+
509
+ void * next_page_start = (uint8_t *) addr + first;
510
+
511
+ if (munmap(next_page_start, len)) {
512
+ LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
513
+ }
514
+
515
+ std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
516
+ for (const auto & frag : mapped_fragments) {
517
+ if (frag.first < first && frag.second > last) {
518
+ new_mapped_fragments.emplace_back(frag.first, first);
519
+ new_mapped_fragments.emplace_back(last, frag.second);
520
+ } else if (frag.first < first && frag.second > first) {
521
+ new_mapped_fragments.emplace_back(frag.first, first);
522
+ } else if (frag.first < last && frag.second > last) {
523
+ new_mapped_fragments.emplace_back(last, frag.second);
524
+ } else if (frag.first >= first && frag.second <= last) {
525
+ } else {
526
+ new_mapped_fragments.push_back(frag);
527
+ }
528
+ }
529
+ mapped_fragments = std::move(new_mapped_fragments);
530
+ }
531
+
532
+ ~impl() {
533
+ for (const auto & frag : mapped_fragments) {
534
+ if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
535
+ LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
536
+ }
537
+ }
538
+ }
539
+ #elif defined(_WIN32)
540
+ HANDLE hMapping = nullptr;
541
+
542
+ impl(struct llama_file * file, size_t prefetch, bool numa) {
543
+ GGML_UNUSED(numa);
544
+
545
+ size = file->size();
546
+
547
+ HANDLE hFile = (HANDLE) _get_osfhandle(file->file_id());
548
+
549
+ hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
550
+
551
+ if (hMapping == NULL) {
552
+ DWORD error = GetLastError();
553
+ throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
554
+ }
555
+
556
+ addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
557
+ DWORD error = GetLastError();
558
+
559
+ if (addr == NULL) {
560
+ CloseHandle(hMapping);
561
+ throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
562
+ }
563
+
564
+ if (prefetch > 0) {
565
+ #if _WIN32_WINNT >= 0x602
566
+ BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
567
+ HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
568
+
569
+ pPrefetchVirtualMemory = (decltype(pPrefetchVirtualMemory))(void *) GetProcAddress(hKernel32, "PrefetchVirtualMemory");
570
+
571
+ if (pPrefetchVirtualMemory) {
572
+ WIN32_MEMORY_RANGE_ENTRY range;
573
+ range.VirtualAddress = addr;
574
+ range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
575
+ if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
576
+ LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
577
+ llama_format_win_err(GetLastError()).c_str());
578
+ }
579
+ }
580
+ #else
581
+ LLAMA_LOG_DEBUG("skipping PrefetchVirtualMemory because _WIN32_WINNT < 0x602\n");
582
+ #endif
583
+ }
584
+ }
585
+
586
+ void unmap_fragment(size_t first, size_t last) {
587
+ GGML_UNUSED(first);
588
+ GGML_UNUSED(last);
589
+ }
590
+
591
+ ~impl() {
592
+ if (hMapping) {
593
+ if (addr) {
594
+ if (!UnmapViewOfFile(addr)) {
595
+ LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
596
+ llama_format_win_err(GetLastError()).c_str());
597
+ }
598
+ }
599
+ if (!CloseHandle(hMapping)) {
600
+ LLAMA_LOG_WARN("warning: CloseHandle failed: %s\n",
601
+ llama_format_win_err(GetLastError()).c_str());
602
+ }
603
+ }
604
+ }
605
+ #else
606
+ impl(struct llama_file * file, size_t prefetch, bool numa) {
607
+ GGML_UNUSED(file);
608
+ GGML_UNUSED(prefetch);
609
+ GGML_UNUSED(numa);
610
+
611
+ throw std::runtime_error("mmap not supported");
612
+ }
613
+
614
+ void unmap_fragment(size_t first, size_t last) {
615
+ GGML_UNUSED(first);
616
+ GGML_UNUSED(last);
617
+
618
+ throw std::runtime_error("mmap not supported");
619
+ }
620
+ #endif
621
+
622
+ void * addr;
623
+ size_t size;
624
+ };
625
+
626
+ llama_mmap::llama_mmap(struct llama_file * file, size_t prefetch, bool numa) : pimpl(std::make_unique<impl>(file, prefetch, numa)) {}
627
+ llama_mmap::~llama_mmap() = default;
628
+
629
+ size_t llama_mmap::size() const { return pimpl->size; }
630
+ void * llama_mmap::addr() const { return pimpl->addr; }
631
+
632
+ void llama_mmap::unmap_fragment(size_t first, size_t last) { pimpl->unmap_fragment(first, last); }
633
+
634
+ #if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
635
+ const bool llama_mmap::SUPPORTED = true;
636
+ #else
637
+ const bool llama_mmap::SUPPORTED = false;
638
+ #endif
639
+
640
+ // llama_mlock
641
+
642
+ struct llama_mlock::impl {
643
+ #ifdef _POSIX_MEMLOCK_RANGE
644
+ static size_t lock_granularity() {
645
+ return (size_t) sysconf(_SC_PAGESIZE);
646
+ }
647
+
648
+ bool raw_lock(const void * addr, size_t size) const {
649
+ if (!mlock(addr, size)) {
650
+ return true;
651
+ }
652
+
653
+ #ifdef __APPLE__
654
+ #define MLOCK_SUGGESTION \
655
+ "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
656
+ "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MEMLOCK (ulimit -l).\n"
657
+ #else
658
+ #define MLOCK_SUGGESTION \
659
+ "Try increasing RLIMIT_MEMLOCK ('ulimit -l' as root).\n"
660
+ #endif
661
+
662
+ char* errmsg = std::strerror(errno);
663
+ bool suggest = (errno == ENOMEM);
664
+ #if defined(TARGET_OS_VISION) || defined(TARGET_OS_TV) || defined(_AIX) || defined(__HAIKU__)
665
+ // visionOS/tvOS/Haiku don't support RLIMIT_MEMLOCK
666
+ // Skip resource limit checks on these platforms
667
+ suggest = false;
668
+ #else
669
+ struct rlimit lock_limit;
670
+ if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
671
+ suggest = false;
672
+ }
673
+ if (suggest && ((uint64_t)lock_limit.rlim_max > (uint64_t)lock_limit.rlim_cur + size)) {
674
+ suggest = false;
675
+ }
676
+ #endif
677
+
678
+ LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
679
+ size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
680
+ return false;
681
+ }
682
+
683
+ static void raw_unlock(void * addr, size_t size) {
684
+ if (munlock(addr, size)) {
685
+ LLAMA_LOG_WARN("warning: failed to munlock buffer: %s\n", std::strerror(errno));
686
+ }
687
+ }
688
+ #elif defined(_WIN32)
689
+ static size_t lock_granularity() {
690
+ SYSTEM_INFO si;
691
+ GetSystemInfo(&si);
692
+ return (size_t) si.dwPageSize;
693
+ }
694
+
695
+ bool raw_lock(void * ptr, size_t len) const {
696
+ for (int tries = 1; ; tries++) {
697
+ if (VirtualLock(ptr, len)) {
698
+ return true;
699
+ }
700
+ if (tries == 2) {
701
+ LLAMA_LOG_WARN("warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
702
+ len, size, llama_format_win_err(GetLastError()).c_str());
703
+ return false;
704
+ }
705
+
706
+ SIZE_T min_ws_size, max_ws_size;
707
+ if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
708
+ LLAMA_LOG_WARN("warning: GetProcessWorkingSetSize failed: %s\n",
709
+ llama_format_win_err(GetLastError()).c_str());
710
+ return false;
711
+ }
712
+ size_t increment = len + 1048576;
713
+ min_ws_size += increment;
714
+ max_ws_size += increment;
715
+ if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
716
+ LLAMA_LOG_WARN("warning: SetProcessWorkingSetSize failed: %s\n",
717
+ llama_format_win_err(GetLastError()).c_str());
718
+ return false;
719
+ }
720
+ }
721
+ }
722
+
723
+ static void raw_unlock(void * ptr, size_t len) {
724
+ if (!VirtualUnlock(ptr, len)) {
725
+ LLAMA_LOG_WARN("warning: failed to VirtualUnlock buffer: %s\n",
726
+ llama_format_win_err(GetLastError()).c_str());
727
+ }
728
+ }
729
+ #else
730
+ static size_t lock_granularity() {
731
+ return (size_t) 65536;
732
+ }
733
+
734
+ bool raw_lock(const void * addr, size_t len) const {
735
+ LLAMA_LOG_WARN("warning: mlock not supported on this system\n");
736
+ return false;
737
+ }
738
+
739
+ static void raw_unlock(const void * addr, size_t len) {}
740
+ #endif
741
+
742
+ impl() : addr(NULL), size(0), failed_already(false) {}
743
+
744
+ void init(void * ptr) {
745
+ GGML_ASSERT(addr == NULL && size == 0);
746
+ addr = ptr;
747
+ }
748
+
749
+ void grow_to(size_t target_size) {
750
+ GGML_ASSERT(addr);
751
+ if (failed_already) {
752
+ return;
753
+ }
754
+ size_t granularity = lock_granularity();
755
+ target_size = (target_size + granularity - 1) & ~(granularity - 1);
756
+ if (target_size > size) {
757
+ if (raw_lock((uint8_t *) addr + size, target_size - size)) {
758
+ size = target_size;
759
+ } else {
760
+ failed_already = true;
761
+ }
762
+ }
763
+ }
764
+
765
+ void * addr;
766
+ size_t size;
767
+
768
+ bool failed_already;
769
+ };
770
+
771
+ llama_mlock::llama_mlock() : pimpl(std::make_unique<impl>()) {}
772
+ llama_mlock::~llama_mlock() = default;
773
+
774
+ void llama_mlock::init(void * ptr) { pimpl->init(ptr); }
775
+ void llama_mlock::grow_to(size_t target_size) { pimpl->grow_to(target_size); }
776
+
777
+ #if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
778
+ const bool llama_mlock::SUPPORTED = true;
779
+ #else
780
+ const bool llama_mlock::SUPPORTED = false;
781
+ #endif
782
+
783
// Value of the platform's PATH_MAX constant (defined as MAX_PATH on Windows builds of this file).
size_t llama_path_max() {
    const size_t path_limit = PATH_MAX;
    return path_limit;
}