local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,41 @@
1
+ #pragma once
2
+
3
+ #include "llama.h"
4
+ #include "common.h"
5
+
6
+ struct common_speculative;
7
+
8
+ // comma separated list of all types
9
+ std::string common_speculative_type_name_str();
10
+
11
+ // convert string to type
12
+ enum common_speculative_type common_speculative_type_from_name(const std::string & name);
13
+
14
+ // convert type to string
15
+ std::string common_speculative_type_to_str(enum common_speculative_type type);
16
+
17
+ // check if the llama_context is compatible for speculative decoding
18
+ // note: clears the memory of the context
19
+ bool common_speculative_is_compat(llama_context * ctx_tgt);
20
+
21
+ common_speculative * common_speculative_init(
22
+ common_params_speculative & params,
23
+ llama_context * ctx_tgt);
24
+
25
+ void common_speculative_free(common_speculative * spec);
26
+
27
+ // optionally call once at the beginning of a new generation
28
+ void common_speculative_begin(common_speculative * spec, const llama_tokens & prompt);
29
+
30
+ // sample up to n_draft tokens and add them to the batch using the draft model
31
+ llama_tokens common_speculative_draft(
32
+ common_speculative * spec,
33
+ const common_params_speculative & params,
34
+ const llama_tokens & prompt,
35
+ llama_token id_last);
36
+
37
+ // informs the speculative decoder that n_accepted tokens were accepted by the target model
38
+ void common_speculative_accept(common_speculative * spec, uint16_t n_accepted);
39
+
40
+ // print statistics about the speculative decoding
41
+ void common_speculative_print_stats(const common_speculative * spec);
@@ -0,0 +1,64 @@
1
+ #include "unicode.h"
2
+
3
+ // implementation adopted from src/unicode.cpp
4
+
5
+ size_t utf8_sequence_length(unsigned char first_byte) {
6
+ const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
7
+ uint8_t highbits = static_cast<uint8_t>(first_byte) >> 4;
8
+ return lookup[highbits];
9
+ }
10
+
11
+ utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset) {
12
+ if (offset >= input.size()) {
13
+ return utf8_parse_result(utf8_parse_result::INCOMPLETE);
14
+ }
15
+
16
+ // ASCII fast path
17
+ if (!(input[offset] & 0x80)) {
18
+ return utf8_parse_result(utf8_parse_result::SUCCESS, input[offset], 1);
19
+ }
20
+
21
+ // Invalid: continuation byte as first byte
22
+ if (!(input[offset] & 0x40)) {
23
+ return utf8_parse_result(utf8_parse_result::INVALID);
24
+ }
25
+
26
+ // 2-byte sequence
27
+ if (!(input[offset] & 0x20)) {
28
+ if (offset + 1 >= input.size()) {
29
+ return utf8_parse_result(utf8_parse_result::INCOMPLETE);
30
+ }
31
+ if ((input[offset + 1] & 0xc0) != 0x80) {
32
+ return utf8_parse_result(utf8_parse_result::INVALID);
33
+ }
34
+ auto result = ((input[offset] & 0x1f) << 6) | (input[offset + 1] & 0x3f);
35
+ return utf8_parse_result(utf8_parse_result::SUCCESS, result, 2);
36
+ }
37
+
38
+ // 3-byte sequence
39
+ if (!(input[offset] & 0x10)) {
40
+ if (offset + 2 >= input.size()) {
41
+ return utf8_parse_result(utf8_parse_result::INCOMPLETE);
42
+ }
43
+ if ((input[offset + 1] & 0xc0) != 0x80 || (input[offset + 2] & 0xc0) != 0x80) {
44
+ return utf8_parse_result(utf8_parse_result::INVALID);
45
+ }
46
+ auto result = ((input[offset] & 0x0f) << 12) | ((input[offset + 1] & 0x3f) << 6) | (input[offset + 2] & 0x3f);
47
+ return utf8_parse_result(utf8_parse_result::SUCCESS, result, 3);
48
+ }
49
+
50
+ // 4-byte sequence
51
+ if (!(input[offset] & 0x08)) {
52
+ if (offset + 3 >= input.size()) {
53
+ return utf8_parse_result(utf8_parse_result::INCOMPLETE);
54
+ }
55
+ if ((input[offset + 1] & 0xc0) != 0x80 || (input[offset + 2] & 0xc0) != 0x80 || (input[offset + 3] & 0xc0) != 0x80) {
56
+ return utf8_parse_result(utf8_parse_result::INVALID);
57
+ }
58
+ auto result = ((input[offset] & 0x07) << 18) | ((input[offset + 1] & 0x3f) << 12) | ((input[offset + 2] & 0x3f) << 6) | (input[offset + 3] & 0x3f);
59
+ return utf8_parse_result(utf8_parse_result::SUCCESS, result, 4);
60
+ }
61
+
62
+ // Invalid first byte
63
+ return utf8_parse_result(utf8_parse_result::INVALID);
64
+ }
@@ -0,0 +1,22 @@
1
+ #pragma once
2
+
3
+ #include <cstdint>
4
+ #include <string_view>
5
+
6
+ // UTF-8 parsing utilities for streaming-aware unicode support
7
+
8
+ struct utf8_parse_result {
9
+ uint32_t codepoint; // Decoded codepoint (only valid if status == SUCCESS)
10
+ size_t bytes_consumed; // How many bytes this codepoint uses (1-4)
11
+ enum status { SUCCESS, INCOMPLETE, INVALID } status;
12
+
13
+ utf8_parse_result(enum status s, uint32_t cp = 0, size_t bytes = 0)
14
+ : codepoint(cp), bytes_consumed(bytes), status(s) {}
15
+ };
16
+
17
+ // Determine the expected length of a UTF-8 sequence from its first byte
18
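+ // Never returns 0: continuation bytes (0x80-0xBF) map to 1 and 0xF8-0xFF map to 4, so callers must not rely on it to reject invalid first bytes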
19
+ size_t utf8_sequence_length(unsigned char first_byte);
20
+
21
+ // Parse a single UTF-8 codepoint from input
22
+ utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset);
@@ -0,0 +1,494 @@
1
+ cmake_minimum_required(VERSION 3.14...3.28) # for add_link_options and implicit target directories.
2
+ project("ggml" C CXX ASM)
3
+
4
+ ### GGML Version
5
+ set(GGML_VERSION_MAJOR 0)
6
+ set(GGML_VERSION_MINOR 9)
7
+ set(GGML_VERSION_PATCH 7)
8
+ set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
9
+
10
+ find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
11
+ if(GIT_EXE)
12
+ # Get current git commit hash
13
+ execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
14
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
15
+ OUTPUT_VARIABLE GGML_BUILD_COMMIT
16
+ OUTPUT_STRIP_TRAILING_WHITESPACE
17
+ ERROR_QUIET
18
+ )
19
+
20
+ # Check if the working directory is dirty (i.e., has uncommitted changes)
21
+ execute_process(COMMAND ${GIT_EXE} diff-index --quiet HEAD -- .
22
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
23
+ RESULT_VARIABLE GGML_GIT_DIRTY
24
+ ERROR_QUIET
25
+ )
26
+ endif()
27
+
28
+ set(GGML_VERSION "${GGML_VERSION_BASE}")
29
+
30
+ if(NOT GGML_BUILD_COMMIT)
31
+ set(GGML_BUILD_COMMIT "unknown")
32
+ endif()
33
+
34
+ # Build the commit string with optional dirty flag
35
+ if(DEFINED GGML_GIT_DIRTY AND GGML_GIT_DIRTY EQUAL 1)
36
+ set(GGML_BUILD_COMMIT "${GGML_BUILD_COMMIT}-dirty")
37
+ endif()
38
+
39
+ include(CheckIncludeFileCXX)
40
+
41
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
42
+
43
+ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
44
+ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
45
+ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
46
+ endif()
47
+
48
+ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
49
+ set(GGML_STANDALONE ON)
50
+
51
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
52
+
53
+ # configure project version
54
+ # TODO
55
+ else()
56
+ set(GGML_STANDALONE OFF)
57
+
58
+ if (NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY)
59
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
60
+ endif()
61
+ endif()
62
+
63
+ if (EMSCRIPTEN)
64
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
65
+
66
+ option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
67
+ else()
68
+ if (MINGW)
69
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
70
+ else()
71
+ set(BUILD_SHARED_LIBS_DEFAULT ON)
72
+ endif()
73
+ endif()
74
+
75
+ # remove the lib prefix on win32 mingw
76
+ if (WIN32)
77
+ set(CMAKE_STATIC_LIBRARY_PREFIX "")
78
+ set(CMAKE_SHARED_LIBRARY_PREFIX "")
79
+ set(CMAKE_SHARED_MODULE_PREFIX "")
80
+ endif()
81
+
82
+ option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
83
+ option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
84
+ set(GGML_BACKEND_DIR "" CACHE PATH "ggml: directory to load dynamic backends from (requires GGML_BACKEND_DL)")
85
+
86
+ #
87
+ # option list
88
+ #
89
+
90
+ # TODO: mark all options as advanced when not GGML_STANDALONE
91
+
92
+ if (APPLE)
93
+ set(GGML_METAL_DEFAULT ON)
94
+ set(GGML_BLAS_DEFAULT ON)
95
+ set(GGML_BLAS_VENDOR_DEFAULT "Apple")
96
+ else()
97
+ set(GGML_METAL_DEFAULT OFF)
98
+ set(GGML_BLAS_DEFAULT OFF)
99
+ set(GGML_BLAS_VENDOR_DEFAULT "Generic")
100
+ endif()
101
+
102
+ if (CMAKE_CROSSCOMPILING OR DEFINED ENV{SOURCE_DATE_EPOCH})
103
+ message(STATUS "Setting GGML_NATIVE_DEFAULT to OFF")
104
+ set(GGML_NATIVE_DEFAULT OFF)
105
+ else()
106
+ set(GGML_NATIVE_DEFAULT ON)
107
+ endif()
108
+
109
+ # defaults
110
+ if (NOT GGML_LLAMAFILE_DEFAULT)
111
+ set(GGML_LLAMAFILE_DEFAULT OFF)
112
+ endif()
113
+
114
+ if (NOT GGML_CUDA_GRAPHS_DEFAULT)
115
+ set(GGML_CUDA_GRAPHS_DEFAULT OFF)
116
+ endif()
117
+
118
+ # general
119
+ option(GGML_STATIC "ggml: static link libraries" OFF)
120
+ option(GGML_NATIVE "ggml: optimize the build for the current system" ${GGML_NATIVE_DEFAULT})
121
+ option(GGML_LTO "ggml: enable link time optimization" OFF)
122
+ option(GGML_CCACHE "ggml: use ccache if available" ON)
123
+
124
+ # debug
125
+ option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
126
+ option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
127
+ option(GGML_GPROF "ggml: enable gprof" OFF)
128
+
129
+ # build
130
+ option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)
131
+
132
+ # sanitizers
133
+ option(GGML_SANITIZE_THREAD "ggml: enable thread sanitizer" OFF)
134
+ option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
135
+ option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)
136
+
137
+ # instruction set specific
138
+ if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT)
139
+ set(INS_ENB OFF)
140
+ else()
141
+ set(INS_ENB ON)
142
+ endif()
143
+
144
+ message(DEBUG "GGML_NATIVE : ${GGML_NATIVE}")
145
+ message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
146
+ message(DEBUG "INS_ENB : ${INS_ENB}")
147
+
148
+ option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
149
+ option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
150
+ option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
151
+ option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
152
+ option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
153
+ option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
154
+ option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
155
+ option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB})
156
+ option(GGML_AVX512 "ggml: enable AVX512F" OFF)
157
+ option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
158
+ option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
159
+ option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
160
+ if (NOT MSVC)
161
+ # in MSVC F16C and FMA are implied with AVX2/AVX512
162
+ option(GGML_FMA "ggml: enable FMA" ${INS_ENB})
163
+ option(GGML_F16C "ggml: enable F16C" ${INS_ENB})
164
+ # MSVC does not seem to support AMX
165
+ option(GGML_AMX_TILE "ggml: enable AMX-TILE" OFF)
166
+ option(GGML_AMX_INT8 "ggml: enable AMX-INT8" OFF)
167
+ option(GGML_AMX_BF16 "ggml: enable AMX-BF16" OFF)
168
+ endif()
169
+ option(GGML_LASX "ggml: enable lasx" ON)
170
+ option(GGML_LSX "ggml: enable lsx" ON)
171
+ option(GGML_RVV "ggml: enable rvv" ON)
172
+ option(GGML_RV_ZFH "ggml: enable riscv zfh" ON)
173
+ option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON)
174
+ option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON)
175
+ option(GGML_RV_ZIHINTPAUSE "ggml: enable riscv zihintpause " ON)
176
+ option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
177
+ option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})
178
+
179
+ option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
180
+ set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
181
+ set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
182
+
183
+ # ggml core
184
+ set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")
185
+ option(GGML_CPU "ggml: enable CPU backend" ON)
186
+ option(GGML_SCHED_NO_REALLOC "ggml: disallow reallocations in ggml-alloc (for debugging)" OFF)
187
+
188
+ # 3rd party libs / backends
189
+ option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
190
+ option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
191
+ set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
192
+ "ggml: BLAS library vendor")
193
+ option(GGML_LLAMAFILE "ggml: use LLAMAFILE" ${GGML_LLAMAFILE_DEFAULT})
194
+
195
+ option(GGML_CUDA "ggml: use CUDA" OFF)
196
+ option(GGML_MUSA "ggml: use MUSA" OFF)
197
+ option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
198
+ option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
199
+ set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
200
+ "ggml: max. batch size for using peer access")
201
+ option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
202
+ option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM" OFF)
203
+ option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
204
+ option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
205
+ option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
206
+ set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
207
+ "ggml: cuda link binary compression mode; requires cuda 12.8+")
208
+ set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")
209
+
210
+ option(GGML_HIP "ggml: use HIP" OFF)
211
+ option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
212
+ option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
213
+ option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
214
+ option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
215
+ option(GGML_HIP_EXPORT_METRICS "ggml: enable kernel perf metrics output" OFF)
216
+ option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
217
+ option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
218
+ option(GGML_VULKAN "ggml: use Vulkan" OFF)
219
+ option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
220
+ option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
221
+ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
222
+ option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
223
+ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
224
+ option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
225
+ option(GGML_WEBGPU "ggml: use WebGPU" OFF)
226
+ option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
227
+ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
228
+ option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
229
+ option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
230
+ option(GGML_ZDNN "ggml: use zDNN" OFF)
231
+ option(GGML_VIRTGPU "ggml: use the VirtGPU/Virglrenderer API Remoting frontend" OFF)
232
+ option(GGML_VIRTGPU_BACKEND "ggml: build the VirtGPU/Virglrenderer API Remoting backend" OFF)
233
+ option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
234
+ option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
235
+ option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
236
+ option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})
237
+ set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING
238
+ "ggml: metal minimum macOS version")
239
+ set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
240
+ option(GGML_OPENMP "ggml: use OpenMP" ON)
241
+ option(GGML_RPC "ggml: use RPC" OFF)
242
+ option(GGML_SYCL "ggml: use SYCL" OFF)
243
+ option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
244
+ option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
245
+ option(GGML_SYCL_DNN "ggml: enable oneDNN in the SYCL backend" ON)
246
+ set (GGML_SYCL_TARGET "INTEL" CACHE STRING
247
+ "ggml: sycl target device")
248
+ set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
249
+ "ggml: sycl device architecture")
250
+
251
+ option(GGML_OPENCL "ggml: use OpenCL" OFF)
252
+ option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
253
+ option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
254
+ option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
255
+ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
256
+ "ggml: OpenCL API version to target")
257
+
258
+ option(GGML_HEXAGON "ggml: enable Hexagon backend" OFF)
259
+ set(GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE 128 CACHE STRING "ggml: quantize group size (32, 64, or 128)")
260
+
261
+ # toolchain for vulkan-shaders-gen
262
+ set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
263
+
264
+ option(GGML_ZENDNN "ggml: use ZenDNN" OFF)
265
+ set(ZENDNN_ROOT "" CACHE PATH "ggml: path to ZenDNN installation") # a path, not a boolean, so a PATH cache entry instead of option()
266
+
267
+ # extra artifacts
268
+ option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
269
+ option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
270
+
271
+ #
272
+ # dependencies
273
+ #
274
+
275
+ set(CMAKE_C_STANDARD 11)
276
+ set(CMAKE_C_STANDARD_REQUIRED true)
277
+
278
+ set(CMAKE_CXX_STANDARD 17)
279
+ set(CMAKE_CXX_STANDARD_REQUIRED true)
280
+
281
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
282
+
283
+ find_package(Threads REQUIRED)
284
+
285
+ include(GNUInstallDirs)
286
+
287
+ #
288
+ # build the library
289
+ #
290
+
291
+ add_subdirectory(src)
292
+
293
+ #
294
+ # tests and examples
295
+ #
296
+
297
+ if (GGML_BUILD_TESTS)
298
+ enable_testing()
299
+ add_subdirectory(tests)
300
+ endif ()
301
+
302
+ if (GGML_BUILD_EXAMPLES)
303
+ add_subdirectory(examples)
304
+ endif ()
305
+
306
+ #
307
+ # install
308
+ #
309
+
310
+ include(CMakePackageConfigHelpers)
311
+
312
+ # all public headers
313
+ set(GGML_PUBLIC_HEADERS
314
+ include/ggml.h
315
+ include/ggml-cpu.h
316
+ include/ggml-alloc.h
317
+ include/ggml-backend.h
318
+ include/ggml-blas.h
319
+ include/ggml-cann.h
320
+ include/ggml-cpp.h
321
+ include/ggml-cuda.h
322
+ include/ggml-opt.h
323
+ include/ggml-metal.h
324
+ include/ggml-rpc.h
325
+ include/ggml-virtgpu.h
326
+ include/ggml-sycl.h
327
+ include/ggml-vulkan.h
328
+ include/ggml-webgpu.h
329
+ include/ggml-zendnn.h
330
+ include/gguf.h)
331
+
332
+ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
333
+ #if (GGML_METAL)
334
+ # set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
335
+ #endif()
336
+ install(TARGETS ggml LIBRARY PUBLIC_HEADER)
337
+ install(TARGETS ggml-base LIBRARY)
338
+
339
+ if (GGML_STANDALONE)
340
+ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
341
+ ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
342
+ @ONLY)
343
+
344
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
345
+ DESTINATION share/pkgconfig)
346
+ endif()
347
+
348
+ #
349
+ # Create CMake package
350
+ #
351
+
352
+
353
+
354
+ # Capture variables prefixed with GGML_.
355
+
356
+ set(variable_set_statements
357
+ "
358
+ ####### Expanded from @GGML_VARIABLES_EXPANDED@ by configure_package_config_file() #######
359
+ ####### Any changes to this file will be overwritten by the next CMake run #######
360
+
361
+ ")
362
+
363
+ set(GGML_SHARED_LIB ${BUILD_SHARED_LIBS})
364
+
365
+ get_cmake_property(all_variables VARIABLES)
366
+ foreach(variable_name IN LISTS all_variables)
367
+ if(variable_name MATCHES "^GGML_")
368
+ string(REPLACE ";" "\\;"
369
+ variable_value "${${variable_name}}")
370
+
371
+ set(variable_set_statements
372
+ "${variable_set_statements}set(${variable_name} \"${variable_value}\")\n")
373
+ endif()
374
+ endforeach()
375
+
376
+ set(GGML_VARIABLES_EXPANDED ${variable_set_statements})
377
+
378
+ # Create the CMake package and set install location.
379
+
380
+ set(GGML_INSTALL_VERSION ${GGML_VERSION})
381
+ set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
382
+ set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
383
+ set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
384
+
385
+ configure_package_config_file(
386
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/ggml-config.cmake.in
387
+ ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
388
+ INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml
389
+ PATH_VARS GGML_INCLUDE_INSTALL_DIR
390
+ GGML_LIB_INSTALL_DIR
391
+ GGML_BIN_INSTALL_DIR)
392
+
393
+ write_basic_package_version_file(
394
+ ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
395
+ VERSION ${GGML_INSTALL_VERSION}
396
+ COMPATIBILITY SameMajorVersion)
397
+
398
+ target_compile_definitions(ggml-base PRIVATE
399
+ GGML_VERSION="${GGML_INSTALL_VERSION}"
400
+ GGML_COMMIT="${GGML_BUILD_COMMIT}"
401
+ )
402
+ message(STATUS "ggml version: ${GGML_INSTALL_VERSION}")
403
+ message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}")
404
+
405
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
406
+ ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
407
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)
408
+
409
+ if (MSVC)
410
+ set(MSVC_WARNING_FLAGS
411
+ /wd4005 # Macro redefinition
412
+ /wd4244 # Conversion from one type to another type, possible loss of data
413
+ /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data
414
+ /wd4305 # Conversion from 'type1' to 'type2', possible loss of data
415
+ /wd4566 # Conversion from 'char' to 'wchar_t', possible loss of data
416
+ /wd4996 # Disable POSIX deprecation warnings
417
+ /wd4702 # Unreachable code warnings
418
+ )
419
+ set(MSVC_COMPILE_OPTIONS
420
+ "$<$<COMPILE_LANGUAGE:C>:/utf-8>"
421
+ "$<$<COMPILE_LANGUAGE:CXX>:/utf-8>"
422
+ )
423
+ function(configure_msvc_target target_name)
424
+ if(TARGET ${target_name})
425
+ target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS})
426
+ target_compile_options(${target_name} PRIVATE ${MSVC_COMPILE_OPTIONS})
427
+ endif()
428
+ endfunction()
429
+
430
+ configure_msvc_target(ggml-base)
431
+ configure_msvc_target(ggml)
432
+ configure_msvc_target(ggml-cpu)
433
+ configure_msvc_target(ggml-cpu-x64)
434
+ configure_msvc_target(ggml-cpu-sse42)
435
+ configure_msvc_target(ggml-cpu-sandybridge)
436
+ # __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
437
+ # skipping ggml-cpu-ivybridge
438
+ # skipping ggml-cpu-piledriver
439
+ configure_msvc_target(ggml-cpu-haswell)
440
+ configure_msvc_target(ggml-cpu-skylakex)
441
+ configure_msvc_target(ggml-cpu-cannonlake)
442
+ configure_msvc_target(ggml-cpu-cascadelake)
443
+ configure_msvc_target(ggml-cpu-icelake)
444
+ # MSVC 2022 doesn't support BF16 intrinsics without `/arch:AVX10.1` ?!
445
+ # https://learn.microsoft.com/en-us/cpp/intrinsics/x64-amd64-intrinsics-list?view=msvc-170
446
+ # https://learn.microsoft.com/en-us/cpp/build/reference/arch-x64?view=msvc-170
447
+ # skipping ggml-cpu-cooperlake
448
+ # skipping ggml-cpu-zen4
449
+ configure_msvc_target(ggml-cpu-alderlake)
450
+ # MSVC doesn't support AMX
451
+ # skipping ggml-cpu-sapphirerapids
452
+
453
+ if (GGML_BUILD_EXAMPLES)
454
+ configure_msvc_target(common-ggml)
455
+ configure_msvc_target(common)
456
+
457
+ configure_msvc_target(mnist-common)
458
+ configure_msvc_target(mnist-eval)
459
+ configure_msvc_target(mnist-train)
460
+
461
+ configure_msvc_target(gpt-2-ctx)
462
+ configure_msvc_target(gpt-2-alloc)
463
+ configure_msvc_target(gpt-2-backend)
464
+ configure_msvc_target(gpt-2-sched)
465
+ configure_msvc_target(gpt-2-quantize)
466
+ configure_msvc_target(gpt-2-batched)
467
+
468
+ configure_msvc_target(gpt-j)
469
+ configure_msvc_target(gpt-j-quantize)
470
+
471
+ configure_msvc_target(magika)
472
+ configure_msvc_target(yolov3-tiny)
473
+ configure_msvc_target(sam)
474
+
475
+ configure_msvc_target(simple-ctx)
476
+ configure_msvc_target(simple-backend)
477
+ endif()
478
+
479
+ if (GGML_BUILD_TESTS)
480
+ configure_msvc_target(test-mul-mat)
481
+ configure_msvc_target(test-arange)
482
+ configure_msvc_target(test-backend-ops)
483
+ configure_msvc_target(test-cont)
484
+ configure_msvc_target(test-conv-transpose)
485
+ configure_msvc_target(test-conv-transpose-1d)
486
+ configure_msvc_target(test-conv1d)
487
+ configure_msvc_target(test-conv2d)
488
+ configure_msvc_target(test-conv2d-dw)
489
+ configure_msvc_target(test-customop)
490
+ configure_msvc_target(test-dup)
491
+ configure_msvc_target(test-opt)
492
+ configure_msvc_target(test-pool)
493
+ endif ()
494
+ endif()
@@ -0,0 +1,22 @@
1
+ find_package(Git)
2
+
3
+ # the commit's SHA1
4
+ execute_process(COMMAND
5
+ "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
6
+ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
7
+ OUTPUT_VARIABLE GIT_SHA1
8
+ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
9
+
10
+ # the date of the commit
11
+ execute_process(COMMAND
12
+ "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
13
+ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
14
+ OUTPUT_VARIABLE GIT_DATE
15
+ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
16
+
17
+ # the subject of the commit
18
+ execute_process(COMMAND
19
+ "${GIT_EXECUTABLE}" log -1 --format=%s
20
+ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
21
+ OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
22
+ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)