local-llm-rn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (626)
  1. package/cpp/CMakeLists.txt +285 -0
  2. package/cpp/common/CMakeLists.txt +149 -0
  3. package/cpp/common/arg.cpp +3799 -0
  4. package/cpp/common/arg.h +131 -0
  5. package/cpp/common/base64.hpp +392 -0
  6. package/cpp/common/build-info.cpp.in +4 -0
  7. package/cpp/common/chat-parser-xml-toolcall.cpp +879 -0
  8. package/cpp/common/chat-parser-xml-toolcall.h +45 -0
  9. package/cpp/common/chat-parser.cpp +1649 -0
  10. package/cpp/common/chat-parser.h +133 -0
  11. package/cpp/common/chat-peg-parser.cpp +124 -0
  12. package/cpp/common/chat-peg-parser.h +105 -0
  13. package/cpp/common/chat.cpp +3355 -0
  14. package/cpp/common/chat.h +252 -0
  15. package/cpp/common/common.cpp +1824 -0
  16. package/cpp/common/common.h +930 -0
  17. package/cpp/common/console.cpp +1137 -0
  18. package/cpp/common/console.h +41 -0
  19. package/cpp/common/debug.cpp +167 -0
  20. package/cpp/common/debug.h +43 -0
  21. package/cpp/common/download.cpp +792 -0
  22. package/cpp/common/download.h +84 -0
  23. package/cpp/common/http.h +84 -0
  24. package/cpp/common/jinja/README.md +88 -0
  25. package/cpp/common/jinja/caps.cpp +285 -0
  26. package/cpp/common/jinja/caps.h +30 -0
  27. package/cpp/common/jinja/lexer.cpp +341 -0
  28. package/cpp/common/jinja/lexer.h +157 -0
  29. package/cpp/common/jinja/parser.cpp +591 -0
  30. package/cpp/common/jinja/parser.h +21 -0
  31. package/cpp/common/jinja/runtime.cpp +867 -0
  32. package/cpp/common/jinja/runtime.h +638 -0
  33. package/cpp/common/jinja/string.cpp +213 -0
  34. package/cpp/common/jinja/string.h +61 -0
  35. package/cpp/common/jinja/utils.h +149 -0
  36. package/cpp/common/jinja/value.cpp +1393 -0
  37. package/cpp/common/jinja/value.h +756 -0
  38. package/cpp/common/json-partial.cpp +324 -0
  39. package/cpp/common/json-partial.h +39 -0
  40. package/cpp/common/json-schema-to-grammar.cpp +1153 -0
  41. package/cpp/common/json-schema-to-grammar.h +43 -0
  42. package/cpp/common/llguidance.cpp +258 -0
  43. package/cpp/common/log.cpp +446 -0
  44. package/cpp/common/log.h +119 -0
  45. package/cpp/common/ngram-cache.cpp +285 -0
  46. package/cpp/common/ngram-cache.h +101 -0
  47. package/cpp/common/ngram-map.cpp +530 -0
  48. package/cpp/common/ngram-map.h +115 -0
  49. package/cpp/common/ngram-mod.cpp +60 -0
  50. package/cpp/common/ngram-mod.h +38 -0
  51. package/cpp/common/peg-parser.cpp +1712 -0
  52. package/cpp/common/peg-parser.h +459 -0
  53. package/cpp/common/preset.cpp +483 -0
  54. package/cpp/common/preset.h +83 -0
  55. package/cpp/common/regex-partial.cpp +204 -0
  56. package/cpp/common/regex-partial.h +56 -0
  57. package/cpp/common/sampling.cpp +745 -0
  58. package/cpp/common/sampling.h +119 -0
  59. package/cpp/common/speculative.cpp +1074 -0
  60. package/cpp/common/speculative.h +41 -0
  61. package/cpp/common/unicode.cpp +64 -0
  62. package/cpp/common/unicode.h +22 -0
  63. package/cpp/ggml/CMakeLists.txt +494 -0
  64. package/cpp/ggml/cmake/GitVars.cmake +22 -0
  65. package/cpp/ggml/cmake/common.cmake +50 -0
  66. package/cpp/ggml/cmake/ggml-config.cmake.in +191 -0
  67. package/cpp/ggml/include/ggml-alloc.h +85 -0
  68. package/cpp/ggml/include/ggml-backend.h +373 -0
  69. package/cpp/ggml/include/ggml-blas.h +25 -0
  70. package/cpp/ggml/include/ggml-cann.h +123 -0
  71. package/cpp/ggml/include/ggml-cpp.h +39 -0
  72. package/cpp/ggml/include/ggml-cpu.h +151 -0
  73. package/cpp/ggml/include/ggml-cuda.h +47 -0
  74. package/cpp/ggml/include/ggml-hexagon.h +19 -0
  75. package/cpp/ggml/include/ggml-metal.h +61 -0
  76. package/cpp/ggml/include/ggml-opencl.h +26 -0
  77. package/cpp/ggml/include/ggml-opt.h +256 -0
  78. package/cpp/ggml/include/ggml-rpc.h +30 -0
  79. package/cpp/ggml/include/ggml-sycl.h +49 -0
  80. package/cpp/ggml/include/ggml-virtgpu.h +14 -0
  81. package/cpp/ggml/include/ggml-vulkan.h +29 -0
  82. package/cpp/ggml/include/ggml-webgpu.h +19 -0
  83. package/cpp/ggml/include/ggml-zdnn.h +17 -0
  84. package/cpp/ggml/include/ggml-zendnn.h +22 -0
  85. package/cpp/ggml/include/ggml.h +2753 -0
  86. package/cpp/ggml/include/gguf.h +204 -0
  87. package/cpp/ggml/src/CMakeLists.txt +492 -0
  88. package/cpp/ggml/src/ggml-alloc.c +1244 -0
  89. package/cpp/ggml/src/ggml-backend-dl.cpp +48 -0
  90. package/cpp/ggml/src/ggml-backend-dl.h +45 -0
  91. package/cpp/ggml/src/ggml-backend-impl.h +255 -0
  92. package/cpp/ggml/src/ggml-backend-reg.cpp +566 -0
  93. package/cpp/ggml/src/ggml-backend.cpp +2270 -0
  94. package/cpp/ggml/src/ggml-blas/CMakeLists.txt +101 -0
  95. package/cpp/ggml/src/ggml-blas/ggml-blas.cpp +518 -0
  96. package/cpp/ggml/src/ggml-common.h +1878 -0
  97. package/cpp/ggml/src/ggml-cpu/CMakeLists.txt +691 -0
  98. package/cpp/ggml/src/ggml-cpu/amx/amx.cpp +247 -0
  99. package/cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  100. package/cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  101. package/cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2512 -0
  102. package/cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  103. package/cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +98 -0
  104. package/cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4052 -0
  105. package/cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +4935 -0
  106. package/cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2159 -0
  107. package/cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  108. package/cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2305 -0
  109. package/cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +38 -0
  110. package/cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2726 -0
  111. package/cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +342 -0
  112. package/cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
  113. package/cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1468 -0
  114. package/cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1221 -0
  115. package/cpp/ggml/src/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
  116. package/cpp/ggml/src/ggml-cpu/arch/x86/quants.c +3820 -0
  117. package/cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6307 -0
  118. package/cpp/ggml/src/ggml-cpu/arch-fallback.h +313 -0
  119. package/cpp/ggml/src/ggml-cpu/binary-ops.cpp +154 -0
  120. package/cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  121. package/cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  122. package/cpp/ggml/src/ggml-cpu/common.h +95 -0
  123. package/cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +529 -0
  124. package/cpp/ggml/src/ggml-cpu/ggml-cpu.c +3734 -0
  125. package/cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +701 -0
  126. package/cpp/ggml/src/ggml-cpu/hbm.cpp +55 -0
  127. package/cpp/ggml/src/ggml-cpu/hbm.h +8 -0
  128. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +938 -0
  129. package/cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +90 -0
  130. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +798 -0
  131. package/cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  132. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +4033 -0
  133. package/cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +25 -0
  134. package/cpp/ggml/src/ggml-cpu/ops.cpp +10978 -0
  135. package/cpp/ggml/src/ggml-cpu/ops.h +116 -0
  136. package/cpp/ggml/src/ggml-cpu/quants.c +1193 -0
  137. package/cpp/ggml/src/ggml-cpu/quants.h +97 -0
  138. package/cpp/ggml/src/ggml-cpu/repack.cpp +3316 -0
  139. package/cpp/ggml/src/ggml-cpu/repack.h +173 -0
  140. package/cpp/ggml/src/ggml-cpu/simd-gemm.h +136 -0
  141. package/cpp/ggml/src/ggml-cpu/simd-mappings.h +1279 -0
  142. package/cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1025 -0
  143. package/cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
  144. package/cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
  145. package/cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
  146. package/cpp/ggml/src/ggml-cpu/traits.cpp +36 -0
  147. package/cpp/ggml/src/ggml-cpu/traits.h +38 -0
  148. package/cpp/ggml/src/ggml-cpu/unary-ops.cpp +337 -0
  149. package/cpp/ggml/src/ggml-cpu/unary-ops.h +35 -0
  150. package/cpp/ggml/src/ggml-cpu/vec.cpp +629 -0
  151. package/cpp/ggml/src/ggml-cpu/vec.h +1585 -0
  152. package/cpp/ggml/src/ggml-hexagon/CMakeLists.txt +117 -0
  153. package/cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +3232 -0
  154. package/cpp/ggml/src/ggml-hexagon/htp/CMakeLists.txt +45 -0
  155. package/cpp/ggml/src/ggml-hexagon/htp/act-ops.c +815 -0
  156. package/cpp/ggml/src/ggml-hexagon/htp/argsort-ops.c +281 -0
  157. package/cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +827 -0
  158. package/cpp/ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake +157 -0
  159. package/cpp/ggml/src/ggml-hexagon/htp/cpy-ops.c +251 -0
  160. package/cpp/ggml/src/ggml-hexagon/htp/flash-attn-ops.c +666 -0
  161. package/cpp/ggml/src/ggml-hexagon/htp/get-rows-ops.c +111 -0
  162. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.c +63 -0
  163. package/cpp/ggml/src/ggml-hexagon/htp/hex-dma.h +182 -0
  164. package/cpp/ggml/src/ggml-hexagon/htp/hex-dump.h +77 -0
  165. package/cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h +37 -0
  166. package/cpp/ggml/src/ggml-hexagon/htp/hex-utils.h +51 -0
  167. package/cpp/ggml/src/ggml-hexagon/htp/htp-ctx.h +35 -0
  168. package/cpp/ggml/src/ggml-hexagon/htp/htp-msg.h +154 -0
  169. package/cpp/ggml/src/ggml-hexagon/htp/htp-ops.h +65 -0
  170. package/cpp/ggml/src/ggml-hexagon/htp/htp_iface.idl +16 -0
  171. package/cpp/ggml/src/ggml-hexagon/htp/hvx-arith.h +470 -0
  172. package/cpp/ggml/src/ggml-hexagon/htp/hvx-base.h +173 -0
  173. package/cpp/ggml/src/ggml-hexagon/htp/hvx-copy.h +245 -0
  174. package/cpp/ggml/src/ggml-hexagon/htp/hvx-div.h +116 -0
  175. package/cpp/ggml/src/ggml-hexagon/htp/hvx-dump.h +129 -0
  176. package/cpp/ggml/src/ggml-hexagon/htp/hvx-exp.h +215 -0
  177. package/cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h +100 -0
  178. package/cpp/ggml/src/ggml-hexagon/htp/hvx-inverse.h +176 -0
  179. package/cpp/ggml/src/ggml-hexagon/htp/hvx-reduce.h +266 -0
  180. package/cpp/ggml/src/ggml-hexagon/htp/hvx-scale.h +133 -0
  181. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sigmoid.h +141 -0
  182. package/cpp/ggml/src/ggml-hexagon/htp/hvx-sqrt.h +126 -0
  183. package/cpp/ggml/src/ggml-hexagon/htp/hvx-types.h +36 -0
  184. package/cpp/ggml/src/ggml-hexagon/htp/hvx-utils.h +18 -0
  185. package/cpp/ggml/src/ggml-hexagon/htp/main.c +1150 -0
  186. package/cpp/ggml/src/ggml-hexagon/htp/matmul-ops.c +2595 -0
  187. package/cpp/ggml/src/ggml-hexagon/htp/rope-ops.c +498 -0
  188. package/cpp/ggml/src/ggml-hexagon/htp/set-rows-ops.c +167 -0
  189. package/cpp/ggml/src/ggml-hexagon/htp/softmax-ops.c +421 -0
  190. package/cpp/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +130 -0
  191. package/cpp/ggml/src/ggml-hexagon/htp/unary-ops.c +384 -0
  192. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.c +293 -0
  193. package/cpp/ggml/src/ggml-hexagon/htp/worker-pool.h +57 -0
  194. package/cpp/ggml/src/ggml-hexagon/htp-drv.cpp +418 -0
  195. package/cpp/ggml/src/ggml-hexagon/htp-drv.h +121 -0
  196. package/cpp/ggml/src/ggml-hexagon/libdl.h +79 -0
  197. package/cpp/ggml/src/ggml-hexagon/libggml-htp.inf +38 -0
  198. package/cpp/ggml/src/ggml-hexagon/op-desc.h +153 -0
  199. package/cpp/ggml/src/ggml-impl.h +724 -0
  200. package/cpp/ggml/src/ggml-metal/CMakeLists.txt +124 -0
  201. package/cpp/ggml/src/ggml-metal/ggml-metal-common.cpp +457 -0
  202. package/cpp/ggml/src/ggml-metal/ggml-metal-common.h +52 -0
  203. package/cpp/ggml/src/ggml-metal/ggml-metal-context.h +41 -0
  204. package/cpp/ggml/src/ggml-metal/ggml-metal-context.m +702 -0
  205. package/cpp/ggml/src/ggml-metal/ggml-metal-device.cpp +1890 -0
  206. package/cpp/ggml/src/ggml-metal/ggml-metal-device.h +290 -0
  207. package/cpp/ggml/src/ggml-metal/ggml-metal-device.m +1749 -0
  208. package/cpp/ggml/src/ggml-metal/ggml-metal-impl.h +1054 -0
  209. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp +4370 -0
  210. package/cpp/ggml/src/ggml-metal/ggml-metal-ops.h +94 -0
  211. package/cpp/ggml/src/ggml-metal/ggml-metal.cpp +937 -0
  212. package/cpp/ggml/src/ggml-metal/ggml-metal.metal +9819 -0
  213. package/cpp/ggml/src/ggml-musa/CMakeLists.txt +125 -0
  214. package/cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  215. package/cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  216. package/cpp/ggml/src/ggml-opencl/CMakeLists.txt +150 -0
  217. package/cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +11553 -0
  218. package/cpp/ggml/src/ggml-opencl/kernels/add.cl +190 -0
  219. package/cpp/ggml/src/ggml-opencl/kernels/add_id.cl +42 -0
  220. package/cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  221. package/cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  222. package/cpp/ggml/src/ggml-opencl/kernels/concat.cl +51 -0
  223. package/cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  224. package/cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  225. package/cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  226. package/cpp/ggml/src/ggml-opencl/kernels/cvt.cl +417 -0
  227. package/cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  228. package/cpp/ggml/src/ggml-opencl/kernels/div.cl +138 -0
  229. package/cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  230. package/cpp/ggml/src/ggml-opencl/kernels/expm1.cl +113 -0
  231. package/cpp/ggml/src/ggml-opencl/kernels/fill.cl +17 -0
  232. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f16.cl +370 -0
  233. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32.cl +371 -0
  234. package/cpp/ggml/src/ggml-opencl/kernels/flash_attn_f32_f16.cl +373 -0
  235. package/cpp/ggml/src/ggml-opencl/kernels/gelu.cl +89 -0
  236. package/cpp/ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32.cl +162 -0
  237. package/cpp/ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32.cl +156 -0
  238. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  239. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  240. package/cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl +195 -0
  241. package/cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +187 -0
  242. package/cpp/ggml/src/ggml-opencl/kernels/glu.cl +378 -0
  243. package/cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +121 -0
  244. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  245. package/cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  246. package/cpp/ggml/src/ggml-opencl/kernels/mean.cl +140 -0
  247. package/cpp/ggml/src/ggml-opencl/kernels/mul.cl +152 -0
  248. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  249. package/cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  250. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_kq_kqv.cl +273 -0
  251. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f16_f32_l4_lm.cl +146 -0
  252. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_f32_f32_l4_lm.cl +147 -0
  253. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_0_f32_l4_lm.cl +163 -0
  254. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q4_1_f32_l4_lm.cl +165 -0
  255. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +158 -0
  256. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_8x4.cl +129 -0
  257. package/cpp/ggml/src/ggml-opencl/kernels/mul_mm_q8_0_f32_l4_lm.cl +154 -0
  258. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  259. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  260. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  261. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  262. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  263. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32.cl +189 -0
  264. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_mxfp4_f32_flat.cl +176 -0
  265. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  266. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32.cl +140 -0
  267. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q8_0_f32_flat.cl +222 -0
  268. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32.cl +144 -0
  269. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_mxfp4_f32_flat.cl +167 -0
  270. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  271. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  272. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  273. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  274. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  275. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32.cl +219 -0
  276. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_1_f32_flat.cl +229 -0
  277. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +180 -0
  278. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32.cl +194 -0
  279. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k_f32_flat.cl +194 -0
  280. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32.cl +125 -0
  281. package/cpp/ggml/src/ggml-opencl/kernels/mul_mv_q8_0_f32_flat.cl +202 -0
  282. package/cpp/ggml/src/ggml-opencl/kernels/norm.cl +161 -0
  283. package/cpp/ggml/src/ggml-opencl/kernels/pad.cl +39 -0
  284. package/cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  285. package/cpp/ggml/src/ggml-opencl/kernels/repeat.cl +38 -0
  286. package/cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +190 -0
  287. package/cpp/ggml/src/ggml-opencl/kernels/rope.cl +747 -0
  288. package/cpp/ggml/src/ggml-opencl/kernels/scale.cl +27 -0
  289. package/cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +208 -0
  290. package/cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  291. package/cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  292. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +108 -0
  293. package/cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +108 -0
  294. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +107 -0
  295. package/cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +107 -0
  296. package/cpp/ggml/src/ggml-opencl/kernels/softplus.cl +116 -0
  297. package/cpp/ggml/src/ggml-opencl/kernels/solve_tri.cl +51 -0
  298. package/cpp/ggml/src/ggml-opencl/kernels/sqr.cl +53 -0
  299. package/cpp/ggml/src/ggml-opencl/kernels/sqrt.cl +53 -0
  300. package/cpp/ggml/src/ggml-opencl/kernels/ssm_conv.cl +77 -0
  301. package/cpp/ggml/src/ggml-opencl/kernels/sub.cl +138 -0
  302. package/cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +140 -0
  303. package/cpp/ggml/src/ggml-opencl/kernels/tanh.cl +109 -0
  304. package/cpp/ggml/src/ggml-opencl/kernels/transpose.cl +117 -0
  305. package/cpp/ggml/src/ggml-opencl/kernels/tri.cl +32 -0
  306. package/cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  307. package/cpp/ggml/src/ggml-opencl/kernels/upscale.cl +120 -0
  308. package/cpp/ggml/src/ggml-opt.cpp +1093 -0
  309. package/cpp/ggml/src/ggml-quants.c +5325 -0
  310. package/cpp/ggml/src/ggml-quants.h +106 -0
  311. package/cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  312. package/cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2118 -0
  313. package/cpp/ggml/src/ggml-threading.cpp +12 -0
  314. package/cpp/ggml/src/ggml-threading.h +14 -0
  315. package/cpp/ggml/src/ggml-virtgpu/CMakeLists.txt +70 -0
  316. package/cpp/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp +87 -0
  317. package/cpp/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +21 -0
  318. package/cpp/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp +115 -0
  319. package/cpp/ggml/src/ggml-virtgpu/backend/backend-convert.h +13 -0
  320. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp +102 -0
  321. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp +105 -0
  322. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp +179 -0
  323. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp +148 -0
  324. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +51 -0
  325. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h +73 -0
  326. package/cpp/ggml/src/ggml-virtgpu/backend/backend-dispatched.h +27 -0
  327. package/cpp/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +32 -0
  328. package/cpp/ggml/src/ggml-virtgpu/backend/backend.cpp +144 -0
  329. package/cpp/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h +95 -0
  330. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h +94 -0
  331. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h +50 -0
  332. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h +378 -0
  333. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h +232 -0
  334. package/cpp/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h +58 -0
  335. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +81 -0
  336. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp +119 -0
  337. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-device.cpp +158 -0
  338. package/cpp/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +213 -0
  339. package/cpp/ggml/src/ggml-virtgpu/ggml-backend.cpp +69 -0
  340. package/cpp/ggml/src/ggml-virtgpu/ggml-remoting.h +71 -0
  341. package/cpp/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +166 -0
  342. package/cpp/ggml/src/ggml-virtgpu/include/apir_hw.h +9 -0
  343. package/cpp/ggml/src/ggml-virtgpu/regenerate_remoting.py +333 -0
  344. package/cpp/ggml/src/ggml-virtgpu/virtgpu-apir.h +15 -0
  345. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp +58 -0
  346. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp +110 -0
  347. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +173 -0
  348. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp +192 -0
  349. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h +36 -0
  350. package/cpp/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h +53 -0
  351. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.cpp +98 -0
  352. package/cpp/ggml/src/ggml-virtgpu/virtgpu-shm.h +23 -0
  353. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.cpp +179 -0
  354. package/cpp/ggml/src/ggml-virtgpu/virtgpu-utils.h +86 -0
  355. package/cpp/ggml/src/ggml-virtgpu/virtgpu.cpp +544 -0
  356. package/cpp/ggml/src/ggml-virtgpu/virtgpu.h +117 -0
  357. package/cpp/ggml/src/ggml-webgpu/CMakeLists.txt +80 -0
  358. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp +1231 -0
  359. package/cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +3150 -0
  360. package/cpp/ggml/src/ggml-webgpu/pre_wgsl.hpp +778 -0
  361. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argmax.wgsl +72 -0
  362. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort.wgsl +106 -0
  363. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/argsort_merge.wgsl +134 -0
  364. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/binary.wgsl +107 -0
  365. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl +923 -0
  366. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.tmpl.wgsl +107 -0
  367. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl +66 -0
  368. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +182 -0
  369. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl +636 -0
  370. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl +668 -0
  371. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/glu.tmpl.wgsl +323 -0
  372. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  373. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +713 -0
  374. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl +103 -0
  375. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl +138 -0
  376. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl +188 -0
  377. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl +194 -0
  378. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/pad.wgsl +86 -0
  379. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +123 -0
  380. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/rope.tmpl.wgsl +295 -0
  381. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/scale.wgsl +63 -0
  382. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/set_rows.wgsl +109 -0
  383. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +345 -0
  384. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/sum_rows.wgsl +55 -0
  385. package/cpp/ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl +193 -0
  386. package/cpp/ggml/src/ggml-zdnn/CMakeLists.txt +36 -0
  387. package/cpp/ggml/src/ggml-zdnn/common.hpp +59 -0
  388. package/cpp/ggml/src/ggml-zdnn/ggml-zdnn.cpp +633 -0
  389. package/cpp/ggml/src/ggml-zdnn/mmf.cpp +80 -0
  390. package/cpp/ggml/src/ggml-zdnn/mmf.hpp +12 -0
  391. package/cpp/ggml/src/ggml-zdnn/utils.cpp +79 -0
  392. package/cpp/ggml/src/ggml-zdnn/utils.hpp +19 -0
  393. package/cpp/ggml/src/ggml-zendnn/CMakeLists.txt +92 -0
  394. package/cpp/ggml/src/ggml-zendnn/ggml-zendnn.cpp +469 -0
  395. package/cpp/ggml/src/ggml.c +7669 -0
  396. package/cpp/ggml/src/ggml.cpp +26 -0
  397. package/cpp/ggml/src/gguf.cpp +1699 -0
  398. package/cpp/include/llama-cpp.h +32 -0
  399. package/cpp/include/llama.h +1568 -0
  400. package/cpp/mtmd/CMakeLists.txt +98 -0
  401. package/cpp/mtmd/README.md +63 -0
  402. package/cpp/mtmd/clip-graph.h +117 -0
  403. package/cpp/mtmd/clip-impl.h +586 -0
  404. package/cpp/mtmd/clip-model.h +390 -0
  405. package/cpp/mtmd/clip.cpp +4154 -0
  406. package/cpp/mtmd/clip.h +121 -0
  407. package/cpp/mtmd/deprecation-warning.cpp +22 -0
  408. package/cpp/mtmd/legacy-models/convert_image_encoder_to_gguf.py +412 -0
  409. package/cpp/mtmd/legacy-models/glmedge-convert-image-encoder-to-gguf.py +280 -0
  410. package/cpp/mtmd/legacy-models/glmedge-surgery.py +33 -0
  411. package/cpp/mtmd/legacy-models/llava_surgery.py +38 -0
  412. package/cpp/mtmd/legacy-models/llava_surgery_v2.py +180 -0
  413. package/cpp/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +892 -0
  414. package/cpp/mtmd/legacy-models/minicpmv-surgery.py +47 -0
  415. package/cpp/mtmd/models/cogvlm.cpp +98 -0
  416. package/cpp/mtmd/models/conformer.cpp +216 -0
  417. package/cpp/mtmd/models/glm4v.cpp +122 -0
  418. package/cpp/mtmd/models/internvl.cpp +69 -0
  419. package/cpp/mtmd/models/kimik25.cpp +101 -0
  420. package/cpp/mtmd/models/kimivl.cpp +63 -0
  421. package/cpp/mtmd/models/llama4.cpp +96 -0
  422. package/cpp/mtmd/models/llava.cpp +374 -0
  423. package/cpp/mtmd/models/minicpmv.cpp +114 -0
  424. package/cpp/mtmd/models/mobilenetv5.cpp +451 -0
  425. package/cpp/mtmd/models/models.h +128 -0
  426. package/cpp/mtmd/models/nemotron-v2-vl.cpp +35 -0
  427. package/cpp/mtmd/models/paddleocr.cpp +52 -0
  428. package/cpp/mtmd/models/pixtral.cpp +86 -0
  429. package/cpp/mtmd/models/qwen2vl.cpp +183 -0
  430. package/cpp/mtmd/models/qwen3vl.cpp +193 -0
  431. package/cpp/mtmd/models/siglip.cpp +86 -0
  432. package/cpp/mtmd/models/whisper-enc.cpp +115 -0
  433. package/cpp/mtmd/models/youtuvl.cpp +179 -0
  434. package/cpp/mtmd/mtmd-audio.cpp +730 -0
  435. package/cpp/mtmd/mtmd-audio.h +113 -0
  436. package/cpp/mtmd/mtmd-cli.cpp +437 -0
  437. package/cpp/mtmd/mtmd-helper.cpp +521 -0
  438. package/cpp/mtmd/mtmd-helper.h +96 -0
  439. package/cpp/mtmd/mtmd.cpp +1156 -0
  440. package/cpp/mtmd/mtmd.h +319 -0
  441. package/cpp/mtmd/requirements.txt +5 -0
  442. package/cpp/mtmd/test-1.jpeg +0 -0
  443. package/cpp/mtmd/test-2.mp3 +0 -0
  444. package/cpp/mtmd/tests.sh +192 -0
  445. package/cpp/src/CMakeLists.txt +169 -0
  446. package/cpp/src/llama-adapter.cpp +488 -0
  447. package/cpp/src/llama-adapter.h +89 -0
  448. package/cpp/src/llama-arch.cpp +2855 -0
  449. package/cpp/src/llama-arch.h +619 -0
  450. package/cpp/src/llama-batch.cpp +917 -0
  451. package/cpp/src/llama-batch.h +173 -0
  452. package/cpp/src/llama-chat.cpp +896 -0
  453. package/cpp/src/llama-chat.h +71 -0
  454. package/cpp/src/llama-context.cpp +3512 -0
  455. package/cpp/src/llama-context.h +359 -0
  456. package/cpp/src/llama-cparams.cpp +5 -0
  457. package/cpp/src/llama-cparams.h +44 -0
  458. package/cpp/src/llama-grammar.cpp +1464 -0
  459. package/cpp/src/llama-grammar.h +194 -0
  460. package/cpp/src/llama-graph.cpp +2685 -0
  461. package/cpp/src/llama-graph.h +1026 -0
  462. package/cpp/src/llama-hparams.cpp +234 -0
  463. package/cpp/src/llama-hparams.h +339 -0
  464. package/cpp/src/llama-impl.cpp +171 -0
  465. package/cpp/src/llama-impl.h +73 -0
  466. package/cpp/src/llama-io.cpp +15 -0
  467. package/cpp/src/llama-io.h +35 -0
  468. package/cpp/src/llama-kv-cache-iswa.cpp +330 -0
  469. package/cpp/src/llama-kv-cache-iswa.h +137 -0
  470. package/cpp/src/llama-kv-cache.cpp +2271 -0
  471. package/cpp/src/llama-kv-cache.h +388 -0
  472. package/cpp/src/llama-kv-cells.h +533 -0
  473. package/cpp/src/llama-memory-hybrid-iswa.cpp +275 -0
  474. package/cpp/src/llama-memory-hybrid-iswa.h +140 -0
  475. package/cpp/src/llama-memory-hybrid.cpp +268 -0
  476. package/cpp/src/llama-memory-hybrid.h +139 -0
  477. package/cpp/src/llama-memory-recurrent.cpp +1165 -0
  478. package/cpp/src/llama-memory-recurrent.h +182 -0
  479. package/cpp/src/llama-memory.cpp +59 -0
  480. package/cpp/src/llama-memory.h +122 -0
  481. package/cpp/src/llama-mmap.cpp +785 -0
  482. package/cpp/src/llama-mmap.h +92 -0
  483. package/cpp/src/llama-model-loader.cpp +1414 -0
  484. package/cpp/src/llama-model-loader.h +203 -0
  485. package/cpp/src/llama-model-saver.cpp +286 -0
  486. package/cpp/src/llama-model-saver.h +37 -0
  487. package/cpp/src/llama-model.cpp +9253 -0
  488. package/cpp/src/llama-model.h +576 -0
  489. package/cpp/src/llama-quant.cpp +1119 -0
  490. package/cpp/src/llama-quant.h +1 -0
  491. package/cpp/src/llama-sampler.cpp +3885 -0
  492. package/cpp/src/llama-sampler.h +42 -0
  493. package/cpp/src/llama-vocab.cpp +3970 -0
  494. package/cpp/src/llama-vocab.h +187 -0
  495. package/cpp/src/llama.cpp +1313 -0
  496. package/cpp/src/models/afmoe.cpp +191 -0
  497. package/cpp/src/models/apertus.cpp +125 -0
  498. package/cpp/src/models/arcee.cpp +135 -0
  499. package/cpp/src/models/arctic.cpp +138 -0
  500. package/cpp/src/models/arwkv7.cpp +86 -0
  501. package/cpp/src/models/baichuan.cpp +122 -0
  502. package/cpp/src/models/bailingmoe.cpp +144 -0
  503. package/cpp/src/models/bailingmoe2.cpp +135 -0
  504. package/cpp/src/models/bert.cpp +178 -0
  505. package/cpp/src/models/bitnet.cpp +160 -0
  506. package/cpp/src/models/bloom.cpp +101 -0
  507. package/cpp/src/models/chameleon.cpp +178 -0
  508. package/cpp/src/models/chatglm.cpp +132 -0
  509. package/cpp/src/models/codeshell.cpp +111 -0
  510. package/cpp/src/models/cogvlm.cpp +102 -0
  511. package/cpp/src/models/cohere2-iswa.cpp +134 -0
  512. package/cpp/src/models/command-r.cpp +122 -0
  513. package/cpp/src/models/dbrx.cpp +123 -0
  514. package/cpp/src/models/deci.cpp +135 -0
  515. package/cpp/src/models/deepseek.cpp +144 -0
  516. package/cpp/src/models/deepseek2.cpp +262 -0
  517. package/cpp/src/models/delta-net-base.cpp +376 -0
  518. package/cpp/src/models/dots1.cpp +134 -0
  519. package/cpp/src/models/dream.cpp +105 -0
  520. package/cpp/src/models/ernie4-5-moe.cpp +150 -0
  521. package/cpp/src/models/ernie4-5.cpp +110 -0
  522. package/cpp/src/models/eurobert.cpp +97 -0
  523. package/cpp/src/models/exaone-moe.cpp +146 -0
  524. package/cpp/src/models/exaone.cpp +114 -0
  525. package/cpp/src/models/exaone4.cpp +123 -0
  526. package/cpp/src/models/falcon-h1.cpp +111 -0
  527. package/cpp/src/models/falcon.cpp +120 -0
  528. package/cpp/src/models/gemma-embedding.cpp +116 -0
  529. package/cpp/src/models/gemma.cpp +112 -0
  530. package/cpp/src/models/gemma2-iswa.cpp +128 -0
  531. package/cpp/src/models/gemma3.cpp +155 -0
  532. package/cpp/src/models/gemma3n-iswa.cpp +384 -0
  533. package/cpp/src/models/glm4-moe.cpp +170 -0
  534. package/cpp/src/models/glm4.cpp +157 -0
  535. package/cpp/src/models/gpt2.cpp +105 -0
  536. package/cpp/src/models/gptneox.cpp +144 -0
  537. package/cpp/src/models/granite-hybrid.cpp +196 -0
  538. package/cpp/src/models/granite.cpp +211 -0
  539. package/cpp/src/models/grok.cpp +159 -0
  540. package/cpp/src/models/grovemoe.cpp +141 -0
  541. package/cpp/src/models/hunyuan-dense.cpp +132 -0
  542. package/cpp/src/models/hunyuan-moe.cpp +154 -0
  543. package/cpp/src/models/internlm2.cpp +120 -0
  544. package/cpp/src/models/jais.cpp +86 -0
  545. package/cpp/src/models/jais2.cpp +123 -0
  546. package/cpp/src/models/jamba.cpp +106 -0
  547. package/cpp/src/models/kimi-linear.cpp +392 -0
  548. package/cpp/src/models/lfm2.cpp +190 -0
  549. package/cpp/src/models/llada-moe.cpp +122 -0
  550. package/cpp/src/models/llada.cpp +99 -0
  551. package/cpp/src/models/llama-iswa.cpp +178 -0
  552. package/cpp/src/models/llama.cpp +168 -0
  553. package/cpp/src/models/maincoder.cpp +117 -0
  554. package/cpp/src/models/mamba-base.cpp +285 -0
  555. package/cpp/src/models/mamba.cpp +54 -0
  556. package/cpp/src/models/mimo2-iswa.cpp +123 -0
  557. package/cpp/src/models/minicpm3.cpp +200 -0
  558. package/cpp/src/models/minimax-m2.cpp +124 -0
  559. package/cpp/src/models/mistral3.cpp +160 -0
  560. package/cpp/src/models/models.h +684 -0
  561. package/cpp/src/models/modern-bert.cpp +109 -0
  562. package/cpp/src/models/mpt.cpp +126 -0
  563. package/cpp/src/models/nemotron-h.cpp +148 -0
  564. package/cpp/src/models/nemotron.cpp +122 -0
  565. package/cpp/src/models/neo-bert.cpp +104 -0
  566. package/cpp/src/models/olmo.cpp +121 -0
  567. package/cpp/src/models/olmo2.cpp +150 -0
  568. package/cpp/src/models/olmoe.cpp +124 -0
  569. package/cpp/src/models/openai-moe-iswa.cpp +127 -0
  570. package/cpp/src/models/openelm.cpp +124 -0
  571. package/cpp/src/models/orion.cpp +123 -0
  572. package/cpp/src/models/paddleocr.cpp +122 -0
  573. package/cpp/src/models/pangu-embedded.cpp +121 -0
  574. package/cpp/src/models/phi2.cpp +121 -0
  575. package/cpp/src/models/phi3.cpp +152 -0
  576. package/cpp/src/models/plamo.cpp +110 -0
  577. package/cpp/src/models/plamo2.cpp +318 -0
  578. package/cpp/src/models/plamo3.cpp +128 -0
  579. package/cpp/src/models/plm.cpp +169 -0
  580. package/cpp/src/models/qwen.cpp +108 -0
  581. package/cpp/src/models/qwen2.cpp +126 -0
  582. package/cpp/src/models/qwen2moe.cpp +151 -0
  583. package/cpp/src/models/qwen2vl.cpp +117 -0
  584. package/cpp/src/models/qwen3.cpp +117 -0
  585. package/cpp/src/models/qwen35.cpp +386 -0
  586. package/cpp/src/models/qwen35moe.cpp +420 -0
  587. package/cpp/src/models/qwen3moe.cpp +124 -0
  588. package/cpp/src/models/qwen3next.cpp +525 -0
  589. package/cpp/src/models/qwen3vl-moe.cpp +140 -0
  590. package/cpp/src/models/qwen3vl.cpp +132 -0
  591. package/cpp/src/models/refact.cpp +94 -0
  592. package/cpp/src/models/rnd1.cpp +126 -0
  593. package/cpp/src/models/rwkv6-base.cpp +164 -0
  594. package/cpp/src/models/rwkv6.cpp +94 -0
  595. package/cpp/src/models/rwkv6qwen2.cpp +86 -0
  596. package/cpp/src/models/rwkv7-base.cpp +137 -0
  597. package/cpp/src/models/rwkv7.cpp +90 -0
  598. package/cpp/src/models/seed-oss.cpp +124 -0
  599. package/cpp/src/models/smallthinker.cpp +126 -0
  600. package/cpp/src/models/smollm3.cpp +128 -0
  601. package/cpp/src/models/stablelm.cpp +146 -0
  602. package/cpp/src/models/starcoder.cpp +100 -0
  603. package/cpp/src/models/starcoder2.cpp +121 -0
  604. package/cpp/src/models/step35-iswa.cpp +168 -0
  605. package/cpp/src/models/t5-dec.cpp +166 -0
  606. package/cpp/src/models/t5-enc.cpp +96 -0
  607. package/cpp/src/models/wavtokenizer-dec.cpp +149 -0
  608. package/cpp/src/models/xverse.cpp +108 -0
  609. package/cpp/src/unicode-data.cpp +7034 -0
  610. package/cpp/src/unicode-data.h +20 -0
  611. package/cpp/src/unicode.cpp +1103 -0
  612. package/cpp/src/unicode.h +111 -0
  613. package/cpp/vendor/nlohmann/json.hpp +25526 -0
  614. package/cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  615. package/cpp/vendor/stb/stb_image.h +7988 -0
  616. package/ios/LocalLLM-Bridging-Header.h +2 -0
  617. package/ios/LocalLLM.h +5 -0
  618. package/ios/LocalLLM.mm +1267 -0
  619. package/local-llm-rn.podspec +60 -0
  620. package/package.json +35 -0
  621. package/src/NativeLocalLLM.ts +73 -0
  622. package/src/device.ts +50 -0
  623. package/src/download-adapter.ts +17 -0
  624. package/src/index.ts +21 -0
  625. package/src/native-bridge.ts +142 -0
  626. package/src/rn-downloader.ts +37 -0
@@ -0,0 +1,730 @@
1
+ #include "mtmd-audio.h"
2
+
3
+ #define _USE_MATH_DEFINES // for M_PI
4
+ #include <cmath>
5
+ #include <cstdint>
6
+ #include <cstring>
7
+ #include <thread>
8
+ #include <vector>
9
+ #include <fstream>
10
+ #include <algorithm>
11
+
12
+ // some of the code here is copied from whisper.cpp
13
+
14
+ constexpr bool DEBUG = false;
15
+
16
+ void mtmd_audio_cache::fill_sin_cos_table(int n) {
17
+ sin_vals.resize(n);
18
+ cos_vals.resize(n);
19
+ for (int i = 0; i < n; i++) {
20
+ double theta = (2 * M_PI * i) / n;
21
+ sin_vals[i] = sinf(theta);
22
+ cos_vals[i] = cosf(theta);
23
+ }
24
+ }
25
+
26
+ void mtmd_audio_cache::fill_hann_window(int length, bool periodic) {
27
+ hann_window.resize(length);
28
+ int offset = -1;
29
+ if (periodic) {
30
+ offset = 0;
31
+ }
32
+ for (int i = 0; i < length; i++) {
33
+ hann_window[i] = 0.5 * (1.0 - cosf((2.0 * M_PI * i) / (length + offset)));
34
+ }
35
+ }
36
+
37
+ void mtmd_audio_cache::fill_mel_filterbank_matrix(int n_mel,
38
+ int n_fft,
39
+ int sample_rate,
40
+ float fmin,
41
+ float fmax,
42
+ bool slaney_area_norm,
43
+ float scale) {
44
+ GGML_ASSERT(n_mel > 0 && n_fft > 1);
45
+ if (fmax <= 0.0f) {
46
+ fmax = 0.5f * sample_rate;
47
+ }
48
+
49
+ // Slaney scale (matches librosa default)
50
+ const double min_log_hz = 1000.0;
51
+ const double lin_slope = 3 / 200.;
52
+ const double min_log_mel = min_log_hz * lin_slope;
53
+ const double log_step = log(6.4) / 27.0;
54
+ auto hz_to_mel = [min_log_hz, lin_slope, log_step, min_log_mel](const double f_hz) -> double {
55
+ return (f_hz < min_log_hz) ? f_hz * lin_slope : min_log_mel + log(f_hz / min_log_hz) / log_step;
56
+ };
57
+ auto mel_to_hz = [min_log_hz, lin_slope, log_step, min_log_mel](const double m) -> double {
58
+ return (m < min_log_mel) ? m / lin_slope : min_log_hz * exp((m - min_log_mel) * log_step);
59
+ };
60
+
61
+ // infer N_fft from n_fft_bins
62
+ const double bin_hz_step = double(sample_rate) / double(n_fft);
63
+
64
+ // mel grid: n_mel + 2 edges
65
+ const double m_lo = hz_to_mel(fmin);
66
+ const double m_hi = hz_to_mel(fmax);
67
+ std::vector<double> mel_pts(n_mel + 2);
68
+ for (int i = 0; i < n_mel + 2; ++i) {
69
+ mel_pts[i] = m_lo + (m_hi - m_lo) * (double(i) / (n_mel + 1));
70
+ }
71
+
72
+ // convert to Hz
73
+ std::vector<double> hz_pts(n_mel + 2);
74
+ for (int i = 0; i < n_mel + 2; ++i) {
75
+ hz_pts[i] = mel_to_hz(mel_pts[i]);
76
+ }
77
+
78
+ const int n_fft_bins = n_fft / 2 + 1;
79
+
80
+ // filterbank
81
+ std::vector<float> out(n_mel * n_fft_bins, 0);
82
+ for (int m = 0; m < n_mel; ++m) {
83
+ const double f_left = hz_pts[m];
84
+ const double f_center = hz_pts[m + 1];
85
+ const double f_right = hz_pts[m + 2];
86
+
87
+ const double denom_l = std::max(1e-30, f_center - f_left);
88
+ const double denom_r = std::max(1e-30, f_right - f_center);
89
+ const double enorm = slaney_area_norm ? (2.0 / std::max(1e-30, f_right - f_left)) : 1.0;
90
+
91
+ for (int k = 0; k < n_fft_bins; ++k) {
92
+ const double f = k * bin_hz_step;
93
+ double w = 0.0;
94
+ if (f >= f_left && f <= f_center) {
95
+ w = (f - f_left) / denom_l;
96
+ } else if (f > f_center && f <= f_right) {
97
+ w = (f_right - f) / denom_r;
98
+ }
99
+ out[size_t(m) * size_t(n_fft_bins) + size_t(k)] = float(w * enorm * scale);
100
+ }
101
+ }
102
+
103
+ filters.n_mel = n_mel;
104
+ filters.n_fft = n_fft;
105
+ filters.data = std::move(out);
106
+
107
+ if (DEBUG) { // debug
108
+ for (size_t i = 0; i < filters.data.size(); ++i) {
109
+ if (filters.data[i] != 0.0f) {
110
+ printf("filters[%zu] = %f\n", i, filters.data[i] * 1000.0f);
111
+ }
112
+ }
113
+ }
114
+ }
115
+
116
+ // Unified DFT implementation for both forward and inverse transforms
117
+ // Template parameters:
118
+ // Inverse: false = DFT with exp(-2πi·k·n/N), no scaling
119
+ // true = IDFT with exp(+2πi·k·n/N), scales by 1/N
120
+ // RealInput: true = input is real-valued (stride 1), avoids imaginary computations
121
+ // false = input is complex-valued (interleaved real/imag, stride 2)
122
+ template <bool Inverse, bool RealInput>
123
+ static void dft_impl(const mtmd_audio_cache & cache, const float * in, int N, float * out) {
124
+ const int n_sin_cos_vals = cache.sin_vals.size();
125
+ const int sin_cos_step = n_sin_cos_vals / N;
126
+
127
+ constexpr float sign = Inverse ? 1.0f : -1.0f;
128
+ const float scale = Inverse ? (1.0f / N) : 1.0f;
129
+
130
+ for (int k = 0; k < N; k++) {
131
+ float re = 0;
132
+ float im = 0;
133
+
134
+ for (int n = 0; n < N; n++) {
135
+ int idx = (k * n * sin_cos_step) % n_sin_cos_vals;
136
+ float cos_val = cache.cos_vals[idx];
137
+ float sin_val = cache.sin_vals[idx];
138
+
139
+ if constexpr (RealInput) {
140
+ // Real input: in_im = 0, simplifies to:
141
+ // re += in_re * cos_val
142
+ // im += sign * in_re * sin_val
143
+ float in_re = in[n];
144
+ re += in_re * cos_val;
145
+ im += sign * in_re * sin_val;
146
+ } else {
147
+ float in_re = in[n * 2 + 0];
148
+ float in_im = in[n * 2 + 1];
149
+ // (a + bi) * (cos + sign*i*sin) = (a*cos - sign*b*sin) + (sign*a*sin + b*cos)i
150
+ re += in_re * cos_val - sign * in_im * sin_val;
151
+ im += sign * in_re * sin_val + in_im * cos_val;
152
+ }
153
+ }
154
+
155
+ out[k * 2 + 0] = re * scale;
156
+ out[k * 2 + 1] = im * scale;
157
+ }
158
+ }
159
+
160
+ // Cooley-Tukey FFT/IFFT unified implementation
161
+ // Template parameters:
162
+ // Inverse: false = FFT with exp(-2πi·k/N), no scaling
163
+ // true = IFFT with exp(+2πi·k/N), scales by 0.5 at each level
164
+ // RealInput: true = input is real-valued (stride 1)
165
+ // false = input is complex-valued (interleaved real/imag, stride 2)
166
+ template <bool Inverse, bool RealInput>
167
+ static void fft_impl(const mtmd_audio_cache & cache, float * in, int N, float * out) {
168
+ const int n_sin_cos_vals = cache.sin_vals.size();
169
+
170
+ if (N == 1) {
171
+ out[0] = in[0];
172
+ if constexpr (RealInput) {
173
+ out[1] = 0.0f;
174
+ } else {
175
+ out[1] = in[1];
176
+ }
177
+ return;
178
+ }
179
+
180
+ const int half_N = N / 2;
181
+ if (N - half_N * 2 == 1) {
182
+ // Odd N: fall back to DFT
183
+ dft_impl<Inverse, RealInput>(cache, in, N, out);
184
+ return;
185
+ }
186
+
187
+ // Split into even and odd
188
+ if constexpr (RealInput) {
189
+ // Real input: stride is 1, copy only real values
190
+ float * even = in + N;
191
+ for (int i = 0; i < half_N; ++i) {
192
+ even[i] = in[2 * i];
193
+ }
194
+ float * even_fft = out + 2 * N;
195
+ fft_impl<Inverse, true>(cache, even, half_N, even_fft);
196
+
197
+ float * odd = even;
198
+ for (int i = 0; i < half_N; ++i) {
199
+ odd[i] = in[2 * i + 1];
200
+ }
201
+ float * odd_fft = even_fft + N;
202
+ fft_impl<Inverse, true>(cache, odd, half_N, odd_fft);
203
+ } else {
204
+ // Complex input: stride is 2, copy complex pairs
205
+ float * even = in + N * 2;
206
+ for (int i = 0; i < half_N; ++i) {
207
+ even[i * 2 + 0] = in[2 * i * 2 + 0];
208
+ even[i * 2 + 1] = in[2 * i * 2 + 1];
209
+ }
210
+ float * even_fft = out + 2 * N;
211
+ fft_impl<Inverse, false>(cache, even, half_N, even_fft);
212
+
213
+ float * odd = even;
214
+ for (int i = 0; i < half_N; ++i) {
215
+ odd[i * 2 + 0] = in[(2 * i + 1) * 2 + 0];
216
+ odd[i * 2 + 1] = in[(2 * i + 1) * 2 + 1];
217
+ }
218
+ float * odd_fft = even_fft + N;
219
+ fft_impl<Inverse, false>(cache, odd, half_N, odd_fft);
220
+ }
221
+
222
+ float * even_fft = out + 2 * N;
223
+ float * odd_fft = even_fft + N;
224
+
225
+ const int sin_cos_step = n_sin_cos_vals / N;
226
+
227
+ constexpr float sign = Inverse ? 1.0f : -1.0f;
228
+ constexpr float scale = Inverse ? 0.5f : 1.0f;
229
+
230
+ for (int k = 0; k < half_N; k++) {
231
+ int idx = k * sin_cos_step; // t = 2*M_PI*k/N
232
+ float re = cache.cos_vals[idx];
233
+ float im = sign * cache.sin_vals[idx];
234
+
235
+ float re_odd = odd_fft[2 * k + 0];
236
+ float im_odd = odd_fft[2 * k + 1];
237
+
238
+ out[2 * k + 0] = scale * (even_fft[2 * k + 0] + re * re_odd - im * im_odd);
239
+ out[2 * k + 1] = scale * (even_fft[2 * k + 1] + re * im_odd + im * re_odd);
240
+
241
+ out[2 * (k + half_N) + 0] = scale * (even_fft[2 * k + 0] - re * re_odd + im * im_odd);
242
+ out[2 * (k + half_N) + 1] = scale * (even_fft[2 * k + 1] - re * im_odd - im * re_odd);
243
+ }
244
+ }
245
+
246
+ // Forward FFT for real input (used by mel spectrogram)
247
+ static void fft(const mtmd_audio_cache & cache, float * in, int N, float * out) {
248
+ fft_impl<false, true>(cache, in, N, out);
249
+ }
250
+
251
+ // Inverse FFT for complex input
252
+ static void ifft(const mtmd_audio_cache & cache, float * in, int N, float * out) {
253
+ fft_impl<true, false>(cache, in, N, out);
254
+ }
255
+
256
// Settings for one log-mel spectrogram computation
// (consumed by log_mel_spectrogram and its worker threads).
struct filter_params {
    int32_t n_mel;             // number of mel bands in the output
    int32_t n_fft_bins;        // FFT bins per frame; the frame size is (n_fft_bins - 1) * 2
    int32_t hann_window_size;  // length of the cached Hann window
    int32_t hop_length;        // samples between the starts of consecutive frames
    int32_t sample_rate;       // samples per second
    // true:  zero-pad half a frame on both sides
    // false: reflect-pad the start and append 30 seconds of zeros
    bool    center_padding   = false;
    // non-zero enables the pre-emphasis filter
    float   preemph          = 0.0f;
    // true: log(x + eps), false: log10(max(x, 1e-10))
    bool    use_natural_log  = false;
    // true: per-band mean/variance normalization, false: global clamp and rescale
    bool    norm_per_feature = false;
};
267
+
268
+ static void log_mel_spectrogram_worker_thread(int ith,
269
+ const float * hann,
270
+ const std::vector<float> & samples,
271
+ int n_samples,
272
+ int frame_size,
273
+ int frame_step,
274
+ int n_threads,
275
+ const filter_params & params,
276
+ const mtmd_audio_cache & cache,
277
+ mtmd_audio_mel & out) {
278
+ std::vector<float> fft_in(frame_size * 2, 0.0);
279
+ std::vector<float> fft_out(frame_size * 2 * 2 * 2);
280
+
281
+ int n_fft_bins = params.n_fft_bins;
282
+ int i = ith;
283
+
284
+ const auto & filters = cache.filters;
285
+
286
+ // make sure n_fft == 1 + (WHISPER_N_FFT / 2), bin_0 to bin_nyquist
287
+ GGML_ASSERT(n_fft_bins == 1 + (frame_size / 2));
288
+ GGML_ASSERT(cache.sin_vals.size() == cache.cos_vals.size());
289
+ // calculate FFT only when fft_in are not all zero
290
+ for (; i < std::min(n_samples / frame_step + 1, out.n_len); i += n_threads) {
291
+ const int offset = i * frame_step;
292
+
293
+ // apply Hann window (~10% faster)
294
+ for (int j = 0; j < std::min(frame_size, n_samples - offset); j++) {
295
+ fft_in[j] = hann[j] * samples[offset + j];
296
+ }
297
+
298
+ // fill the rest with zeros
299
+ if (n_samples - offset < frame_size) {
300
+ std::fill(fft_in.begin() + (n_samples - offset), fft_in.end(), 0.0);
301
+ }
302
+
303
+ // FFT
304
+ fft(cache, fft_in.data(), frame_size, fft_out.data());
305
+
306
+ // Calculate modulus^2 of complex numbers
307
+ // Use pow(fft_out[2 * j + 0], 2) + pow(fft_out[2 * j + 1], 2) causes inference quality problem? Interesting.
308
+ for (int j = 0; j < n_fft_bins; j++) {
309
+ fft_out[j] = (fft_out[2 * j + 0] * fft_out[2 * j + 0] + fft_out[2 * j + 1] * fft_out[2 * j + 1]);
310
+ }
311
+
312
+ // mel spectrogram
313
+ for (int j = 0; j < out.n_mel; j++) {
314
+ double sum = 0.0;
315
+ // unroll loop (suggested by GH user @lunixbochs)
316
+ int k = 0;
317
+ for (k = 0; k < n_fft_bins - 3; k += 4) {
318
+ size_t idx = size_t(j) * size_t(n_fft_bins) + size_t(k);
319
+ sum +=
320
+ fft_out[k + 0] * filters.data[idx + 0] +
321
+ fft_out[k + 1] * filters.data[idx + 1] +
322
+ fft_out[k + 2] * filters.data[idx + 2] +
323
+ fft_out[k + 3] * filters.data[idx + 3];
324
+ }
325
+ // handle n_fft remainder
326
+ for (; k < n_fft_bins; k++) {
327
+ sum += fft_out[k] * filters.data[j * n_fft_bins + k];
328
+ }
329
+ sum = params.use_natural_log
330
+ ? log(sum + 5.960464477539063e-08)
331
+ : log10(std::max(sum, 1e-10));
332
+ out.data[j * out.n_len + i] = sum;
333
+ }
334
+ }
335
+
336
+ // Otherwise fft_out are all zero
337
+ double sum = params.use_natural_log ? log(1e-10) : log10(1e-10);
338
+ for (; i < out.n_len; i += n_threads) {
339
+ for (int j = 0; j < out.n_mel; j++) {
340
+ out.data[j * out.n_len + i] = sum;
341
+ }
342
+ }
343
+ }
344
+
345
+ // ref: https://github.com/openai/whisper/blob/main/whisper/audio.py#L110-L157
346
+ static bool log_mel_spectrogram(
347
+ const float * samples,
348
+ const int n_samples_in,
349
+ const int n_threads,
350
+ const filter_params & params,
351
+ const mtmd_audio_cache & cache,
352
+ mtmd_audio_mel & out) {
353
+ //const int64_t t_start_us = ggml_time_us();
354
+
355
+ out.n_len_org = n_samples_in;
356
+ int n_samples = n_samples_in;
357
+
358
+ // Hann window
359
+ const float * hann = cache.hann_window.data();
360
+ const int frame_size = (params.n_fft_bins - 1) * 2;
361
+ const int frame_step = params.hop_length;
362
+
363
+ // Padding
364
+ std::vector<float> samples_padded;
365
+ if (params.center_padding) {
366
+ const auto pad_amount = frame_size / 2;
367
+ samples_padded = std::vector<float>(n_samples + 2 * pad_amount, 0);
368
+ std::copy(samples, samples + n_samples, samples_padded.data() + pad_amount);
369
+ samples = samples_padded.data();
370
+ n_samples = samples_padded.size();
371
+ } else {
372
+ // existing padding logic
373
+ int64_t stage_1_pad = params.sample_rate * 30;
374
+ int64_t stage_2_pad = frame_size / 2;
375
+ samples_padded.resize(n_samples + stage_1_pad + stage_2_pad * 2);
376
+ std::copy(samples, samples + n_samples, samples_padded.begin() + stage_2_pad);
377
+ // pad 30 seconds of zeros at the end of audio (480,000 samples) + reflective pad 200 samples at the end of audio
378
+ std::fill(samples_padded.begin() + n_samples + stage_2_pad, samples_padded.begin() + n_samples + stage_1_pad + 2 * stage_2_pad, 0);
379
+ // reflective pad 200 samples at the beginning of audio
380
+ if (n_samples < stage_2_pad + 1) {
381
+ // TODO: Handle short audio differently or return error
382
+ return false;
383
+ }
384
+ std::reverse_copy(samples + 1, samples + 1 + stage_2_pad, samples_padded.begin());
385
+ }
386
+
387
+ // preemphasis
388
+ if (params.preemph) {
389
+ const int pad_amount = frame_size / 2;
390
+ const float preemph = 0.97f;
391
+ float prev = samples_padded[pad_amount];
392
+ for (int i = pad_amount + 1; i + pad_amount < n_samples; ++i) {
393
+ float cur = samples_padded[i];
394
+ samples_padded[i] = cur - preemph * prev;
395
+ prev = cur;
396
+ }
397
+ }
398
+
399
+ // pad hann window if it's smaller than frame_size
400
+ // TODO: probably unnecessary here? (or better doing it in g_cache?)
401
+ std::vector<float> hann_window_padded;
402
+ if (params.hann_window_size < frame_size) {
403
+ hann_window_padded.resize(frame_size);
404
+ const int padding = (frame_size - params.hann_window_size) / 2;
405
+ std::copy(hann, hann + params.hann_window_size, &hann_window_padded[padding]);
406
+ hann = hann_window_padded.data();
407
+ }
408
+
409
+
410
+ out.n_mel = params.n_mel;
411
+ out.n_len = (n_samples - frame_size) / frame_step + 1;
412
+ // TODO: handle these checks better
413
+ if (out.n_mel > 0 && (unsigned long)out.n_len > SIZE_MAX / out.n_mel) {
414
+ LOG_ERR("%s: size overflow\n", __func__);
415
+ return false;
416
+ }
417
+ if (n_samples < frame_size) {
418
+ LOG_ERR("%s: not enough samples after padding\n", __func__);
419
+ return false;
420
+ }
421
+ out.data.resize(out.n_mel * out.n_len);
422
+
423
+ {
424
+ std::vector<std::thread> workers(n_threads - 1);
425
+ for (int iw = 0; iw < n_threads - 1; ++iw) {
426
+ workers[iw] =
427
+ std::thread(log_mel_spectrogram_worker_thread, iw + 1, hann, std::cref(samples_padded), n_samples,
428
+ frame_size, frame_step, n_threads, std::cref(params), std::cref(cache), std::ref(out));
429
+ }
430
+
431
+ // main thread
432
+ log_mel_spectrogram_worker_thread(0, hann, samples_padded, n_samples, frame_size, frame_step, n_threads, params,
433
+ cache, out);
434
+ for (int iw = 0; iw < n_threads - 1; ++iw) {
435
+ workers[iw].join();
436
+ }
437
+ }
438
+
439
+ const int effective_n_len = n_samples_in / frame_step;
440
+ if (params.norm_per_feature) {
441
+ for (int i = 0; i < out.n_mel; i++) {
442
+ double mean = 0;
443
+ for (int j = 0; j < effective_n_len; ++j) {
444
+ mean += out.data[i * out.n_len + j];
445
+ }
446
+ mean /= effective_n_len;
447
+
448
+ double var = 0.0;
449
+ for (int j = 0; j < effective_n_len; ++j) {
450
+ const double value = out.data[i * out.n_len + j] - mean;
451
+ var += value * value;
452
+ }
453
+ var /= effective_n_len - 1; // unbiased
454
+ const double mstd = std::sqrt(var + 1e-5);
455
+
456
+ for (int j = 0; j < effective_n_len; ++j) {
457
+ auto &value = out.data[i * out.n_len + j];
458
+ value = (value - mean) / mstd;
459
+ }
460
+
461
+ // pad the rest with zeros
462
+ for (int j = effective_n_len; j < out.n_len; ++j) {
463
+ out.data[i * out.n_len + j] = 0.0;
464
+ }
465
+ }
466
+ } else {
467
+ // clamping and normalization
468
+ double mmax = -1e20;
469
+ for (int i = 0; i < out.n_mel*out.n_len; i++) {
470
+ if (out.data[i] > mmax) {
471
+ mmax = out.data[i];
472
+ }
473
+ }
474
+
475
+ mmax -= 8.0;
476
+
477
+ for (int i = 0; i < out.n_mel*out.n_len; i++) {
478
+ if (out.data[i] < mmax) {
479
+ out.data[i] = mmax;
480
+ }
481
+ out.data[i] = (out.data[i] + 4.0)/4.0;
482
+ }
483
+ }
484
+
485
+ // Dump log_mel_spectrogram
486
+ if (DEBUG) {
487
+ std::ofstream outFile("log_mel_spectrogram.json");
488
+ outFile << "[";
489
+ for (uint64_t i = 0; i < out.data.size() - 1; i++) {
490
+ outFile << out.data[i] << ", ";
491
+ }
492
+ outFile << out.data[out.data.size() - 1] << "]";
493
+ outFile.close();
494
+ }
495
+
496
+ return true;
497
+ }
498
+
499
+ //
500
+ // mtmd_audio_preprocessor_whisper
501
+ //
502
+
503
+ void mtmd_audio_preprocessor_whisper::initialize() {
504
+ cache.fill_sin_cos_table(hparams.audio_n_fft);
505
+ cache.fill_hann_window(hparams.audio_window_len, true);
506
+ cache.fill_mel_filterbank_matrix(hparams.n_mel_bins, hparams.audio_n_fft, hparams.audio_sample_rate);
507
+ }
508
+
509
+ bool mtmd_audio_preprocessor_whisper::preprocess(const float * samples,
510
+ size_t n_samples,
511
+ std::vector<mtmd_audio_mel> & output) {
512
+ if (n_samples == 0) {
513
+ // empty audio
514
+ return false;
515
+ }
516
+
517
+ std::vector<float> smpl;
518
+ // if input is too short, pad with zeros
519
+ // this is to avoid potential issues with stage1/2 padding in log_mel_spectrogram
520
+ // TODO: maybe handle this better
521
+ size_t min_samples = (size_t) hparams.audio_sample_rate * (hparams.audio_chunk_len + 1); // +1 second margin
522
+ if (n_samples < min_samples) {
523
+ smpl.resize(min_samples, 0.0f);
524
+ std::memcpy(smpl.data(), samples, n_samples * sizeof(float));
525
+ samples = smpl.data();
526
+ n_samples = smpl.size();
527
+ }
528
+
529
+ filter_params params;
530
+ params.n_mel = hparams.n_mel_bins;
531
+ params.n_fft_bins = 1 + (hparams.audio_n_fft / 2);
532
+ params.hann_window_size = hparams.audio_window_len;
533
+ params.hop_length = hparams.audio_hop_len;
534
+ params.sample_rate = hparams.audio_sample_rate;
535
+ params.center_padding = false;
536
+ params.preemph = 0.0f; // disabled
537
+ params.use_natural_log = false;
538
+ params.norm_per_feature = false;
539
+
540
+ // make sure the cache is initialized
541
+ GGML_ASSERT(!cache.sin_vals.empty());
542
+ GGML_ASSERT(!cache.cos_vals.empty());
543
+ GGML_ASSERT(!cache.filters.data.empty());
544
+
545
+ mtmd_audio_mel out_full;
546
+ bool ok = log_mel_spectrogram(samples, n_samples,
547
+ 4, // n_threads
548
+ params, cache, out_full);
549
+ if (!ok) {
550
+ return false;
551
+ }
552
+
553
+ // because the cgraph in clip.cpp only accepts 3000 frames each, we need to split the mel
554
+ // we always expect the mel to have 3000 silent frames at the end
555
+ if (DEBUG) {
556
+ printf("output: n_mel = %d, n_len = %d\n", out_full.n_mel, out_full.n_len);
557
+ }
558
+ const size_t frames_per_chunk = 3000;
559
+ GGML_ASSERT((size_t) out_full.n_len > frames_per_chunk);
560
+ for (size_t off = 0; off < (size_t) out_full.n_len; off += frames_per_chunk) {
561
+ int n_len = std::min(frames_per_chunk, (size_t) out_full.n_len - off);
562
+ if ((size_t) n_len < frames_per_chunk) {
563
+ break; // last uncomplete chunk will always be a padded chunk, safe to ignore
564
+ }
565
+
566
+ mtmd_audio_mel out_chunk;
567
+ out_chunk.n_len = n_len;
568
+ out_chunk.n_mel = out_full.n_mel;
569
+ out_chunk.n_len_org = out_full.n_mel; // unused
570
+ out_chunk.data.reserve(out_chunk.n_mel * out_chunk.n_len);
571
+
572
+ for (int i = 0; i < out_full.n_mel; i++) {
573
+ auto src = out_full.data.begin() + i * out_full.n_len + off;
574
+ out_chunk.data.insert(out_chunk.data.end(), src, src + frames_per_chunk);
575
+ }
576
+
577
+ output.push_back(std::move(out_chunk));
578
+ }
579
+
580
+ return true;
581
+ }
582
+
583
+ //
584
+ // mtmd_audio_preprocessor_conformer
585
+ //
586
+
587
+ void mtmd_audio_preprocessor_conformer::initialize() {
588
+ cache.fill_sin_cos_table(hparams.audio_n_fft);
589
+ cache.fill_hann_window(hparams.audio_window_len, true);
590
+ cache.fill_mel_filterbank_matrix(hparams.n_mel_bins, hparams.audio_n_fft, hparams.audio_sample_rate);
591
+ }
592
+
593
+ bool mtmd_audio_preprocessor_conformer::preprocess(const float * samples,
594
+ size_t n_samples,
595
+ std::vector<mtmd_audio_mel> & output) {
596
+ // empty audio
597
+ if (n_samples == 0) {
598
+ return false;
599
+ }
600
+
601
+ filter_params params;
602
+ params.n_mel = hparams.n_mel_bins;
603
+ params.n_fft_bins = 1 + (hparams.audio_n_fft / 2);
604
+ params.hann_window_size = hparams.audio_window_len;
605
+ params.hop_length = hparams.audio_hop_len;
606
+ params.sample_rate = hparams.audio_sample_rate;
607
+ params.center_padding = true;
608
+ params.preemph = 0.97f;
609
+ params.use_natural_log = true;
610
+ params.norm_per_feature = true;
611
+
612
+ // make sure the cache is initialized
613
+ GGML_ASSERT(!cache.sin_vals.empty());
614
+ GGML_ASSERT(!cache.cos_vals.empty());
615
+ GGML_ASSERT(!cache.filters.data.empty());
616
+
617
+ mtmd_audio_mel out_full;
618
+ bool ok = log_mel_spectrogram(samples, n_samples,
619
+ 4, // n_threads
620
+ params, cache, out_full);
621
+ if (!ok) {
622
+ return false;
623
+ }
624
+
625
+ output.push_back(std::move(out_full));
626
+ return true;
627
+ }
628
+
629
+ //
630
+ // mtmd_audio_streaming_istft implementation
631
+ //
632
+
633
+ mtmd_audio_streaming_istft::mtmd_audio_streaming_istft(int n_fft, int hop_length) :
634
+ n_fft(n_fft),
635
+ hop_length(hop_length),
636
+ n_fft_bins(n_fft / 2 + 1),
637
+ overlap_buffer(n_fft, 0.0f),
638
+ window_sum_buffer(n_fft, 0.0f),
639
+ padding_to_remove((n_fft - hop_length) / 2),
640
+ ifft_in(n_fft * 2 * 4, 0.0f), // extra space for recursive IFFT
641
+ ifft_out(n_fft * 2 * 4, 0.0f) {
642
+ cache.fill_sin_cos_table(n_fft);
643
+ cache.fill_hann_window(n_fft, true);
644
+ }
645
+
646
+ void mtmd_audio_streaming_istft::reset() {
647
+ std::fill(overlap_buffer.begin(), overlap_buffer.end(), 0.0f);
648
+ std::fill(window_sum_buffer.begin(), window_sum_buffer.end(), 0.0f);
649
+ padding_to_remove = (n_fft - hop_length) / 2;
650
+ }
651
+
652
+ std::vector<float> mtmd_audio_streaming_istft::process_frame(const float * frame_spectrum) {
653
+ std::vector<float> output(hop_length);
654
+
655
+ // copy frequencies
656
+ for (int j = 0; j < n_fft_bins; j++) {
657
+ ifft_in[j * 2 + 0] = frame_spectrum[j * 2 + 0];
658
+ ifft_in[j * 2 + 1] = frame_spectrum[j * 2 + 1];
659
+ }
660
+
661
+ // mirror negative frequencies
662
+ for (int j = 1; j < n_fft_bins - 1; j++) {
663
+ int mirror_idx = n_fft - j;
664
+ ifft_in[mirror_idx * 2 + 0] = ifft_in[j * 2 + 0];
665
+ ifft_in[mirror_idx * 2 + 1] = -ifft_in[j * 2 + 1]; // conjugate
666
+ }
667
+
668
+ ifft(cache, ifft_in.data(), n_fft, ifft_out.data());
669
+
670
+ // update window sum and overlap buffer
671
+ for (int j = 0; j < n_fft; j++) {
672
+ window_sum_buffer[j] += cache.hann_window[j] * cache.hann_window[j];
673
+ overlap_buffer[j] += ifft_out[j * 2] * cache.hann_window[j];
674
+ }
675
+
676
+ // extract hop_length samples with normalization
677
+ for (int i = 0; i < hop_length; i++) {
678
+ if (window_sum_buffer[i] > 1e-8f) {
679
+ output[i] = overlap_buffer[i] / window_sum_buffer[i];
680
+ } else {
681
+ output[i] = overlap_buffer[i];
682
+ }
683
+ }
684
+
685
+ // shift buffers left by hop_length
686
+ std::copy(overlap_buffer.begin() + hop_length, overlap_buffer.end(), overlap_buffer.begin());
687
+ std::fill(overlap_buffer.end() - hop_length, overlap_buffer.end(), 0.0f);
688
+
689
+ std::copy(window_sum_buffer.begin() + hop_length, window_sum_buffer.end(), window_sum_buffer.begin());
690
+ std::fill(window_sum_buffer.end() - hop_length, window_sum_buffer.end(), 0.0f);
691
+
692
+ // Remove padding if needed
693
+ int to_remove = std::min(padding_to_remove, (int) output.size());
694
+ padding_to_remove -= to_remove;
695
+ output.erase(output.begin(), output.begin() + to_remove);
696
+
697
+ return output;
698
+ }
699
+
700
+ std::vector<float> mtmd_audio_streaming_istft::flush() {
701
+ std::vector<float> output;
702
+
703
+ // Extract remaining samples from overlap buffer
704
+ // Continue until we've extracted all meaningful samples
705
+ int remaining = n_fft - hop_length;
706
+ while (remaining > 0) {
707
+ int chunk_size = std::min(remaining, hop_length);
708
+
709
+ for (int i = 0; i < chunk_size; i++) {
710
+ float sample;
711
+ if (window_sum_buffer[i] > 1e-8f) {
712
+ sample = overlap_buffer[i] / window_sum_buffer[i];
713
+ } else {
714
+ sample = overlap_buffer[i];
715
+ }
716
+ output.push_back(sample);
717
+ }
718
+
719
+ // Shift buffers
720
+ std::copy(overlap_buffer.begin() + chunk_size, overlap_buffer.end(), overlap_buffer.begin());
721
+ std::fill(overlap_buffer.end() - chunk_size, overlap_buffer.end(), 0.0f);
722
+
723
+ std::copy(window_sum_buffer.begin() + chunk_size, window_sum_buffer.end(), window_sum_buffer.begin());
724
+ std::fill(window_sum_buffer.end() - chunk_size, window_sum_buffer.end(), 0.0f);
725
+
726
+ remaining -= chunk_size;
727
+ }
728
+
729
+ return output;
730
+ }