cui-llama.rn 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
@@ -0,0 +1,62 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h"
4
+
5
+ #include <cstdint>
6
+ #include <vector>
7
+ #include <string>
8
+
9
+ #define WHISPER_ASSERT LM_GGML_ASSERT
10
+
11
+ #define WHISPER_SAMPLE_RATE 16000
12
+ #define WHISPER_N_FFT 400
13
+ #define WHISPER_HOP_LENGTH 160
14
+ #define WHISPER_CHUNK_SIZE 30
15
+
16
+ #define COMMON_SAMPLE_RATE 16000
17
+
18
+ namespace whisper_preprocessor {
19
+
20
+ struct whisper_mel {
21
+ int n_len;
22
+ int n_len_org;
23
+ int n_mel;
24
+
25
+ std::vector<float> data;
26
+ };
27
+
28
+ struct whisper_filters {
29
+ int32_t n_mel;
30
+ int32_t n_fft;
31
+
32
+ std::vector<float> data;
33
+ };
34
+
35
+ extern bool preprocess_audio(
36
+ const float * samples,
37
+ size_t n_samples,
38
+ const whisper_filters & filters,
39
+ std::vector<whisper_mel> & output);
40
+
41
+ } // namespace whisper_preprocessor
42
+
43
+
44
+ // TODO @ngxson : move this helper to mtmd-helpers.cpp
45
+ namespace audio_helpers {
46
+
47
+ extern bool is_audio_file(const char * buf, size_t len);
48
+
49
+ extern bool decode_audio_from_buf(
50
+ const unsigned char * buf_in,
51
+ size_t len,
52
+ int target_sampler_rate,
53
+ std::vector<float> & pcmf32_mono);
54
+
55
+ } // namespace audio_helpers
56
+
57
+
58
+ namespace whisper_precalc_filters {
59
+
60
+ extern whisper_preprocessor::whisper_filters get_128_bins();
61
+
62
+ } // namespace whisper_precalc_filters
@@ -0,0 +1,297 @@
1
+ #include "mtmd.h"
2
+ #include "llama.h"
3
+
4
+ #include <algorithm>
5
+ #include <cinttypes>
6
+ #include <vector>
7
+
8
+ #define LOG_INF(...) fprintf(stdout, __VA_ARGS__)
9
+ #define LOG_ERR(...) fprintf(stderr, __VA_ARGS__)
10
+
11
+ size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
12
+ size_t n_tokens = 0;
13
+ for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
14
+ auto chunk = mtmd_input_chunks_get(chunks, i);
15
+ n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
16
+ }
17
+ return n_tokens;
18
+ }
19
+
20
+ llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
21
+ llama_pos n_pos = 0;
22
+ for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
23
+ auto chunk = mtmd_input_chunks_get(chunks, i);
24
+ n_pos += mtmd_input_chunk_get_n_pos(chunk);
25
+ }
26
+ return n_pos;
27
+ }
28
+
29
+ // helper struct to make working with embd batch easier
30
+ // note: this will be removed after llama_batch_ext refactoring
31
+ struct decode_embd_batch {
32
+ int n_pos_per_embd;
33
+ int n_mmproj_embd;
34
+ std::vector<llama_pos> pos;
35
+ std::vector<llama_pos> pos_view; // used by mrope
36
+ std::vector<int32_t> n_seq_id;
37
+ std::vector<llama_seq_id> seq_id_0;
38
+ std::vector<llama_seq_id *> seq_ids;
39
+ std::vector<int8_t> logits;
40
+ llama_batch batch;
41
+ decode_embd_batch(float * embd, int32_t n_tokens, int n_pos_per_embd, int n_mmproj_embd) : n_pos_per_embd(n_pos_per_embd), n_mmproj_embd(n_mmproj_embd) {
42
+ pos .resize(n_tokens * n_pos_per_embd);
43
+ n_seq_id.resize(n_tokens);
44
+ seq_ids .resize(n_tokens + 1);
45
+ logits .resize(n_tokens);
46
+ seq_id_0.resize(1);
47
+ seq_ids [n_tokens] = nullptr;
48
+ batch = {
49
+ /*n_tokens =*/ n_tokens,
50
+ /*tokens =*/ nullptr,
51
+ /*embd =*/ embd,
52
+ /*pos =*/ pos.data(),
53
+ /*n_seq_id =*/ n_seq_id.data(),
54
+ /*seq_id =*/ seq_ids.data(),
55
+ /*logits =*/ logits.data(),
56
+ };
57
+ }
58
+
59
+ void set_position_normal(llama_pos pos_0, llama_seq_id seq_id) {
60
+ seq_id_0[0] = seq_id;
61
+ for (int i = 0; i < batch.n_tokens; i++) {
62
+ batch.pos [i] = pos_0 + i;
63
+ batch.n_seq_id[i] = 1;
64
+ batch.seq_id [i] = seq_id_0.data();
65
+ batch.logits [i] = false;
66
+ }
67
+ }
68
+
69
+ void set_position_mrope(llama_pos pos_0, int nx, int ny, llama_seq_id seq_id) {
70
+ LM_GGML_ASSERT(n_pos_per_embd == 4);
71
+ seq_id_0[0] = seq_id;
72
+ for (int y = 0; y < ny; y++) {
73
+ for (int x = 0; x < nx; x++) {
74
+ int i = y * nx + x;
75
+ pos[i ] = pos_0;
76
+ pos[i + batch.n_tokens ] = pos_0 + y;
77
+ pos[i + batch.n_tokens * 2] = pos_0 + x;
78
+ pos[i + batch.n_tokens * 3] = 0; // last pos dim is unused
79
+ }
80
+ }
81
+ for (int i = 0; i < batch.n_tokens; i++) {
82
+ batch.n_seq_id[i] = 1;
83
+ batch.seq_id [i] = seq_id_0.data();
84
+ batch.logits [i] = false;
85
+ }
86
+ }
87
+
88
+ llama_batch get_view(int offset, int n_tokens) {
89
+ llama_pos * pos_ptr;
90
+ pos_view.clear();
91
+ pos_view.reserve(n_tokens * n_pos_per_embd);
92
+ if (n_pos_per_embd > 1) {
93
+ // mrope
94
+ // for example, with layout of src: 1234...1234...1234...1234...
95
+ // offset 2 will give us dst: 34...34...34...34...
96
+ for (int i = 0; i < n_pos_per_embd; i++) {
97
+ // assume n_tokens is less than or equal to batch.n_tokens
98
+ // batch.n_tokens is number of **total** tokens
99
+ // n_tokens is number of viewed token
100
+ size_t src_idx = i * batch.n_tokens + offset;
101
+ pos_view.insert(pos_view.end(),
102
+ pos.data() + src_idx,
103
+ pos.data() + src_idx + n_tokens);
104
+ }
105
+ pos_ptr = pos_view.data();
106
+ } else {
107
+ // normal
108
+ pos_ptr = pos.data() + offset;
109
+ }
110
+ return {
111
+ /*n_tokens =*/ n_tokens,
112
+ /*tokens =*/ nullptr,
113
+ /*embd =*/ batch.embd + offset * n_mmproj_embd,
114
+ /*pos =*/ pos_ptr,
115
+ /*n_seq_id =*/ batch.n_seq_id + offset,
116
+ /*seq_id =*/ batch.seq_id + offset,
117
+ /*logits =*/ batch.logits + offset,
118
+ };
119
+ }
120
+ };
121
+
122
+ // Helper function for decoding an image whose embeddings have already been calculated
123
+ int32_t mtmd_helper_decode_image_chunk(
124
+ mtmd_context * ctx,
125
+ struct llama_context * lctx,
126
+ const mtmd_input_chunk * chunk,
127
+ float * encoded_embd,
128
+ llama_pos n_past,
129
+ llama_seq_id seq_id,
130
+ int32_t n_batch,
131
+ llama_pos * new_n_past) {
132
+ auto chunk_type = mtmd_input_chunk_get_type(chunk);
133
+ const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
134
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
135
+ LOG_ERR("failed to decode chunk: input chunk not of image/audio type\n");
136
+ return -1;
137
+ }
138
+
139
+ const llama_model * model = llama_get_model(lctx);
140
+ int n_mmproj_embd = llama_model_n_embd(model);
141
+ int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 4 : 1;
142
+
143
+ int32_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
144
+ int32_t i_batch = 0;
145
+ int32_t n_img_batches = LM_GGML_PAD(n_tokens, n_batch) / n_batch;
146
+ decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
147
+
148
+ if (mtmd_decode_use_mrope(ctx)) {
149
+ const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
150
+ if (chunk_type != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
151
+ LOG_ERR("failed to decode chunk: M-RoPE only accepts image chunk\n");
152
+ return -1;
153
+ }
154
+ if (!image_tokens) {
155
+ LOG_ERR("failed to decode chunk: image tokens are null\n");
156
+ return -1;
157
+ }
158
+ const int nx = mtmd_image_tokens_get_nx(image_tokens);
159
+ const int ny = mtmd_image_tokens_get_ny(image_tokens);
160
+ batch_embd.set_position_mrope(n_past, nx, ny, seq_id);
161
+ } else {
162
+ batch_embd.set_position_normal(n_past, seq_id);
163
+ }
164
+
165
+ if (mtmd_decode_use_non_causal(ctx)) {
166
+ llama_set_causal_attn(lctx, false);
167
+ // TODO @ngxson : need to make sure only one image is processed at a time, and n_ubatch must be enough to hold the image
168
+ }
169
+
170
+ while (i_batch < n_img_batches) { // split into batches
171
+ int pos_offset = i_batch*n_batch;
172
+ int n_tokens_batch = std::min(n_batch, n_tokens - pos_offset);
173
+ llama_batch batch_embd_view = batch_embd.get_view(pos_offset, n_tokens_batch);
174
+
175
+ LOG_INF("decoding %s batch %d/%d, n_tokens_batch = %d\n", name, i_batch+1, n_img_batches, n_tokens_batch);
176
+
177
+ int64_t t1 = lm_ggml_time_ms();
178
+ int32_t ret = llama_decode(lctx, batch_embd_view);
179
+ if (ret != 0) {
180
+ LOG_ERR("failed to decode %s\n", name);
181
+ llama_set_causal_attn(lctx, true); // restore causal attn
182
+ return ret;
183
+ }
184
+
185
+ LOG_INF("%s decoded (batch %d/%d) in %" PRId64 " ms\n", name, i_batch+1, n_img_batches, lm_ggml_time_ms() - t1);
186
+
187
+ i_batch++;
188
+ }
189
+
190
+ n_past += mtmd_input_chunk_get_n_pos(chunk);
191
+ *new_n_past = n_past;
192
+
193
+ if (mtmd_decode_use_non_causal(ctx)) {
194
+ llama_set_causal_attn(lctx, true);
195
+ }
196
+ return 0;
197
+ }
198
+
199
+ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
200
+ struct llama_context * lctx,
201
+ const mtmd_input_chunk * chunk,
202
+ llama_pos n_past,
203
+ llama_seq_id seq_id,
204
+ int32_t n_batch,
205
+ bool logits_last,
206
+ llama_pos * new_n_past) {
207
+ int32_t ret;
208
+ llama_batch text_batch = llama_batch_init(n_batch, 0, 1);
209
+ auto chunk_type = mtmd_input_chunk_get_type(chunk);
210
+
211
+ if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
212
+ size_t n_tokens;
213
+ const auto tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
214
+ // LOG_INF("decoding text chunk, n_tokens = %zu\n", n_tokens);
215
+ size_t i = 0;
216
+ while (i < n_tokens) { // split into batches
217
+ text_batch.n_tokens = 0; // clear the batch
218
+ for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
219
+ int32_t j = text_batch.n_tokens;
220
+ text_batch.token [j] = tokens[i];
221
+ text_batch.pos [j] = n_past++;
222
+ text_batch.n_seq_id[j] = 1;
223
+ text_batch.seq_id [j][0] = seq_id;
224
+ text_batch.logits [j] = false;
225
+
226
+ text_batch.n_tokens++;
227
+ }
228
+ bool is_last_token = (i == n_tokens);
229
+ if (logits_last && is_last_token) {
230
+ text_batch.logits[text_batch.n_tokens - 1] = true;
231
+ }
232
+ ret = llama_decode(lctx, text_batch);
233
+ if (ret != 0) {
234
+ LOG_ERR("failed to decode text\n");
235
+ llama_batch_free(text_batch);
236
+ return ret;
237
+ }
238
+ *new_n_past += text_batch.n_tokens;
239
+ }
240
+
241
+ } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
242
+ const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
243
+ int64_t t0 = lm_ggml_time_ms();
244
+
245
+ LOG_INF("encoding %s slice...\n", name);
246
+
247
+ ret = mtmd_encode_chunk(ctx, chunk);
248
+ if (ret != 0) {
249
+ LOG_ERR("failed to encode %s slice\n", name);
250
+ llama_batch_free(text_batch);
251
+ return ret;
252
+ }
253
+
254
+ LOG_INF("%s slice encoded in %" PRId64 " ms\n", name, lm_ggml_time_ms() - t0);
255
+
256
+ float * embd = mtmd_get_output_embd(ctx);
257
+ ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
258
+ if (ret != 0) {
259
+ LOG_ERR("failed to decode %s\n", name);
260
+ llama_batch_free(text_batch);
261
+ return ret;
262
+ }
263
+ } else {
264
+ LM_GGML_ABORT("chunk type not supported");
265
+ }
266
+
267
+ return 0;
268
+ }
269
+
270
+ int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
271
+ struct llama_context * lctx,
272
+ const mtmd_input_chunks * chunks,
273
+ llama_pos n_past,
274
+ llama_seq_id seq_id,
275
+ int32_t n_batch,
276
+ bool logits_last,
277
+ llama_pos * new_n_past) {
278
+ size_t n_chunks = mtmd_input_chunks_size(chunks);
279
+ if (n_chunks == 0) {
280
+ LOG_ERR("no chunks to eval\n");
281
+ return 0;
282
+ }
283
+
284
+ for (size_t i = 0; i < n_chunks; i++) {
285
+ bool chunk_logits_last = (i == n_chunks - 1) && logits_last;
286
+ auto chunk = mtmd_input_chunks_get(chunks, i);
287
+
288
+ int32_t res = mtmd_helper_eval_chunk_single(ctx, lctx, chunk, n_past, seq_id, n_batch, chunk_logits_last, &n_past);
289
+ if (res != 0) {
290
+ LOG_ERR("failed to eval chunk %zu\n", i);
291
+ return res;
292
+ }
293
+ *new_n_past = n_past;
294
+ }
295
+
296
+ return 0;
297
+ }