cui-llama.rn 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/src/NativeRNLlama.ts CHANGED
@@ -68,6 +68,11 @@ export type NativeContextParams = {
 
   pooling_type?: number
 
+  /**
+   * Enable context shifting to handle prompts larger than context size
+   */
+  ctx_shift?: boolean
+
   // Embedding params
   embedding?: boolean
   embd_normalize?: number
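
The new `ctx_shift` context param is passed straight through at initialization. A minimal sketch of enabling it, assuming the package's usual `initLlama` entry point (the model path is a placeholder):

    import { initLlama } from 'cui-llama.rn'

    async function setup() {
      // ctx_shift lets generation continue past n_ctx by shifting out the
      // oldest tokens instead of failing (placeholder model path).
      return initLlama({
        model: '/data/local/models/model.gguf',
        n_ctx: 2048,
        ctx_shift: true,
      })
    }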
@@ -99,6 +104,12 @@ export type NativeCompletionParams = {
   }>
   preserved_tokens?: Array<string>
   chat_format?: number
+  /**
+   * Path to an image file to process before generating text.
+   * When provided, the image will be processed and added to the context.
+   * Requires multimodal support to be enabled via initMultimodal.
+   */
+  media_paths?: Array<string>
   /**
    * Specify a JSON array of stopping strings.
    * These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
@@ -278,6 +289,22 @@ export type NativeCompletionResult = {
 
 export type NativeTokenizeResult = {
   tokens: Array<number>
+  /**
+   * Whether the tokenization contains images
+   */
+  has_images: boolean
+  /**
+   * Bitmap hashes of the images
+   */
+  bitmap_hashes: Array<number>
+  /**
+   * Chunk positions of the text and images
+   */
+  chunk_pos: Array<number>
+  /**
+   * Chunk positions of the images
+   */
+  chunk_pos_images: Array<number>
 }
 
 export type NativeEmbeddingResult = {
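
A sketch of consuming the new fields through the context-level `tokenizeAsync` wrapper added later in this diff (image path is a placeholder; the `<__media__>` marker is the default media marker exported by `src/index.ts` below):

    const res = await context.tokenizeAsync('Describe this: <__media__>', {
      media_paths: ['/tmp/photo.jpg'],
    })
    if (res.has_images) {
      // bitmap_hashes identify the images; chunk_pos / chunk_pos_images mark
      // where the text and image chunks fall in the token stream.
      console.log(res.bitmap_hashes, res.chunk_pos, res.chunk_pos_images)
    }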
@@ -331,9 +358,14 @@ export type NativeSessionLoadResult = {
   prompt: string
 }
 
+export type NativeLlamaMessagePart = {
+  type: 'text'
+  text: string
+}
+
 export type NativeLlamaChatMessage = {
   role: string
-  content: string
+  content: string | Array<NativeLlamaMessagePart>
 }
 
 export type NativeCPUFeatures = {
@@ -342,8 +374,14 @@ export type NativeCPUFeatures = {
   dotprod: boolean
 }
 
-export type JinjaFormattedChatResult = {
+export type FormattedChatResult = {
+  type: 'jinja' | 'llama-chat'
   prompt: string
+  has_media: boolean
+  media_paths?: Array<string>
+}
+
+export type JinjaFormattedChatResult = FormattedChatResult & {
   chat_format?: number
   grammar?: string
   grammar_lazy?: boolean
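
Because `type` lives on the base `FormattedChatResult`, it distinguishes the two shapes at runtime; but since `JinjaFormattedChatResult` is an intersection rather than a separate union arm, narrowing to it still needs a cast, the same pattern `completion()` uses later in this diff. A sketch:

    function inspect(result: FormattedChatResult | JinjaFormattedChatResult) {
      if (result.type === 'jinja') {
        // Cast needed: TypeScript cannot narrow an intersection subtype here.
        const jinja = result as JinjaFormattedChatResult
        console.log('chat_format:', jinja.chat_format, 'grammar?', !!jinja.grammar)
      } else {
        console.log('llama-chat prompt:', result.prompt)
      }
      if (result.has_media) console.log('media:', result.media_paths)
    }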
@@ -356,6 +394,12 @@ export type JinjaFormattedChatResult = {
   additional_stops?: Array<string>
 }
 
+export type NativeImageProcessingResult = {
+  success: boolean
+  prompt: string
+  error?: string
+}
+
 export interface Spec extends TurboModule {
   toggleNativeLog(enabled: boolean): Promise<void>
   setContextLimit(limit: number): Promise<void>
@@ -392,8 +436,8 @@ export interface Spec extends TurboModule {
     params: NativeCompletionParams,
   ): Promise<NativeCompletionResult>
   stopCompletion(contextId: number): Promise<void>
-  tokenizeAsync(contextId: number, text: string): Promise<NativeTokenizeResult>
-  tokenizeSync(contextId: number, text: string): NativeTokenizeResult
+  tokenizeAsync(contextId: number, text: string, imagePaths?: Array<string>): Promise<NativeTokenizeResult>
+  tokenizeSync(contextId: number, text: string, imagePaths?: Array<string>): NativeTokenizeResult
   getCpuFeatures() : Promise<NativeCPUFeatures>
   detokenize(contextId: number, tokens: number[]): Promise<string>
   embedding(
@@ -418,6 +462,30 @@ export interface Spec extends TurboModule {
     contextId: number,
   ): Promise<Array<{ path: string; scaled?: number }>>
 
+  // Multimodal methods
+  initMultimodal(
+    contextId: number,
+    params: {
+      path: string
+      use_gpu: boolean
+    },
+  ): Promise<boolean>
+
+  isMultimodalEnabled(
+    contextId: number,
+  ): Promise<boolean>
+
+  getMultimodalSupport(
+    contextId: number,
+  ): Promise<{
+    vision: boolean
+    audio: boolean
+  }>
+
+  releaseMultimodal(
+    contextId: number,
+  ): Promise<void>
+
   releaseContext(contextId: number): Promise<void>
 
   releaseAllContexts(): Promise<void>
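
In app code these are normally reached through the `LlamaContext` wrappers added in `src/index.ts` below. A sketch of the underlying call order, assuming a `contextId` obtained when the context was created and a placeholder projector path:

    const ok = await RNLlama.initMultimodal(contextId, {
      path: '/data/local/models/mmproj.gguf', // hypothetical mmproj file
      use_gpu: true,
    })
    if (ok) {
      const { vision, audio } = await RNLlama.getMultimodalSupport(contextId)
      console.log('vision:', vision, 'audio:', audio)
      // ...run completions with media_paths, then free the projector:
      await RNLlama.releaseMultimodal(contextId)
    }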
package/src/index.ts CHANGED
@@ -15,14 +15,33 @@ import type {
   NativeCompletionTokenProbItem,
   NativeCompletionResultTimings,
   JinjaFormattedChatResult,
+  FormattedChatResult,
+  NativeImageProcessingResult,
+  NativeLlamaChatMessage,
 } from './NativeRNLlama'
 import type {
   SchemaGrammarConverterPropOrder,
   SchemaGrammarConverterBuiltinRule,
 } from './grammar'
 import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
-import type { RNLlamaMessagePart, RNLlamaOAICompatibleMessage } from './chat'
-import { formatChat } from './chat'
+
+export type RNLlamaMessagePart = {
+  type: string
+  text?: string
+  image_url?: {
+    url?: string
+  }
+  input_audio?: {
+    format: string
+    data?: string
+    url?: string
+  }
+}
+
+export type RNLlamaOAICompatibleMessage = {
+  role: string
+  content?: string | RNLlamaMessagePart[]
+}
 
 export type {
   NativeContextParams,
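
These inlined types replace the old `./chat` module and accept OpenAI-style content parts. A sketch of a message mixing text, an image, and audio (the file URLs are placeholders):

    const messages: RNLlamaOAICompatibleMessage[] = [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'What is in this picture, and what is being said?' },
          { type: 'image_url', image_url: { url: 'file:///tmp/photo.jpg' } },
          { type: 'input_audio', input_audio: { format: 'wav', url: 'file:///tmp/clip.wav' } },
        ],
      },
    ]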
@@ -36,15 +55,17 @@ export type {
   NativeEmbeddingParams,
   NativeCompletionTokenProbItem,
   NativeCompletionResultTimings,
-  RNLlamaMessagePart,
-  RNLlamaOAICompatibleMessage,
+  FormattedChatResult,
   JinjaFormattedChatResult,
+  NativeImageProcessingResult,
 
   // Deprecated
   SchemaGrammarConverterPropOrder,
   SchemaGrammarConverterBuiltinRule,
 }
 
+export const RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER = '<__media__>'
+
 export { SchemaGrammarConverter, convertJsonSchemaToGrammar }
 
 const EVENT_ON_INIT_CONTEXT_PROGRESS = '@RNLlama_onInitContextProgress'
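
`getFormattedChat` (below) rewrites each image/audio part into this marker and collects the file paths, which suggests the n-th marker in the prompt is paired with the n-th entry of `media_paths`. The same pairing can be used by hand when building a raw prompt; a sketch with a placeholder path:

    import { RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER } from 'cui-llama.rn'

    // One marker in the prompt, one entry in media_paths.
    await context.completion({
      prompt: `USER: ${RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER} Describe the image.\nASSISTANT:`,
      media_paths: ['/tmp/photo.jpg'],
    })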
@@ -126,6 +147,7 @@ export type CompletionBaseParams = {
   parallel_tool_calls?: object
   tool_choice?: string
   response_format?: CompletionResponseFormat
+  media_paths?: string | string[]
 }
 export type CompletionParams = Omit<
   NativeCompletionParams,
@@ -207,23 +229,94 @@ export class LlamaContext {
       parallel_tool_calls?: object
       tool_choice?: string
     },
-  ): Promise<JinjaFormattedChatResult | string> {
-    const chat = formatChat(messages)
+  ): Promise<FormattedChatResult | JinjaFormattedChatResult> {
+    const mediaPaths: string[] = []
+    const chat = messages.map((msg) => {
+      if (Array.isArray(msg.content)) {
+        const content = msg.content.map((part) => {
+          // Handle multimodal content
+          if (part.type === 'image_url') {
+            let path = part.image_url?.url || ''
+            if (path?.startsWith('file://')) path = path.slice(7)
+            mediaPaths.push(path)
+            return {
+              type: 'text',
+              text: RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER,
+            }
+          } else if (part.type === 'input_audio') {
+            const { input_audio: audio } = part
+            if (!audio) throw new Error('input_audio is required')
+
+            const { format } = audio
+            if (format != 'wav' && format != 'mp3') {
+              throw new Error(`Unsupported audio format: ${format}`)
+            }
+            if (audio.url) {
+              const path = audio.url.replace(/file:\/\//, '')
+              mediaPaths.push(path)
+            } else if (audio.data) {
+              mediaPaths.push(audio.data)
+            }
+            return {
+              type: 'text',
+              text: RNLLAMA_MTMD_DEFAULT_MEDIA_MARKER,
+            }
+          }
+          return part
+        })
+
+        return {
+          ...msg,
+          content,
+        }
+      }
+      return msg
+    }) as NativeLlamaChatMessage[]
+
     const useJinja = this.isJinjaSupported() && params?.jinja
-    let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+    let tmpl
     if (template) tmpl = template // Force replace if provided
     const jsonSchema = getJsonSchema(params?.response_format)
-    return RNLlama.getFormattedChat(this.id, JSON.stringify(chat), tmpl, {
-      jinja: useJinja,
-      json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
-      tools: params?.tools ? JSON.stringify(params.tools) : undefined,
-      parallel_tool_calls: params?.parallel_tool_calls
-        ? JSON.stringify(params.parallel_tool_calls)
-        : undefined,
-      tool_choice: params?.tool_choice,
-    })
+
+    const result = await RNLlama.getFormattedChat(
+      this.id,
+      JSON.stringify(chat),
+      tmpl,
+      {
+        jinja: useJinja,
+        json_schema: jsonSchema ? JSON.stringify(jsonSchema) : undefined,
+        tools: params?.tools ? JSON.stringify(params.tools) : undefined,
+        parallel_tool_calls: params?.parallel_tool_calls
+          ? JSON.stringify(params.parallel_tool_calls)
+          : undefined,
+        tool_choice: params?.tool_choice,
+      },
+    )
+    if (!useJinja) {
+      return {
+        type: 'llama-chat',
+        prompt: result as string,
+        has_media: mediaPaths.length > 0,
+        media_paths: mediaPaths,
+      }
+    }
+    const jinjaResult = result as JinjaFormattedChatResult
+    jinjaResult.type = 'jinja'
+    jinjaResult.has_media = mediaPaths.length > 0
+    jinjaResult.media_paths = mediaPaths
+    return jinjaResult
   }
 
+  /**
+   * Generate a completion based on the provided parameters
+   * @param params Completion parameters including prompt or messages
+   * @param callback Optional callback for token-by-token streaming
+   * @returns Promise resolving to the completion result
+   *
+   * Note: For multimodal support, you can include an media_paths parameter.
+   * This will process the images and add them to the context before generating text.
+   * Multimodal support must be enabled via initMultimodal() first.
+   */
   async completion(
     params: CompletionParams,
     callback?: (data: TokenData) => void,
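
A sketch of calling the reworked method directly, for example to preview the rendered prompt before a completion (`messages` as in the part-based example above):

    const formatted = await context.getFormattedChat(messages, undefined, {
      jinja: true,
    })
    if (formatted.type === 'jinja') {
      const jinja = formatted as JinjaFormattedChatResult
      console.log('prompt:', jinja.prompt, 'chat_format:', jinja.chat_format)
    }
    if (formatted.has_media) {
      // Paths collected from image_url / input_audio parts, in marker order.
      console.log('media to attach:', formatted.media_paths)
    }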
@@ -233,8 +326,8 @@
       prompt: params.prompt || '',
       emit_partial_completion: !!callback,
     }
+
     if (params.messages) {
-      // messages always win
       const formattedResult = await this.getFormattedChat(
         params.messages,
         params.chat_template || params.chatTemplate,
@@ -245,29 +338,42 @@
           tool_choice: params.tool_choice,
         },
       )
-      if (typeof formattedResult === 'string') {
-        nativeParams.prompt = formattedResult || ''
-      } else {
-        nativeParams.prompt = formattedResult.prompt || ''
-        if (typeof formattedResult.chat_format === 'number')
-          nativeParams.chat_format = formattedResult.chat_format
-        if (formattedResult.grammar)
-          nativeParams.grammar = formattedResult.grammar
-        if (typeof formattedResult.grammar_lazy === 'boolean')
-          nativeParams.grammar_lazy = formattedResult.grammar_lazy
-        if (formattedResult.grammar_triggers)
-          nativeParams.grammar_triggers = formattedResult.grammar_triggers
-        if (formattedResult.preserved_tokens)
-          nativeParams.preserved_tokens = formattedResult.preserved_tokens
-        if (formattedResult.additional_stops) {
+      if (formattedResult.type === 'jinja') {
+        const jinjaResult = formattedResult as JinjaFormattedChatResult
+
+        nativeParams.prompt = jinjaResult.prompt || ''
+        if (typeof jinjaResult.chat_format === 'number')
+          nativeParams.chat_format = jinjaResult.chat_format
+        if (jinjaResult.grammar) nativeParams.grammar = jinjaResult.grammar
+        if (typeof jinjaResult.grammar_lazy === 'boolean')
+          nativeParams.grammar_lazy = jinjaResult.grammar_lazy
+        if (jinjaResult.grammar_triggers)
+          nativeParams.grammar_triggers = jinjaResult.grammar_triggers
+        if (jinjaResult.preserved_tokens)
+          nativeParams.preserved_tokens = jinjaResult.preserved_tokens
+        if (jinjaResult.additional_stops) {
           if (!nativeParams.stop) nativeParams.stop = []
-          nativeParams.stop.push(...formattedResult.additional_stops)
+          nativeParams.stop.push(...jinjaResult.additional_stops)
+        }
+        if (jinjaResult.has_media) {
+          nativeParams.media_paths = jinjaResult.media_paths
+        }
+      } else if (formattedResult.type === 'llama-chat') {
+        const llamaChatResult = formattedResult as FormattedChatResult
+        nativeParams.prompt = llamaChatResult.prompt || ''
+        if (llamaChatResult.has_media) {
+          nativeParams.media_paths = llamaChatResult.media_paths
         }
       }
     } else {
       nativeParams.prompt = params.prompt || ''
     }
 
+    // If media_paths were explicitly provided or extracted from messages, use them
+    if (!nativeParams.media_paths && params.media_paths) {
+      nativeParams.media_paths = params.media_paths
+    }
+
    if (nativeParams.response_format && !nativeParams.grammar) {
      const jsonSchema = getJsonSchema(params.response_format)
      if (jsonSchema) nativeParams.json_schema = JSON.stringify(jsonSchema)
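
End to end, the branch above means a completion over part-based messages picks up its media automatically. A sketch (placeholder file URL; `n_predict` and the streaming callback shape as in the existing completion API):

    let streamed = ''
    const result = await context.completion(
      {
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: 'Describe this image.' },
              { type: 'image_url', image_url: { url: 'file:///tmp/photo.jpg' } },
            ],
          },
        ],
        n_predict: 128,
      },
      (data) => {
        streamed += data.token // token-by-token streaming
      },
    )
    console.log(result.text)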
@@ -301,12 +407,32 @@
     return RNLlama.stopCompletion(this.id)
   }
 
-  tokenizeAsync(text: string): Promise<NativeTokenizeResult> {
-    return RNLlama.tokenizeAsync(this.id, text)
+  /**
+   * Tokenize text or text with images
+   * @param text Text to tokenize
+   * @param params.media_paths Array of image paths to tokenize (if multimodal is enabled)
+   * @returns Promise resolving to the tokenize result
+   */
+  tokenizeAsync(
+    text: string,
+    {
+      media_paths: mediaPaths,
+    }: {
+      media_paths?: string[]
+    } = {},
+  ): Promise<NativeTokenizeResult> {
+    return RNLlama.tokenizeAsync(this.id, text, mediaPaths)
   }
 
-  tokenizeSync(text: string): NativeTokenizeResult {
-    return RNLlama.tokenizeSync(this.id, text)
+  tokenizeSync(
+    text: string,
+    {
+      media_paths: mediaPaths,
+    }: {
+      media_paths?: string[]
+    } = {},
+  ): NativeTokenizeResult {
+    return RNLlama.tokenizeSync(this.id, text, mediaPaths)
   }
 
   detokenize(tokens: number[]): Promise<string> {
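
The sync variant mirrors the async one; a sketch of inline prompt budgeting with text only:

    // Counting tokens synchronously, e.g. before deciding to trim history.
    const { tokens } = context.tokenizeSync('Hello world')
    if (tokens.length > 2048) {
      // trim or summarize before sending
    }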
@@ -362,6 +488,54 @@
     return RNLlama.getLoadedLoraAdapters(this.id)
   }
 
+  /**
+   * Initialize multimodal support with a mmproj file
+   * @param params Parameters for multimodal support
+   * @param params.path Path to the multimodal projector file
+   * @param params.use_gpu Whether to use GPU
+   * @returns Promise resolving to true if initialization was successful
+   */
+  async initMultimodal({
+    path,
+    use_gpu: useGpu,
+  }: {
+    path: string
+    use_gpu?: boolean
+  }): Promise<boolean> {
+    if (path.startsWith('file://')) path = path.slice(7)
+    return RNLlama.initMultimodal(this.id, {
+      path,
+      use_gpu: useGpu ?? true,
+    })
+  }
+
+  /**
+   * Check if multimodal support is enabled
+   * @returns Promise resolving to true if multimodal is enabled
+   */
+  async isMultimodalEnabled(): Promise<boolean> {
+    return await RNLlama.isMultimodalEnabled(this.id)
+  }
+
+  /**
+   * Check multimodal support
+   * @returns Promise resolving to an object with vision and audio support
+   */
+  async getMultimodalSupport(): Promise<{
+    vision: boolean
+    audio: boolean
+  }> {
+    return await RNLlama.getMultimodalSupport(this.id)
+  }
+
+  /**
+   * Release multimodal support
+   * @returns Promise resolving to void
+   */
+  async releaseMultimodal(): Promise<void> {
+    return await RNLlama.releaseMultimodal(this.id)
+  }
+
   async release(): Promise<void> {
     return RNLlama.releaseContext(this.id)
   }
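
Typical lifecycle, assuming a context created with `initLlama` and a placeholder mmproj path shipped alongside the model:

    const ok = await context.initMultimodal({
      path: 'file:///data/local/models/mmproj-model-f16.gguf', // file:// prefix is stripped
      use_gpu: true,
    })
    if (ok) {
      const { vision, audio } = await context.getMultimodalSupport()
      console.log('vision:', vision, 'audio:', audio)
    }
    // ...later, free the projector without releasing the whole context:
    await context.releaseMultimodal()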
@@ -407,7 +581,7 @@
   'tokenizer.ggml.tokens',
   'tokenizer.ggml.token_type',
   'tokenizer.ggml.merges',
-  'tokenizer.ggml.scores'
+  'tokenizer.ggml.scores',
 ]
 export async function loadLlamaModelInfo(model: string): Promise<Object> {
   let path = model
package/cpp/binary-ops.h DELETED
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "cpu-common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void lm_ggml_compute_forward_add_non_quantized(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_sub(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_mul(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_div(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-
-#ifdef __cplusplus
-}
-#endif
package/cpp/ops.h DELETED
@@ -1,128 +0,0 @@
-#pragma once
-
-#include "ggml.h"
-
-//
-// cache line
-//
-
-#if defined(__cpp_lib_hardware_interference_size)
-#define CACHE_LINE_SIZE std::hardware_destructive_interference_size
-#else
-#if defined(__POWER9_VECTOR__)
-#define CACHE_LINE_SIZE 128
-#elif defined(__VXE__) || defined(__VXE2__)
-#define CACHE_LINE_SIZE 256
-#else
-#define CACHE_LINE_SIZE 64
-#endif
-#endif
-
-static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void lm_ggml_compute_forward_dup(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_add(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_add1(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_acc(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_sum(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_sum_rows(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_mean(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_argmax(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_count_equal(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_repeat(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_repeat_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_concat(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_silu_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_norm(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_rms_norm(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_rms_norm_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_group_norm(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_l2_norm(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_out_prod(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_scale(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_set(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_cpy(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_cont(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_reshape(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_view(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_permute(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_transpose(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_get_rows(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_get_rows_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_diag(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_diag_mask_inf(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_diag_mask_zero(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_soft_max(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_soft_max_ext_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_rope(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_rope_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_clamp(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_conv_transpose_1d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_im2col(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_im2col_back_f32(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_conv_transpose_2d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_pool_1d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_pool_2d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_pool_2d_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_upscale(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_pad(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_pad_reflect_1d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_arange(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_timestep_embedding(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_argsort(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_leaky_relu(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_flash_attn_ext(
-        const struct lm_ggml_compute_params * params,
-        const struct lm_ggml_tensor * q,
-        const struct lm_ggml_tensor * k,
-        const struct lm_ggml_tensor * v,
-        const struct lm_ggml_tensor * mask,
-        struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_flash_attn_back(
-        const struct lm_ggml_compute_params * params,
-        const bool masked,
-        struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_ssm_conv(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_ssm_scan(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_win_part(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_win_unpart(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_unary(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_get_rel_pos(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_add_rel_pos(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_rwkv_wkv6(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_rwkv_wkv7(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_gla(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_map_unary(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_unary_op_f32_t fun);
-void lm_ggml_compute_forward_map_binary(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_binary_op_f32_t fun);
-void lm_ggml_compute_forward_map_custom1_f32(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_custom1_op_f32_t fun);
-void lm_ggml_compute_forward_map_custom2_f32(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_custom2_op_f32_t fun);
-void lm_ggml_compute_forward_map_custom3_f32(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_custom3_op_f32_t fun);
-void lm_ggml_compute_forward_map_custom1(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_map_custom2(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_map_custom3(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_cross_entropy_loss(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_cross_entropy_loss_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_opt_step_adamw(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-
-#ifdef __cplusplus
-}
-#endif