cui-llama.rn 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/llama-model.h CHANGED
@@ -36,14 +36,17 @@ enum llm_type {
     LLM_TYPE_335M,
     LLM_TYPE_410M,
     LLM_TYPE_450M,
+    LLM_TYPE_475M,
     LLM_TYPE_770M,
     LLM_TYPE_780M,
     LLM_TYPE_0_5B,
+    LLM_TYPE_0_6B,
     LLM_TYPE_1B,
     LLM_TYPE_1_3B,
     LLM_TYPE_1_4B,
     LLM_TYPE_1_5B,
     LLM_TYPE_1_6B,
+    LLM_TYPE_1_7B,
     LLM_TYPE_1_8B,
     LLM_TYPE_2B,
     LLM_TYPE_2_8B,
@@ -62,6 +65,7 @@ enum llm_type {
     LLM_TYPE_15B,
     LLM_TYPE_16B,
     LLM_TYPE_20B,
+    LLM_TYPE_27B,
     LLM_TYPE_30B,
     LLM_TYPE_32B,
     LLM_TYPE_34B,
@@ -70,7 +74,9 @@ enum llm_type {
     LLM_TYPE_65B,
     LLM_TYPE_70B,
     LLM_TYPE_236B,
+    LLM_TYPE_290B,
     LLM_TYPE_314B,
+    LLM_TYPE_405B,
     LLM_TYPE_671B,
     LLM_TYPE_SMALL,
     LLM_TYPE_MEDIUM,
@@ -84,12 +90,14 @@ enum llm_type {
     LLM_TYPE_16x3_8B,
     LLM_TYPE_10B_128x3_66B,
     LLM_TYPE_57B_A14B,
-    LLM_TYPE_27B,
-    LLM_TYPE_290B,
     LLM_TYPE_17B_16E, // llama4 Scout
     LLM_TYPE_17B_128E, // llama4 Maverick
+    LLM_TYPE_30B_A3B,
+    LLM_TYPE_235B_A22B,
 };
 
+std::string llama_rope_scaling_type_name(llama_rope_scaling_type rope_scaling_type);
+
 struct llama_layer_posnet {
     // resnet
     struct lm_ggml_tensor * norm1 = nullptr;
@@ -171,6 +179,8 @@ struct llama_layer {
     struct lm_ggml_tensor * wq_b = nullptr;
     struct lm_ggml_tensor * wkv_a_mqa = nullptr;
     struct lm_ggml_tensor * wkv_b = nullptr;
+    struct lm_ggml_tensor * wk_b = nullptr;
+    struct lm_ggml_tensor * wv_b = nullptr;
     struct lm_ggml_tensor * wq_cross = nullptr;
     struct lm_ggml_tensor * wk_cross = nullptr;
     struct lm_ggml_tensor * wv_cross = nullptr;
@@ -388,8 +398,14 @@ struct llama_model {
 
     const struct lm_ggml_tensor * get_tensor(const char * name) const;
 
+    float get_rope_freq_base (const llama_cparams & cparams, int il) const;
+    float get_rope_freq_scale(const llama_cparams & cparams, int il) const;
+
+    lm_ggml_tensor * get_rope_factors(const llama_cparams & cparams, int il) const;
+
+    // note: can mutate `cparams`
     // TODO: move this to new llm_arch_model_i interface
-    llama_memory_i * create_memory() const; // TODO: params
+    llama_memory_i * create_memory(const llama_memory_params & params, llama_cparams & cparams) const;
 
     // TODO: move this to new llm_arch_model_i interface
     llm_graph_result_ptr build_graph(
package/cpp/llama-sampling.cpp CHANGED
@@ -232,7 +232,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
     // }
 
     if (k <= 0) {
-        k = cur_p->size;
+        return;
     }
 
     k = std::min(k, (int) cur_p->size);
@@ -298,6 +298,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
         }
         cur_p->sorted = true;
     }
+
     cur_p->size = k;
 }
 
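Note: the top-k hunks above change the k <= 0 edge case. Previously a non-positive k was clamped to cur_p->size, so the sampler still sorted and kept every candidate; it now returns early and leaves the candidate array untouched. A minimal caller-side sketch of the new behavior, assuming the public llama_sampler chain API from llama.h:

    // sketch only: top-k with k <= 0 is now a no-op instead of "sort, then keep all"
    llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, llama_sampler_init_top_k(0)); // leaves candidates as-is
    llama_sampler_chain_add(chain, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
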
@@ -1750,23 +1751,35 @@ static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler *
 static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
     const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;
 
+    if (ctx->n <= 0.0f || cur_p->size <= 1) {
+        return;
+    }
+
     // find max logit and calculate mean
     float max = cur_p->data[0].logit;
     float logits_sum = 0;
+    size_t valid_count = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        if (cur_p->data[i].logit > max) {
-            max = cur_p->data[i].logit;
+        // Only count non-negative infinity values
+        if (cur_p->data[i].logit != -INFINITY) {
+            if (cur_p->data[i].logit > max) {
+                max = cur_p->data[i].logit;
+            }
+            logits_sum += cur_p->data[i].logit;
+            valid_count++;
         }
-        logits_sum += cur_p->data[i].logit;
     }
-    float mean = logits_sum/cur_p->size;
+    float mean = valid_count > 0 ? logits_sum/valid_count : 0;
 
     // calculate standard deviation
     float acc = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        acc += pow(cur_p->data[i].logit - mean, 2);
+        // Skip -infinity in std calculation
+        if (cur_p->data[i].logit != -INFINITY) {
+            acc += pow(cur_p->data[i].logit - mean, 2);
+        }
     }
-    float std = sqrt(acc/cur_p->size);
+    float std = valid_count > 0 ? sqrt(acc/valid_count) : 0;
 
     //apply mask
     for (size_t i = 0; i < cur_p->size; ++i) {
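Note: the top-n-sigma hunk above excludes -INFINITY logits (tokens already masked by earlier samplers) from the mean and standard deviation, so masked entries no longer drag both statistics down. A self-contained sketch of the statistics now computed; the helper name is ours, not the package's:

    #include <cmath>
    #include <limits>
    #include <vector>

    // mean and standard deviation over finite logits only, mirroring the hunk above
    static void finite_mean_std(const std::vector<float> & logits, float & mean, float & stddev) {
        const float neg_inf = -std::numeric_limits<float>::infinity();
        double sum = 0.0;
        size_t n = 0;
        for (float l : logits) {
            if (l != neg_inf) { sum += l; ++n; }
        }
        mean = n > 0 ? (float) (sum / n) : 0.0f;
        double acc = 0.0;
        for (float l : logits) {
            if (l != neg_inf) { acc += (l - mean) * (l - mean); }
        }
        stddev = n > 0 ? (float) std::sqrt(acc / n) : 0.0f;
    }
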
package/cpp/llama-vocab.cpp CHANGED
@@ -1,5 +1,7 @@
 #include "llama-vocab.h"
 
+#include "ggml.h"
+#include "gguf.h"
 #include "llama-impl.h"
 #include "llama-model-loader.h"
 
@@ -415,6 +417,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "'(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_SEED_CODER:
+                regex_exprs = {
+                    // original regex from tokenizer.json
+                    // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1}| ?[^\\s\\p{L}\\p{N}\r\n]+|\\s*[\r\n]+|\\s+(?!\\S)|\\s+"
+                    "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1}| ?[^\\s\\p{L}\\p{N}\\r\\n]+|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+                };
+                break;
             default:
                 // default regex for BPE tokenization pre-processing
                 regex_exprs = {
@@ -826,7 +835,7 @@ struct llm_tokenizer_ugm_session {
     }
 
     // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-    std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+    std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
     // at the beginning tokenization score is zero
     tokenization_results[0] = { vocab.token_unk(), 0, 0 };
 
@@ -858,7 +867,7 @@ struct llm_tokenizer_ugm_session {
                 const double challenger_score = current_best.score_sum + token_score;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { token_id, input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -872,7 +881,7 @@ struct llm_tokenizer_ugm_session {
                 prefix_offset = input_offset + n_utf8_code_units;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -998,7 +1007,7 @@ private:
     struct best_tokenization {
         llama_token token_id;
         size_t input_offset;
-        float score_sum;
+        double score_sum;
     };
 
     struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
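Note: the UGM tokenizer hunks above widen best_tokenization::score_sum from float to double (the sentinel -FLT_MAX becomes -DBL_MAX, and the (float) casts are dropped), so Viterbi path scores accumulate at full double precision. A tiny illustration of why this matters when summing many small per-token log scores; the values are ours, chosen to sit below float's ~1e-3 ulp at this magnitude:

    #include <cstdio>

    int main() {
        float  f = -10000.0f;
        double d = -10000.0;
        f += 1e-4f; // lost: the increment is below half a float ulp at |10000|
        d += 1e-4;  // preserved in double
        std::printf("%f vs %f\n", f, d); // -10000.000000 vs -9999.999900
    }
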
@@ -1227,6 +1236,9 @@ struct fragment_buffer_variant {
 struct llama_vocab::impl {
     uint32_t n_token_types = 0; // for BERT-style token types
 
+    std::string tokenizer_model;
+    std::string tokenizer_pre;
+
     enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
     enum llama_vocab_pre_type pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
 
@@ -1362,9 +1374,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
     // determine vocab type
     {
-        std::string tokenizer_model;
-        std::string tokenizer_pre;
-
         ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
         ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
 
@@ -1459,7 +1468,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
         const int precompiled_charsmap_keyidx = lm_gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str());
         if (precompiled_charsmap_keyidx != -1) {
-            size_t n_precompiled_charsmap = lm_gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
+            const lm_gguf_type pc_type = lm_gguf_get_arr_type(ctx, precompiled_charsmap_keyidx);
+            LM_GGML_ASSERT(pc_type == LM_GGUF_TYPE_INT8 || pc_type == LM_GGUF_TYPE_UINT8);
+
+            const size_t n_precompiled_charsmap = lm_gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
             const char * pc = (const char *) lm_gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
             precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
 #ifdef IS_BIG_ENDIAN
@@ -1506,7 +1518,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "llama3" ||
                 tokenizer_pre == "llama-v3" ||
                 tokenizer_pre == "llama-bpe"||
-                tokenizer_pre == "falcon3") {
+                tokenizer_pre == "falcon3" ||
+                tokenizer_pre == "pixtral") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
             ignore_merges = true;
             add_bos = true;
@@ -1572,6 +1585,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             pre_type = LLAMA_VOCAB_PRE_TYPE_PORO;
             clean_spaces = false;
         } else if (
+                tokenizer_pre == "glm4" ||
                 tokenizer_pre == "chatglm-bpe") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_CHATGLM4;
             special_bos_id = LLAMA_TOKEN_NULL;
@@ -1632,6 +1646,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "bailingmoe") {
             pre_type = LLAMA_VOCAB_PRE_TYPE_BAILINGMOE;
             clean_spaces = false;
+        } else if (
+                tokenizer_pre == "seed-coder") {
+            pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
+            clean_spaces = false;
         } else {
             throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
         }
@@ -1840,6 +1858,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             if (false
                     || t.first == "<|fim_prefix|>" // Qwen
                     || t.first == "<fim-prefix>"
+                    || t.first == "<fim_prefix>" // Granite
                     || t.first == "<|fim▁begin|>" // DeepSeek
                     || t.first == "<PRE>"
                     || t.first == "▁<PRE>" // CodeLlama
@@ -1858,6 +1877,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             if (false
                     || t.first == "<|fim_suffix|>" // Qwen
                     || t.first == "<fim-suffix>"
+                    || t.first == "<fim_suffix>" // Granite
                     || t.first == "<|fim▁hole|>" // DeepSeek
                     || t.first == "<SUF>"
                     || t.first == "▁<SUF>" // CodeLlama
@@ -1876,6 +1896,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             if (false
                     || t.first == "<|fim_middle|>" // Qwen
                     || t.first == "<fim-middle>"
+                    || t.first == "<fim_middle>" // Granite
                     || t.first == "<|fim▁end|>" // DeepSeek
                     || t.first == "<MID>"
                     || t.first == "▁<MID>" // CodeLlama
@@ -1894,6 +1915,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             if (false
                     || t.first == "<|fim_pad|>" // Qwen
                     || t.first == "<fim-pad>"
+                    || t.first == "<fim_pad>" // Granite
                     || t.first == "<PAD>"
                ) {
                 special_fim_pad_id = t.second;
@@ -1912,6 +1934,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|repo_name|>"
                     || t.first == "<fim-repo>"
                     || t.first == "<REPO>"
+                    || t.first == "<reponame>" // Granite
                ) {
                 special_fim_rep_id = t.second;
                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -2771,6 +2794,14 @@ void llama_vocab::load(llama_model_loader & ml, const LLM_KV & kv) {
     pimpl->load(ml, kv);
 }
 
+std::string llama_vocab::get_tokenizer_model() const {
+    return pimpl->tokenizer_model;
+}
+
+std::string llama_vocab::get_tokenizer_pre() const {
+    return pimpl->tokenizer_pre;
+}
+
 enum llama_vocab_type llama_vocab::get_type() const {
     return pimpl->type;
 }
@@ -2993,6 +3024,20 @@ int llama_vocab::find_bpe_rank(const std::string & token_left, const std::string
     return it->second;
 }
 
+std::vector<std::string> llama_vocab::get_bpe_merges() const {
+    std::vector<std::string> result(pimpl->bpe_ranks.size());
+
+    for (const auto & pair : pimpl->bpe_ranks) {
+        result[pair.second] = pair.first.first + " " + pair.first.second;
+    }
+
+    return result;
+}
+
+std::vector<char> llama_vocab::get_precompiled_charsmap() const {
+    return pimpl->precompiled_charsmap;
+}
+
 int32_t llama_vocab::tokenize(
                    const char * text,
                    int32_t text_len,
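Note: the accessors added above (get_tokenizer_model, get_tokenizer_pre, get_bpe_merges, get_precompiled_charsmap) expose the vocab metadata that the new llama-model-saver (see llama-model-saver.h / .cpp in the file list) needs when writing a model back out. A hedged sketch of how a saver might consume them; the lm_gguf_* calls are this package's prefixed gguf API, the tokenizer.ggml.* key names follow the usual GGUF convention, and the wiring is illustrative rather than the package's actual code:

    // illustrative only; `vocab` is an initialized llama_vocab
    lm_gguf_context * ctx = lm_gguf_init_empty();

    lm_gguf_set_val_str(ctx, "tokenizer.ggml.model", vocab.get_tokenizer_model().c_str());
    lm_gguf_set_val_str(ctx, "tokenizer.ggml.pre",   vocab.get_tokenizer_pre().c_str());

    const std::vector<std::string> merges = vocab.get_bpe_merges();
    std::vector<const char *> merges_c;
    merges_c.reserve(merges.size());
    for (const auto & m : merges) {
        merges_c.push_back(m.c_str());
    }
    lm_gguf_set_arr_str(ctx, "tokenizer.ggml.merges", merges_c.data(), (int) merges_c.size());
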
package/cpp/llama-vocab.h CHANGED
@@ -21,6 +21,9 @@ struct llama_vocab {
 
     void load(llama_model_loader & ml, const LLM_KV & kv);
 
+    std::string get_tokenizer_model() const;
+    std::string get_tokenizer_pre() const;
+
     enum llama_vocab_type get_type() const;
     enum llama_vocab_pre_type get_pre_type() const;
 
@@ -80,6 +83,9 @@ struct llama_vocab {
     int max_token_len() const;
 
     int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
+    std::vector<std::string> get_bpe_merges() const;
+
+    std::vector<char> get_precompiled_charsmap() const;
 
     int32_t tokenize(
                    const char * text,
package/cpp/llama.cpp CHANGED
@@ -4,6 +4,7 @@
 #include "llama-mmap.h"
 #include "llama-vocab.h"
 #include "llama-model-loader.h"
+#include "llama-model-saver.h"
 #include "llama-model.h"
 
 #include "ggml.h"
@@ -150,6 +151,11 @@ static struct llama_model * llama_model_load_from_file_impl(
         struct llama_model_params params) {
     lm_ggml_time_init();
 
+    if (!params.vocab_only && lm_ggml_backend_reg_count() == 0) {
+        LLAMA_LOG_ERROR("%s: no backends are loaded. hint: use lm_ggml_backend_load() or lm_ggml_backend_load_all() to load a backend before calling this function\n", __func__);
+        return nullptr;
+    }
+
     unsigned cur_percentage = 0;
     if (params.progress_callback == NULL) {
         params.progress_callback_user_data = &cur_percentage;
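Note: the guard above makes model loading fail fast with a hint when no ggml backends are registered, instead of failing later during initialization. A minimal caller-side sketch, assuming the prefixed backend-registry API referenced in the error message:

    // sketch: register backends before loading (a no-op when backends are linked in statically)
    lm_ggml_backend_load_all();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams); // placeholder path
    if (model == nullptr) {
        // with the new guard, a missing backend is reported here rather than deeper in init
    }
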
@@ -264,6 +270,13 @@ struct llama_model * llama_model_load_from_splits(
     return llama_model_load_from_file_impl(splits.front(), splits, params);
 }
 
+void llama_model_save_to_file(const struct llama_model * model, const char * path_model) {
+    llama_model_saver ms(*model);
+    ms.add_kv_from_model();
+    ms.add_tensors_from_model();
+    ms.save(path_model);
+}
+
 //
 // chat templates
 //
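Note: llama_model_save_to_file is the new public entry point for the llama-model-saver introduced in this release (llama-model-saver.h / .cpp in the file list). A hedged round-trip sketch; paths are placeholders:

    // sketch: load a model, then write it back out through the new saver
    llama_model * model = llama_model_load_from_file("in.gguf", llama_model_default_params());
    if (model != nullptr) {
        llama_model_save_to_file(model, "out.gguf");
        llama_model_free(model);
    }
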
@@ -349,3 +362,4 @@ const char * llama_print_system_info(void) {
 
     return s.c_str();
 }
+