cui-llama.rn 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/llama-model-loader.cpp

@@ -301,12 +301,12 @@ namespace GGUFMeta {
             GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta.get(), kid);
 
         switch (arr_info.gt) {
-            case LM_GGUF_TYPE_FLOAT32: LM_GGML_ASSERT((std::is_same<T, float>::value)); break;
-            case LM_GGUF_TYPE_INT32:   LM_GGML_ASSERT(
-                                           (std::is_same<T,  int32_t>::value) ||
-                                           (std::is_same<T, uint32_t>::value)); break;
+            case LM_GGUF_TYPE_UINT32:
+            case LM_GGUF_TYPE_INT32:   LM_GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
+                                                      (std::is_same<T, uint32_t>::value)); break;
+            case LM_GGUF_TYPE_FLOAT32: LM_GGML_ASSERT((std::is_same<T, float>::value)); break;
             default:
-                throw std::runtime_error(format("%s is not a float32, int32 array", key.c_str()));
+                throw std::runtime_error(format("%s is not a float32/uint32/int32 array", key.c_str()));
         }
 
         result.resize(arr_info.length);
@@ -330,12 +330,12 @@ namespace GGUFMeta {
             GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta.get(), kid);
 
         switch (arr_info.gt) {
-            case LM_GGUF_TYPE_FLOAT32: LM_GGML_ASSERT((std::is_same<T, float>::value)); break;
-            case LM_GGUF_TYPE_INT32:   LM_GGML_ASSERT(
-                                           (std::is_same<T,  int32_t>::value) ||
-                                           (std::is_same<T, uint32_t>::value)); break;
+            case LM_GGUF_TYPE_UINT32:
+            case LM_GGUF_TYPE_INT32:   LM_GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
+                                                      (std::is_same<T, uint32_t>::value)); break;
+            case LM_GGUF_TYPE_FLOAT32: LM_GGML_ASSERT((std::is_same<T, float>::value)); break;
             default:
-                throw std::runtime_error(format("%s is not a float32, int32 array", key.c_str()));
+                throw std::runtime_error(format("%s is not a float32/uint32/int32 array", key.c_str()));
         }
 
         if (arr_info.length > N_MAX) {
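
Both get_arr overloads receive the same fix: LM_GGUF_TYPE_UINT32 arrays are now accepted alongside INT32 and FLOAT32, the INT32/UINT32 cases share one assertion, and the error message names all three accepted types. A minimal standalone sketch of that type-guard pattern (all names below are illustrative, not from the package):

#include <cstdint>
#include <stdexcept>
#include <type_traits>

// Illustrative analogue of the patched switch: verify that the destination
// element type T matches the stored array type, with UINT32 and INT32
// sharing a single case.
enum class arr_type { u32, i32, f32 };

template <typename T>
void check_dest_type(arr_type gt) {
    switch (gt) {
        case arr_type::u32:
        case arr_type::i32:
            if (!std::is_same<T, int32_t>::value && !std::is_same<T, uint32_t>::value) {
                throw std::runtime_error("array is not int32/uint32");
            }
            break;
        case arr_type::f32:
            if (!std::is_same<T, float>::value) {
                throw std::runtime_error("array is not float32");
            }
            break;
    }
}

int main() {
    check_dest_type<uint32_t>(arr_type::u32); // accepted after this change
    check_dest_type<float>(arr_type::f32);    // accepted as before
}
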
@@ -469,7 +469,7 @@ llama_model_loader::llama_model_loader(
 
     meta.reset(lm_gguf_init_from_file(fname.c_str(), params));
     if (!meta) {
-        throw std::runtime_error(format("%s: failed to load model from %s\n", __func__, fname.c_str()));
+        throw std::runtime_error(format("%s: failed to load model from %s", __func__, fname.c_str()));
     }
 
     get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
@@ -528,7 +528,7 @@ llama_model_loader::llama_model_loader(
             };
             lm_gguf_context_ptr ctx_gguf { lm_gguf_init_from_file(fname_split, split_params) };
             if (!ctx_gguf) {
-                throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, fname_split));
+                throw std::runtime_error(format("%s: failed to load GGUF split from %s", __func__, fname_split));
             }
 
             // check idx
@@ -822,9 +822,18 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
         mappings.reserve(files.size());
         mmaps_used.reserve(files.size());
         for (const auto & file : files) {
-            auto * reg = lm_ggml_backend_dev_backend_reg(lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU));
-            auto * is_numa_fn = (decltype(lm_ggml_is_numa) *) lm_ggml_backend_reg_get_proc_address(reg, "lm_ggml_backend_cpu_is_numa");
-            std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
+            bool is_numa = false;
+
+            auto * dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
+            if (dev) {
+                auto * reg = lm_ggml_backend_dev_backend_reg(dev);
+                auto * is_numa_fn = (decltype(lm_ggml_is_numa) *) lm_ggml_backend_reg_get_proc_address(reg, "lm_ggml_backend_cpu_is_numa");
+                if (is_numa_fn) {
+                    is_numa = is_numa_fn();
+                }
+            }
+
+            std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa);
             mmaps_used.emplace_back(mapping->size(), 0);
             if (mlock_mmaps) {
                 std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());
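
This hunk makes the NUMA probe defensive: the CPU backend device, its backend registration, and the lm_ggml_backend_cpu_is_numa proc address are each null-checked, and a missing step falls back to is_numa = false instead of a null-pointer dereference. A self-contained sketch of this null-safe capability lookup (the types below are stand-ins, not package API):

#include <cstdio>

// Stand-in for an optional backend capability (illustrative only).
using numa_probe_t = bool (*)();

struct backend_reg {
    numa_probe_t numa_probe = nullptr; // capability may be absent
};

// Null-safe probe: a missing registry or probe function simply means
// "assume non-NUMA" rather than crashing.
bool detect_numa(const backend_reg * reg) {
    if (reg && reg->numa_probe) {
        return reg->numa_probe();
    }
    return false;
}

int main() {
    backend_reg reg; // no probe registered
    std::printf("is_numa = %d\n", detect_numa(&reg)); // prints 0
}
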

package/cpp/llama-model-saver.cpp (new file)

@@ -0,0 +1,281 @@
+#include "llama-model-saver.h"
+
+#include "gguf.h"
+
+#include "llama.h"
+#include "llama-hparams.h"
+#include "llama-model.h"
+#include "llama-vocab.h"
+
+#include <string>
+
+llama_model_saver::llama_model_saver(const struct llama_model & model) : model(model), llm_kv(model.arch) {
+    lm_gguf_ctx = lm_gguf_init_empty();
+}
+
+llama_model_saver::~llama_model_saver() {
+    lm_gguf_free(lm_gguf_ctx);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const uint32_t value) {
+    lm_gguf_set_val_u32(lm_gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const int32_t value) {
+    lm_gguf_set_val_i32(lm_gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const float value) {
+    lm_gguf_set_val_f32(lm_gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const bool value) {
+    lm_gguf_set_val_bool(lm_gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const char * value) {
+    lm_gguf_set_val_str(lm_gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+[[noreturn]]
+void llama_model_saver::add_kv(const enum llm_kv key, const char value) {
+    LM_GGML_UNUSED(key);
+    LM_GGML_UNUSED(value);
+    LM_GGML_ABORT("fatal error"); // this should never be called, only needed to make the template below compile
+}
+
+template <typename Container>
+void llama_model_saver::add_kv(const enum llm_kv key, const Container & value, const bool per_layer) {
+    const size_t n_values = per_layer ? size_t(model.hparams.n_layer) : value.size();
+    LM_GGML_ASSERT(n_values <= value.size());
+
+    if (n_values == 0) {
+        return;
+    }
+
+    if (per_layer) {
+        bool all_values_the_same = true;
+        for (size_t i = 1; i < n_values; ++i) {
+            if (value[i] != value[0]) {
+                all_values_the_same = false;
+                break;
+            }
+        }
+        if (all_values_the_same) {
+            add_kv(key, value[0]);
+            return;
+        }
+    }
+
+    if (std::is_same<typename Container::value_type, uint8_t>::value) {
+        lm_gguf_set_arr_data(lm_gguf_ctx, llm_kv(key).c_str(), LM_GGUF_TYPE_UINT8, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, int8_t>::value) {
+        lm_gguf_set_arr_data(lm_gguf_ctx, llm_kv(key).c_str(), LM_GGUF_TYPE_INT8, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, uint32_t>::value) {
+        lm_gguf_set_arr_data(lm_gguf_ctx, llm_kv(key).c_str(), LM_GGUF_TYPE_UINT32, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, int32_t>::value) {
+        lm_gguf_set_arr_data(lm_gguf_ctx, llm_kv(key).c_str(), LM_GGUF_TYPE_INT32, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, float>::value) {
+        lm_gguf_set_arr_data(lm_gguf_ctx, llm_kv(key).c_str(), LM_GGUF_TYPE_FLOAT32, value.data(), n_values);
+    } else if (std::is_same<Container, std::string>::value) {
+        lm_gguf_set_val_str(lm_gguf_ctx, llm_kv(key).c_str(), reinterpret_cast<const char *>(value.data()));
+    } else {
+        LM_GGML_ABORT("fatal error");
+    }
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const std::vector<std::string> & value) {
+    std::vector<const char *> tmp(value.size());
+    for (size_t i = 0; i < value.size(); ++i) {
+        tmp[i] = value[i].c_str();
+    }
+    lm_gguf_set_arr_str(lm_gguf_ctx, llm_kv(key).c_str(), tmp.data(), tmp.size());
+}
+
+void llama_model_saver::add_tensor(const struct lm_ggml_tensor * tensor) {
+    if (!tensor) {
+        return;
+    }
+    if (lm_gguf_find_tensor(lm_gguf_ctx, tensor->name) >= 0) {
+        LM_GGML_ASSERT(std::string(tensor->name) == "rope_freqs.weight"); // FIXME
+        return;
+    }
+    lm_gguf_add_tensor(lm_gguf_ctx, tensor);
+}
+
+void llama_model_saver::add_kv_from_model() {
+    const llama_hparams & hparams = model.hparams;
+    const llama_vocab & vocab = model.vocab;
+
+    const int32_t n_vocab = vocab.n_tokens();
+    std::vector<std::string> tokens(n_vocab);
+    std::vector<float> scores(n_vocab);
+    std::vector<int32_t> token_types(n_vocab);
+
+    for (int32_t id = 0; id < n_vocab; ++id) {
+        const llama_vocab::token_data & token_data = vocab.get_token_data(id);
+
+        tokens[id] = token_data.text;
+        scores[id] = token_data.score;
+
+        switch(token_data.attr) {
+            case LLAMA_TOKEN_ATTR_UNKNOWN:      token_types[id] = LLAMA_TOKEN_TYPE_UNKNOWN;      break;
+            case LLAMA_TOKEN_ATTR_UNUSED:       token_types[id] = LLAMA_TOKEN_TYPE_UNUSED;       break;
+            case LLAMA_TOKEN_ATTR_NORMAL:       token_types[id] = LLAMA_TOKEN_TYPE_NORMAL;       break;
+            case LLAMA_TOKEN_ATTR_CONTROL:      token_types[id] = LLAMA_TOKEN_TYPE_CONTROL;      break;
+            case LLAMA_TOKEN_ATTR_USER_DEFINED: token_types[id] = LLAMA_TOKEN_TYPE_USER_DEFINED; break;
+            case LLAMA_TOKEN_ATTR_BYTE:         token_types[id] = LLAMA_TOKEN_TYPE_BYTE;         break;
+            case LLAMA_TOKEN_ATTR_UNDEFINED:
+            default:                            token_types[id] = LLAMA_TOKEN_TYPE_UNDEFINED;    break;
+        }
+    }
+
+    // add_kv(LLM_KV_GENERAL_TYPE, ???);
+    add_kv(LLM_KV_GENERAL_ARCHITECTURE, model.arch_name());
+    // add_kv(LLM_KV_GENERAL_QUANTIZATION_VERSION, ???);
+    // add_kv(LLM_KV_GENERAL_ALIGNMENT, ???);
+    add_kv(LLM_KV_GENERAL_NAME, model.name);
+    // add_kv(LLM_KV_GENERAL_AUTHOR, ???);
+    // add_kv(LLM_KV_GENERAL_VERSION, ???);
+    // add_kv(LLM_KV_GENERAL_URL, ???);
+    // add_kv(LLM_KV_GENERAL_DESCRIPTION, ???);
+    // add_kv(LLM_KV_GENERAL_LICENSE, ???);
+    // add_kv(LLM_KV_GENERAL_SOURCE_URL, ???);
+    // add_kv(LLM_KV_GENERAL_SOURCE_HF_REPO, ???);
+
+    add_kv(LLM_KV_VOCAB_SIZE, vocab.n_tokens());
+    add_kv(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train);
+    add_kv(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd);
+    add_kv(LLM_KV_BLOCK_COUNT, hparams.n_layer);
+    add_kv(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
+    add_kv(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff_arr, true);
+    add_kv(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
+    add_kv(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
+    add_kv(LLM_KV_USE_PARALLEL_RESIDUAL, hparams.use_par_res);
+    // add_kv(LLM_KV_TENSOR_DATA_LAYOUT, ???);
+    add_kv(LLM_KV_EXPERT_COUNT, hparams.n_expert);
+    add_kv(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used);
+    add_kv(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
+    add_kv(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+    add_kv(LLM_KV_POOLING_TYPE, uint32_t(hparams.pooling_type));
+    add_kv(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
+    add_kv(LLM_KV_DECODER_START_TOKEN_ID, hparams.dec_start_token_id);
+    add_kv(LLM_KV_ATTN_LOGIT_SOFTCAPPING, hparams.f_attn_logit_softcapping);
+    add_kv(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping);
+    add_kv(LLM_KV_SWIN_NORM, hparams.swin_norm);
+    add_kv(LLM_KV_RESCALE_EVERY_N_LAYERS, hparams.rescale_every_n_layers);
+    add_kv(LLM_KV_TIME_MIX_EXTRA_DIM, hparams.time_mix_extra_dim);
+    add_kv(LLM_KV_TIME_DECAY_EXTRA_DIM, hparams.time_decay_extra_dim);
+    add_kv(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale);
+    add_kv(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale);
+
+    add_kv(LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head_arr, true);
+    add_kv(LLM_KV_ATTENTION_HEAD_COUNT_KV, hparams.n_head_kv_arr, true);
+    add_kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);
+    add_kv(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv);
+    add_kv(LLM_KV_ATTENTION_KEY_LENGTH, hparams.n_embd_head_k);
+    add_kv(LLM_KV_ATTENTION_VALUE_LENGTH, hparams.n_embd_head_v);
+    add_kv(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
+    add_kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+    add_kv(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
+    add_kv(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
+    add_kv(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
+    add_kv(LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, hparams.n_rel_attn_bkts);
+    add_kv(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
+    add_kv(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
+
+    const float rope_scaling_factor = hparams.rope_freq_scale_train == 1.0f ? 0.0f : 1.0f/hparams.rope_freq_scale_train;
+
+    add_kv(LLM_KV_ROPE_DIMENSION_COUNT, hparams.n_rot);
+    add_kv(LLM_KV_ROPE_FREQ_BASE, hparams.rope_freq_base_train);
+    // add_kv(LLM_KV_ROPE_SCALE_LINEAR, rope_scaling_factor); // old name
+    add_kv(LLM_KV_ROPE_SCALING_TYPE, llama_rope_scaling_type_name(hparams.rope_scaling_type_train));
+    add_kv(LLM_KV_ROPE_SCALING_FACTOR, rope_scaling_factor);
+    add_kv(LLM_KV_ROPE_SCALING_ATTN_FACTOR, hparams.rope_attn_factor);
+    add_kv(LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, hparams.n_ctx_orig_yarn);
+    add_kv(LLM_KV_ROPE_SCALING_FINETUNED, hparams.rope_finetuned);
+    add_kv(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
+
+    // TODO: implement split file support
+    // add_kv(LLM_KV_SPLIT_NO, ???);
+    // add_kv(LLM_KV_SPLIT_COUNT, ???);
+    // add_kv(LLM_KV_SPLIT_TENSORS_COUNT, ???);
+
+    add_kv(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner);
+    add_kv(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv);
+    add_kv(LLM_KV_SSM_STATE_SIZE, hparams.ssm_d_state);
+    add_kv(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank);
+    add_kv(LLM_KV_SSM_DT_B_C_RMS, hparams.ssm_dt_b_c_rms);
+
+    add_kv(LLM_KV_WKV_HEAD_SIZE, hparams.wkv_head_size);
+
+    add_kv(LLM_KV_TOKENIZER_MODEL, vocab.get_tokenizer_model());
+    add_kv(LLM_KV_TOKENIZER_PRE, vocab.get_tokenizer_pre());
+    add_kv(LLM_KV_TOKENIZER_LIST, tokens);
+    add_kv(LLM_KV_TOKENIZER_TOKEN_TYPE, token_types);
+    add_kv(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, vocab.n_token_types());
+    add_kv(LLM_KV_TOKENIZER_SCORES, scores);
+    add_kv(LLM_KV_TOKENIZER_MERGES, vocab.get_bpe_merges());
+    // FIXME llama_token is type i32 but when reading in a GGUF file u32 is expected, not an issue for writing though
+    add_kv(LLM_KV_TOKENIZER_BOS_ID, uint32_t(vocab.token_bos()));
+    add_kv(LLM_KV_TOKENIZER_EOS_ID, uint32_t(vocab.token_eos()));
+    add_kv(LLM_KV_TOKENIZER_EOT_ID, uint32_t(vocab.token_eot()));
+    add_kv(LLM_KV_TOKENIZER_EOM_ID, uint32_t(vocab.token_eom()));
+    add_kv(LLM_KV_TOKENIZER_UNK_ID, uint32_t(vocab.token_unk()));
+    add_kv(LLM_KV_TOKENIZER_SEP_ID, uint32_t(vocab.token_sep()));
+    add_kv(LLM_KV_TOKENIZER_PAD_ID, uint32_t(vocab.token_pad()));
+    // add_kv(LLM_KV_TOKENIZER_CLS_ID, uint32_t(vocab.token_bos())); // deprecated
+    // add_kv(LLM_KV_TOKENIZER_MASK_ID, ???);
+    add_kv(LLM_KV_TOKENIZER_ADD_BOS, vocab.get_add_bos());
+    add_kv(LLM_KV_TOKENIZER_ADD_EOS, vocab.get_add_eos());
+    add_kv(LLM_KV_TOKENIZER_ADD_PREFIX, vocab.get_add_space_prefix());
+    add_kv(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, vocab.get_remove_extra_whitespaces());
+    add_kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, vocab.get_precompiled_charsmap());
+    // add_kv(LLM_KV_TOKENIZER_HF_JSON, ???);
+    // add_kv(LLM_KV_TOKENIZER_RWKV, ???);
+    add_kv(LLM_KV_TOKENIZER_FIM_PRE_ID, uint32_t(vocab.token_fim_pre()));
+    add_kv(LLM_KV_TOKENIZER_FIM_SUF_ID, uint32_t(vocab.token_fim_suf()));
+    add_kv(LLM_KV_TOKENIZER_FIM_MID_ID, uint32_t(vocab.token_fim_mid()));
+    add_kv(LLM_KV_TOKENIZER_FIM_PAD_ID, uint32_t(vocab.token_fim_pad()));
+    add_kv(LLM_KV_TOKENIZER_FIM_REP_ID, uint32_t(vocab.token_fim_rep()));
+    add_kv(LLM_KV_TOKENIZER_FIM_SEP_ID, uint32_t(vocab.token_fim_sep()));
+
+    // TODO: implement LoRA support
+    // add_kv(LLM_KV_ADAPTER_TYPE, ???);
+    // add_kv(LLM_KV_ADAPTER_LORA_ALPHA, ???);
+
+    // deprecated
+    // add_kv(LLM_KV_TOKENIZER_PREFIX_ID, ???);
+    // add_kv(LLM_KV_TOKENIZER_SUFFIX_ID, ???);
+    // add_kv(LLM_KV_TOKENIZER_MIDDLE_ID, ???);
+}
+
+void llama_model_saver::add_tensors_from_model() {
+    if (std::string(model.output->name) != std::string(model.tok_embd->name)) {
+        add_tensor(model.tok_embd); // some models use the same tensor for tok_embd and output
+    }
+    add_tensor(model.type_embd);
+    add_tensor(model.pos_embd);
+    add_tensor(model.tok_norm);
+    add_tensor(model.tok_norm_b);
+    add_tensor(model.output_norm);
+    add_tensor(model.output_norm_b);
+    add_tensor(model.output);
+    add_tensor(model.output_b);
+    add_tensor(model.output_norm_enc);
+    add_tensor(model.cls);
+    add_tensor(model.cls_b);
+    add_tensor(model.cls_out);
+    add_tensor(model.cls_out_b);
+
+    for (const struct llama_layer & layer : model.layers) {
+        for (size_t i = 0; i < sizeof(layer)/sizeof(struct lm_ggml_tensor *); ++i) {
+            add_tensor(reinterpret_cast<const struct lm_ggml_tensor * const *>(&layer)[i]);
+        }
+    }
+}
+
+void llama_model_saver::save(const std::string & path_model) {
+    lm_gguf_write_to_file(lm_gguf_ctx, path_model.c_str(), false);
+}
+
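
One detail of the templated add_kv above deserves a callout: when per_layer is set and every layer shares the same value, the saver writes a single scalar KV instead of an n_layer-sized array. A standalone illustration of that collapse check, using only the standard library:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Mirrors the all_values_the_same check in add_kv above: a per-layer array
// whose entries are all equal can be stored as one scalar.
bool collapses_to_scalar(const std::vector<uint32_t> & per_layer) {
    return !per_layer.empty() &&
           std::all_of(per_layer.begin(), per_layer.end(),
                       [&](uint32_t v) { return v == per_layer.front(); });
}

int main() {
    std::printf("%d\n", collapses_to_scalar({32, 32, 32})); // 1: write scalar
    std::printf("%d\n", collapses_to_scalar({32, 16, 32})); // 0: write array
}
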

package/cpp/llama-model-saver.h (new file)

@@ -0,0 +1,37 @@
+#pragma once
+
+#include "llama.h"
+#include "llama-arch.h"
+
+#include <vector>
+
+struct llama_model_saver {
+    struct lm_gguf_context * lm_gguf_ctx = nullptr;
+    const struct llama_model & model;
+    const struct LLM_KV llm_kv;
+
+    llama_model_saver(const struct llama_model & model);
+    ~llama_model_saver();
+
+    void add_kv(enum llm_kv key, uint32_t value);
+    void add_kv(enum llm_kv key, int32_t value);
+    void add_kv(enum llm_kv key, float value);
+    void add_kv(enum llm_kv key, bool value);
+    void add_kv(enum llm_kv key, const char * value);
+
+    [[noreturn]]
+    void add_kv(enum llm_kv key, char value); // needed to make the template below compile
+
+    template <typename Container>
+    void add_kv(enum llm_kv key, const Container & value, bool per_layer = false);
+
+    void add_kv(enum llm_kv key, const std::vector<std::string> & value);
+
+    void add_tensor(const struct lm_ggml_tensor * tensor);
+
+    void add_kv_from_model();
+
+    void add_tensors_from_model();
+
+    void save(const std::string & path_model);
+};
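
The header suggests a simple calling sequence for round-tripping a model back to GGUF. A hedged usage sketch follows; the save_model_copy wrapper is illustrative and not part of the package:

#include <string>
#include "llama-model-saver.h"

// Hypothetical helper: re-serialize a loaded model and its metadata
// back to a GGUF file on disk.
void save_model_copy(const llama_model & model, const std::string & path) {
    llama_model_saver saver(model); // opens an empty GGUF context
    saver.add_kv_from_model();      // hparams + tokenizer metadata
    saver.add_tensors_from_model(); // embeddings, norms, per-layer tensors
    saver.save(path);               // lm_gguf_write_to_file under the hood
}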