cui-llama.rn 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/ggml.h CHANGED
@@ -394,8 +394,8 @@ extern "C" {
394
394
 
395
395
  // precision
396
396
  enum lm_ggml_prec {
397
- LM_GGML_PREC_DEFAULT,
398
- LM_GGML_PREC_F32,
397
+ LM_GGML_PREC_DEFAULT = 0, // stored as lm_ggml_tensor.op_params, 0 by default
398
+ LM_GGML_PREC_F32 = 10,
399
399
  };
400
400
 
401
401
  // model file types
@@ -482,6 +482,7 @@ extern "C" {
482
482
  LM_GGML_OP_CONV_TRANSPOSE_1D,
483
483
  LM_GGML_OP_IM2COL,
484
484
  LM_GGML_OP_IM2COL_BACK,
485
+ LM_GGML_OP_CONV_2D_DW,
485
486
  LM_GGML_OP_CONV_TRANSPOSE_2D,
486
487
  LM_GGML_OP_POOL_1D,
487
488
  LM_GGML_OP_POOL_2D,
@@ -508,17 +509,12 @@ extern "C" {
508
509
 
509
510
  LM_GGML_OP_UNARY,
510
511
 
511
- LM_GGML_OP_MAP_UNARY,
512
- LM_GGML_OP_MAP_BINARY,
513
-
514
- LM_GGML_OP_MAP_CUSTOM1_F32,
515
- LM_GGML_OP_MAP_CUSTOM2_F32,
516
- LM_GGML_OP_MAP_CUSTOM3_F32,
517
-
518
512
  LM_GGML_OP_MAP_CUSTOM1,
519
513
  LM_GGML_OP_MAP_CUSTOM2,
520
514
  LM_GGML_OP_MAP_CUSTOM3,
521
515
 
516
+ LM_GGML_OP_CUSTOM,
517
+
522
518
  LM_GGML_OP_CROSS_ENTROPY_LOSS,
523
519
  LM_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
524
520
  LM_GGML_OP_OPT_STEP_ADAMW,
@@ -541,6 +537,7 @@ extern "C" {
541
537
  LM_GGML_UNARY_OP_HARDSWISH,
542
538
  LM_GGML_UNARY_OP_HARDSIGMOID,
543
539
  LM_GGML_UNARY_OP_EXP,
540
+ LM_GGML_UNARY_OP_GELU_ERF,
544
541
 
545
542
  LM_GGML_UNARY_OP_COUNT,
546
543
  };
@@ -678,11 +675,18 @@ extern "C" {
678
675
  LM_GGML_API bool lm_ggml_is_3d (const struct lm_ggml_tensor * tensor);
679
676
  LM_GGML_API int lm_ggml_n_dims (const struct lm_ggml_tensor * tensor); // returns 1 for scalars
680
677
 
678
+ // returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
681
679
  LM_GGML_API bool lm_ggml_is_contiguous (const struct lm_ggml_tensor * tensor);
682
680
  LM_GGML_API bool lm_ggml_is_contiguous_0(const struct lm_ggml_tensor * tensor); // same as lm_ggml_is_contiguous()
683
681
  LM_GGML_API bool lm_ggml_is_contiguous_1(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 1
684
682
  LM_GGML_API bool lm_ggml_is_contiguous_2(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 2
685
683
 
684
+ // returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
685
+ LM_GGML_API bool lm_ggml_is_contiguously_allocated(const struct lm_ggml_tensor * tensor);
686
+
687
+ // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
688
+ LM_GGML_API bool lm_ggml_is_contiguous_channels(const struct lm_ggml_tensor * tensor);
689
+
686
690
  LM_GGML_API bool lm_ggml_are_same_shape (const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
687
691
  LM_GGML_API bool lm_ggml_are_same_stride(const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
688
692
 
@@ -766,7 +770,7 @@ extern "C" {
766
770
  // Tensor flags
767
771
  LM_GGML_API void lm_ggml_set_input(struct lm_ggml_tensor * tensor);
768
772
  LM_GGML_API void lm_ggml_set_output(struct lm_ggml_tensor * tensor);
769
- LM_GGML_API void lm_ggml_set_param(struct lm_ggml_context * ctx, struct lm_ggml_tensor * tensor);
773
+ LM_GGML_API void lm_ggml_set_param(struct lm_ggml_tensor * tensor);
770
774
  LM_GGML_API void lm_ggml_set_loss(struct lm_ggml_tensor * tensor);
771
775
 
772
776
  //
@@ -936,7 +940,7 @@ extern "C" {
936
940
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
937
941
  struct lm_ggml_context * ctx,
938
942
  struct lm_ggml_tensor * a,
939
- struct lm_ggml_tensor * b);
943
+ struct lm_ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
940
944
 
941
945
  // concat a and b along dim
942
946
  // used in stable-diffusion
@@ -1022,6 +1026,16 @@ extern "C" {
1022
1026
  struct lm_ggml_context * ctx,
1023
1027
  struct lm_ggml_tensor * a);
1024
1028
 
1029
+ // GELU using erf (error function) when possible
1030
+ // some backends may fallback to approximation based on Abramowitz and Stegun formula
1031
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf(
1032
+ struct lm_ggml_context * ctx,
1033
+ struct lm_ggml_tensor * a);
1034
+
1035
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf_inplace(
1036
+ struct lm_ggml_context * ctx,
1037
+ struct lm_ggml_tensor * a);
1038
+
1025
1039
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_quick(
1026
1040
  struct lm_ggml_context * ctx,
1027
1041
  struct lm_ggml_tensor * a);
@@ -1666,7 +1680,7 @@ extern "C" {
1666
1680
  struct lm_ggml_tensor * a,
1667
1681
  struct lm_ggml_tensor * b);
1668
1682
 
1669
- // depthwise
1683
+ // depthwise (via im2col and mul_mat)
1670
1684
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
1671
1685
  struct lm_ggml_context * ctx,
1672
1686
  struct lm_ggml_tensor * a, // convolution kernel
@@ -1678,6 +1692,22 @@ extern "C" {
1678
1692
  int d0, // dilation dimension 0
1679
1693
  int d1); // dilation dimension 1
1680
1694
 
1695
+ // Depthwise 2D convolution
1696
+ // may be faster than lm_ggml_conv_2d_dw, but not available in all backends
1697
+ // a: KW KH 1 C convolution kernel
1698
+ // b: W H C N input data
1699
+ // res: W_out H_out C N
1700
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw_direct(
1701
+ struct lm_ggml_context * ctx,
1702
+ struct lm_ggml_tensor * a,
1703
+ struct lm_ggml_tensor * b,
1704
+ int stride0,
1705
+ int stride1,
1706
+ int pad0,
1707
+ int pad1,
1708
+ int dilation0,
1709
+ int dilation1);
1710
+
1681
1711
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
1682
1712
  struct lm_ggml_context * ctx,
1683
1713
  struct lm_ggml_tensor * a,
@@ -1723,24 +1753,29 @@ extern "C" {
1723
1753
  float p0,
1724
1754
  float p1);
1725
1755
 
1726
- // nearest interpolate
1756
+ enum lm_ggml_scale_mode {
1757
+ LM_GGML_SCALE_MODE_NEAREST = 0,
1758
+ LM_GGML_SCALE_MODE_BILINEAR = 1,
1759
+ };
1760
+
1761
+ // interpolate
1727
1762
  // multiplies ne0 and ne1 by scale factor
1728
- // used in stable-diffusion
1729
1763
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale(
1730
1764
  struct lm_ggml_context * ctx,
1731
1765
  struct lm_ggml_tensor * a,
1732
- int scale_factor);
1766
+ int scale_factor,
1767
+ enum lm_ggml_scale_mode mode);
1733
1768
 
1734
- // nearest interpolate
1735
- // nearest interpolate to specified dimensions
1736
- // used in tortoise.cpp
1769
+ // interpolate
1770
+ // interpolate scale to specified dimensions
1737
1771
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale_ext(
1738
1772
  struct lm_ggml_context * ctx,
1739
1773
  struct lm_ggml_tensor * a,
1740
1774
  int ne0,
1741
1775
  int ne1,
1742
1776
  int ne2,
1743
- int ne3);
1777
+ int ne3,
1778
+ enum lm_ggml_scale_mode mode);
1744
1779
 
1745
1780
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1746
1781
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_pad(
@@ -1917,83 +1952,6 @@ extern "C" {
1917
1952
 
1918
1953
  // custom operators
1919
1954
 
1920
- typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
1921
- typedef void (*lm_ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
1922
-
1923
- typedef void (*lm_ggml_custom1_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
1924
- typedef void (*lm_ggml_custom2_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
1925
- typedef void (*lm_ggml_custom3_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
1926
-
1927
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_f32(
1928
- struct lm_ggml_context * ctx,
1929
- struct lm_ggml_tensor * a,
1930
- lm_ggml_unary_op_f32_t fun),
1931
- "use lm_ggml_map_custom1 instead");
1932
-
1933
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_inplace_f32(
1934
- struct lm_ggml_context * ctx,
1935
- struct lm_ggml_tensor * a,
1936
- lm_ggml_unary_op_f32_t fun),
1937
- "use lm_ggml_map_custom1_inplace instead");
1938
-
1939
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_f32(
1940
- struct lm_ggml_context * ctx,
1941
- struct lm_ggml_tensor * a,
1942
- struct lm_ggml_tensor * b,
1943
- lm_ggml_binary_op_f32_t fun),
1944
- "use lm_ggml_map_custom2 instead");
1945
-
1946
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_inplace_f32(
1947
- struct lm_ggml_context * ctx,
1948
- struct lm_ggml_tensor * a,
1949
- struct lm_ggml_tensor * b,
1950
- lm_ggml_binary_op_f32_t fun),
1951
- "use lm_ggml_map_custom2_inplace instead");
1952
-
1953
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_f32(
1954
- struct lm_ggml_context * ctx,
1955
- struct lm_ggml_tensor * a,
1956
- lm_ggml_custom1_op_f32_t fun),
1957
- "use lm_ggml_map_custom1 instead");
1958
-
1959
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_inplace_f32(
1960
- struct lm_ggml_context * ctx,
1961
- struct lm_ggml_tensor * a,
1962
- lm_ggml_custom1_op_f32_t fun),
1963
- "use lm_ggml_map_custom1_inplace instead");
1964
-
1965
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_f32(
1966
- struct lm_ggml_context * ctx,
1967
- struct lm_ggml_tensor * a,
1968
- struct lm_ggml_tensor * b,
1969
- lm_ggml_custom2_op_f32_t fun),
1970
- "use lm_ggml_map_custom2 instead");
1971
-
1972
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_inplace_f32(
1973
- struct lm_ggml_context * ctx,
1974
- struct lm_ggml_tensor * a,
1975
- struct lm_ggml_tensor * b,
1976
- lm_ggml_custom2_op_f32_t fun),
1977
- "use lm_ggml_map_custom2_inplace instead");
1978
-
1979
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_f32(
1980
- struct lm_ggml_context * ctx,
1981
- struct lm_ggml_tensor * a,
1982
- struct lm_ggml_tensor * b,
1983
- struct lm_ggml_tensor * c,
1984
- lm_ggml_custom3_op_f32_t fun),
1985
- "use lm_ggml_map_custom3 instead");
1986
-
1987
- LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_inplace_f32(
1988
- struct lm_ggml_context * ctx,
1989
- struct lm_ggml_tensor * a,
1990
- struct lm_ggml_tensor * b,
1991
- struct lm_ggml_tensor * c,
1992
- lm_ggml_custom3_op_f32_t fun),
1993
- "use lm_ggml_map_custom3_inplace instead");
1994
-
1995
- // custom operators v2
1996
-
1997
1955
  typedef void (*lm_ggml_custom1_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, int ith, int nth, void * userdata);
1998
1956
  typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
1999
1957
  typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
@@ -2049,6 +2007,30 @@ extern "C" {
2049
2007
  int n_tasks,
2050
2008
  void * userdata);
2051
2009
 
2010
+ typedef void (*lm_ggml_custom_op_t)(struct lm_ggml_tensor * dst , int ith, int nth, void * userdata);
2011
+
2012
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_4d(
2013
+ struct lm_ggml_context * ctx,
2014
+ enum lm_ggml_type type,
2015
+ int64_t ne0,
2016
+ int64_t ne1,
2017
+ int64_t ne2,
2018
+ int64_t ne3,
2019
+ struct lm_ggml_tensor ** args,
2020
+ int n_args,
2021
+ lm_ggml_custom_op_t fun,
2022
+ int n_tasks,
2023
+ void * userdata);
2024
+
2025
+ LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_inplace(
2026
+ struct lm_ggml_context * ctx,
2027
+ struct lm_ggml_tensor * a,
2028
+ struct lm_ggml_tensor ** args,
2029
+ int n_args,
2030
+ lm_ggml_custom_op_t fun,
2031
+ int n_tasks,
2032
+ void * userdata);
2033
+
2052
2034
  // loss function
2053
2035
 
2054
2036
  LM_GGML_API struct lm_ggml_tensor * lm_ggml_cross_entropy_loss(
@@ -2079,15 +2061,14 @@ extern "C" {
2079
2061
 
2080
2062
  LM_GGML_API void lm_ggml_build_forward_expand(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
2081
2063
  LM_GGML_API void lm_ggml_build_backward_expand(
2082
- struct lm_ggml_context * ctx_static, // context for static gradients (loss + gradient accumulation)
2083
- struct lm_ggml_context * ctx_compute, // context for gradient computation
2084
- struct lm_ggml_cgraph * cgraph,
2085
- bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
2064
+ struct lm_ggml_context * ctx, // context for gradient computation
2065
+ struct lm_ggml_cgraph * cgraph,
2066
+ struct lm_ggml_tensor ** grad_accs);
2086
2067
 
2087
2068
  // graph allocation in a context
2088
2069
  LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
2089
2070
  LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
2090
- LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
2071
+ LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, bool force_grads);
2091
2072
  LM_GGML_API void lm_ggml_graph_cpy (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
2092
2073
  LM_GGML_API void lm_ggml_graph_reset (struct lm_ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
2093
2074
  LM_GGML_API void lm_ggml_graph_clear (struct lm_ggml_cgraph * cgraph);
package/cpp/gguf.cpp CHANGED
@@ -299,10 +299,10 @@ bool lm_gguf_read_emplace_helper(const struct lm_gguf_reader & gr, std::vector<s
299
299
  return false;
300
300
  }
301
301
  } catch (std::length_error &) {
302
- fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
302
+ LM_GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
303
303
  return false;
304
304
  } catch (std::bad_alloc &) {
305
- fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
305
+ LM_GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
306
306
  return false;
307
307
  }
308
308
  kv.emplace_back(key, value);
@@ -328,14 +328,14 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
328
328
  ok = ok && gr.read(magic, 4);
329
329
 
330
330
  if (!ok) {
331
- fprintf(stderr, "%s: failed to read magic\n", __func__);
331
+ LM_GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
332
332
  lm_gguf_free(ctx);
333
333
  return nullptr;
334
334
  }
335
335
 
336
336
  for (uint32_t i = 0; i < magic.size(); i++) {
337
337
  if (magic[i] != LM_GGUF_MAGIC[i]) {
338
- fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
338
+ LM_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
339
339
  lm_gguf_free(ctx);
340
340
  return nullptr;
341
341
  }
@@ -348,11 +348,11 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
348
348
 
349
349
  if (ok && gr.read(ctx->version)) {
350
350
  if (ctx->version == 1) {
351
- fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
351
+ LM_GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
352
352
  ok = false;
353
353
  }
354
354
  if (ctx->version > LM_GGUF_VERSION) {
355
- fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
355
+ LM_GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
356
356
  __func__, ctx->version, LM_GGUF_VERSION);
357
357
  ok = false;
358
358
  }
@@ -363,7 +363,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
363
363
  if (ok && gr.read(n_tensors)) {
364
364
  static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
365
365
  if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(lm_gguf_tensor_info))) {
366
- fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
366
+ LM_GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
367
367
  __func__, n_tensors, SIZE_MAX/sizeof(lm_gguf_tensor_info));
368
368
  ok = false;
369
369
  }
@@ -374,7 +374,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
374
374
  if (ok && gr.read(n_kv)) {
375
375
  static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
376
376
  if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(lm_gguf_kv))) {
377
- fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
377
+ LM_GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
378
378
  __func__, n_kv, SIZE_MAX/sizeof(lm_gguf_kv));
379
379
  ok = false;
380
380
  }
@@ -383,7 +383,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
383
383
  }
384
384
 
385
385
  if (!ok) {
386
- fprintf(stderr, "%s: failed to read header\n", __func__);
386
+ LM_GGML_LOG_ERROR("%s: failed to read header\n", __func__);
387
387
  lm_gguf_free(ctx);
388
388
  return nullptr;
389
389
  }
@@ -399,15 +399,15 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
399
399
  try {
400
400
  ok = ok && gr.read(key);
401
401
  } catch (std::length_error &) {
402
- fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
402
+ LM_GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
403
403
  ok = false;
404
404
  } catch (std::bad_alloc &) {
405
- fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
405
+ LM_GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
406
406
  ok = false;
407
407
  }
408
408
  for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
409
409
  if (key == ctx->kv[j].key) {
410
- fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
410
+ LM_GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
411
411
  ok = false;
412
412
  }
413
413
  }
@@ -441,14 +441,14 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
441
441
  case LM_GGUF_TYPE_ARRAY:
442
442
  default:
443
443
  {
444
- fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
444
+ LM_GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
445
445
  ok = false;
446
446
  } break;
447
447
  }
448
448
  }
449
449
 
450
450
  if (!ok) {
451
- fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
451
+ LM_GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
452
452
  lm_gguf_free(ctx);
453
453
  return nullptr;
454
454
  }
@@ -458,7 +458,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
458
458
  ctx->alignment = alignment_idx == -1 ? LM_GGUF_DEFAULT_ALIGNMENT : lm_gguf_get_val_u32(ctx, alignment_idx);
459
459
 
460
460
  if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
461
- fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
461
+ LM_GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
462
462
  lm_gguf_free(ctx);
463
463
  return nullptr;
464
464
  }
@@ -474,14 +474,14 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
474
474
  try {
475
475
  ok = ok && gr.read(name);
476
476
  } catch (std::length_error &) {
477
- fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
477
+ LM_GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
478
478
  ok = false;
479
479
  } catch (std::bad_alloc &) {
480
- fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
480
+ LM_GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
481
481
  ok = false;
482
482
  }
483
483
  if (name.length() >= LM_GGML_MAX_NAME) {
484
- fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), LM_GGML_MAX_NAME);
484
+ LM_GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), LM_GGML_MAX_NAME);
485
485
  ok = false;
486
486
  break;
487
487
  }
@@ -490,7 +490,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
490
490
  // make sure there are no duplicate tensor names
491
491
  for (int64_t j = 0; ok && j < i; ++j) {
492
492
  if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
493
- fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
493
+ LM_GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
494
494
  ok = false;
495
495
  break;
496
496
  }
@@ -505,7 +505,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
505
505
  uint32_t n_dims = -1;
506
506
  ok = ok && gr.read(n_dims);
507
507
  if (n_dims > LM_GGML_MAX_DIMS) {
508
- fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
508
+ LM_GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
509
509
  __func__, info.t.name, n_dims, LM_GGML_MAX_DIMS);
510
510
  ok = false;
511
511
  break;
@@ -518,7 +518,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
518
518
 
519
519
  // check that all ne are non-negative
520
520
  if (info.t.ne[j] < 0) {
521
- fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
521
+ LM_GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
522
522
  __func__, info.t.name, j, info.t.ne[j]);
523
523
  ok = false;
524
524
  break;
@@ -530,7 +530,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
530
530
  (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
531
531
  (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
532
532
 
533
- fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
533
+ LM_GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
534
534
  "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
535
535
  __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
536
536
  ok = false;
@@ -547,7 +547,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
547
547
 
548
548
  // check that tensor type is within defined range
549
549
  if (info.t.type < 0 || info.t.type >= LM_GGML_TYPE_COUNT) {
550
- fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
550
+ LM_GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
551
551
  __func__, info.t.name, info.t.type, lm_ggml_type_name(info.t.type));
552
552
  ok = false;
553
553
  break;
@@ -557,7 +557,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
557
557
 
558
558
  // check that row size is divisible by block size
559
559
  if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
560
- fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
560
+ LM_GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
561
561
  "not a multiple of block size (%" PRId64 ")\n",
562
562
  __func__, info.t.name, (int) info.t.type, lm_ggml_type_name(info.t.type), info.t.ne[0], blck_size);
563
563
  ok = false;
@@ -582,7 +582,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
582
582
  }
583
583
 
584
584
  if (!ok) {
585
- fprintf(stderr, "%s: failed to read tensor info\n", __func__);
585
+ LM_GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
586
586
  lm_gguf_free(ctx);
587
587
  return nullptr;
588
588
  }
@@ -590,7 +590,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
590
590
 
591
591
  // we require the data section to be aligned, so take into account any padding
592
592
  if (fseek(file, LM_GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
593
- fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
593
+ LM_GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
594
594
  lm_gguf_free(ctx);
595
595
  return nullptr;
596
596
  }
@@ -604,9 +604,9 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
604
604
  for (size_t i = 0; i < ctx->info.size(); ++i) {
605
605
  const lm_gguf_tensor_info & ti = ctx->info[i];
606
606
  if (ti.offset != ctx->size) {
607
- fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
607
+ LM_GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
608
608
  __func__, ti.t.name, ti.offset, ctx->size);
609
- fprintf(stderr, "%s: failed to read tensor data\n", __func__);
609
+ LM_GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
610
610
  lm_gguf_free(ctx);
611
611
  return nullptr;
612
612
  }
@@ -634,7 +634,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
634
634
 
635
635
  *params.ctx = lm_ggml_init(pdata);
636
636
  if (*params.ctx == nullptr) {
637
- fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
637
+ LM_GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
638
638
  lm_gguf_free(ctx);
639
639
  return nullptr;
640
640
  }
@@ -656,7 +656,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
656
656
  ok = ok && gr.read(data->data, ctx->size);
657
657
 
658
658
  if (!ok) {
659
- fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
659
+ LM_GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
660
660
  lm_ggml_free(ctx_data);
661
661
  *params.ctx = nullptr;
662
662
  lm_gguf_free(ctx);
@@ -689,7 +689,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
689
689
  }
690
690
 
691
691
  if (!ok) {
692
- fprintf(stderr, "%s: failed to create tensors\n", __func__);
692
+ LM_GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
693
693
  lm_ggml_free(ctx_data);
694
694
  *params.ctx = nullptr;
695
695
  lm_gguf_free(ctx);
@@ -706,7 +706,7 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
706
706
  FILE * file = lm_ggml_fopen(fname, "rb");
707
707
 
708
708
  if (!file) {
709
- fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
709
+ LM_GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
710
710
  return nullptr;
711
711
  }
712
712
 
@@ -1305,7 +1305,7 @@ bool lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fnam
1305
1305
  FILE * file = lm_ggml_fopen(fname, "wb");
1306
1306
 
1307
1307
  if (!file) {
1308
- fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1308
+ LM_GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1309
1309
  return false;
1310
1310
  }
1311
1311
 
@@ -16,6 +16,9 @@ using json = nlohmann::ordered_json;
16
16
  static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
17
17
  auto has_max = max_items != std::numeric_limits<int>::max();
18
18
 
19
+ if (max_items == 0) {
20
+ return "";
21
+ }
19
22
  if (min_items == 0 && max_items == 1) {
20
23
  return item_rule + "?";
21
24
  }
@@ -253,6 +253,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
253
253
  std::vector<lm_ggml_backend_buffer_type_t> buft_extra;
254
254
  {
255
255
  auto * cpu_dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
256
+ if (!cpu_dev) {
257
+ throw std::runtime_error(format("%s: no CPU backend found", __func__));
258
+ }
256
259
  auto * cpu_reg = lm_ggml_backend_dev_backend_reg(cpu_dev);
257
260
 
258
261
  auto lm_ggml_backend_dev_get_extra_bufts_fn = (lm_ggml_backend_dev_get_extra_bufts_t)
@@ -291,6 +294,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
291
294
  LLAMA_LOG_WARN("%s: lora for '%s' cannot use buft '%s', fallback to CPU\n", __func__, model_tensor->name, lm_ggml_backend_buft_name(buft));
292
295
 
293
296
  auto * cpu_dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
297
+ if (!cpu_dev) {
298
+ throw std::runtime_error(format("%s: no CPU backend found", __func__));
299
+ }
294
300
  buft = lm_ggml_backend_dev_buffer_type(cpu_dev);
295
301
 
296
302
  break;