@novastera-oss/llamarn 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/PureCppImpl.cpp +9 -27
  14. package/cpp/SystemUtils.h +2 -2
  15. package/cpp/build-info.cpp +2 -2
  16. package/cpp/llama.cpp/README.md +11 -3
  17. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  18. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  19. package/cpp/llama.cpp/common/arg.cpp +153 -113
  20. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  21. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  22. package/cpp/llama.cpp/common/chat.cpp +847 -699
  23. package/cpp/llama.cpp/common/chat.h +73 -6
  24. package/cpp/llama.cpp/common/common.cpp +50 -82
  25. package/cpp/llama.cpp/common/common.h +21 -17
  26. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  27. package/cpp/llama.cpp/common/json-partial.h +37 -0
  28. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  29. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  30. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  31. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  32. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  33. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  34. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  35. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  36. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  37. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  38. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  39. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  40. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  75. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  120. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  121. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  122. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  123. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  124. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  125. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  126. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  127. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  128. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  129. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  130. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  131. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  132. package/cpp/llama.cpp/include/llama.h +62 -125
  133. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  150. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  152. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  159. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  160. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  161. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  162. package/cpp/llama.cpp/models/templates/README.md +2 -0
  163. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  164. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  165. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  166. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  167. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  168. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  169. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  170. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  171. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  172. package/cpp/llama.cpp/src/llama-context.h +30 -0
  173. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  174. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  175. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  176. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  177. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  178. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  179. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  180. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  181. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  182. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  183. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  184. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  185. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  186. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  187. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  188. package/cpp/llama.cpp/src/llama-model.h +6 -1
  189. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  190. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  191. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  192. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  193. package/cpp/llama.cpp/src/llama.cpp +14 -0
  194. package/cpp/rn-completion.cpp +60 -5
  195. package/ios/include/chat.h +73 -6
  196. package/ios/include/common/minja/chat-template.hpp +9 -5
  197. package/ios/include/common/minja/minja.hpp +69 -36
  198. package/ios/include/common.h +21 -17
  199. package/ios/include/llama.h +62 -125
  200. package/ios/libs/llama.xcframework/Info.plist +19 -19
  201. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  205. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  206. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  212. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  213. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  227. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  228. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  229. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  233. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  234. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  240. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  241. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  242. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  246. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  247. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  253. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  254. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  255. package/package.json +1 -1
  256. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  257. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  267. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  268. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -64,12 +64,17 @@
64
64
  // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
65
65
  float ggml_table_f32_f16[1 << 16];
66
66
 
67
- #if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && \
68
- (!defined(TARGET_OS_TV) && !defined(TARGET_OS_WATCH))
67
+ #if defined(__linux__) || \
68
+ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
69
+ (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
70
+
69
71
  #include <unistd.h>
70
72
  #include <sys/types.h>
71
73
  #include <sys/stat.h>
72
74
  #include <sys/wait.h>
75
+ #if defined(__linux__)
76
+ #include <sys/prctl.h>
77
+ #endif
73
78
 
74
79
  #if defined(__ANDROID__)
75
80
  #include <unwind.h>
@@ -133,10 +138,36 @@ static void ggml_print_backtrace(void) {
133
138
  if (GGML_NO_BACKTRACE) {
134
139
  return;
135
140
  }
136
- char attach[32];
137
- snprintf(attach, sizeof(attach), "attach %d", getpid());
138
- int pid = fork();
139
- if (pid == 0) {
141
+ #if defined(__linux__)
142
+ FILE * f = fopen("/proc/self/status", "r");
143
+ size_t size = 0;
144
+ char * line = NULL;
145
+ ssize_t length = 0;
146
+ while ((length = getline(&line, &size, f)) > 0) {
147
+ if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
148
+ (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
149
+ // Already being debugged, and the breakpoint is the later abort()
150
+ free(line);
151
+ fclose(f);
152
+ return;
153
+ }
154
+ }
155
+ free(line);
156
+ fclose(f);
157
+ int lock[2] = { -1, -1 };
158
+ (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
159
+ #endif
160
+ const int parent_pid = getpid();
161
+ const int child_pid = fork();
162
+ if (child_pid < 0) { // error
163
+ return;
164
+ } else if (child_pid == 0) { // child
165
+ char attach[32];
166
+ snprintf(attach, sizeof(attach), "attach %d", parent_pid);
167
+ #if defined(__linux__)
168
+ close(lock[1]);
169
+ (void) !read(lock[0], lock, 1);
170
+ #endif
140
171
  // try gdb
141
172
  execlp("gdb", "gdb", "--batch",
142
173
  "-ex", "set style enabled on",
@@ -149,18 +180,18 @@ static void ggml_print_backtrace(void) {
149
180
  execlp("lldb", "lldb", "--batch",
150
181
  "-o", "bt",
151
182
  "-o", "quit",
152
- "-p", attach,
183
+ "-p", &attach[sizeof("attach ") - 1],
153
184
  (char *) NULL);
154
- exit(EXIT_FAILURE);
155
- } else {
156
- int wstatus;
157
- waitpid(pid, &wstatus, 0);
158
- if (WIFEXITED(wstatus)) {
159
- if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
160
- // gdb failed, fallback to backtrace_symbols
161
- ggml_print_backtrace_symbols();
162
- }
163
- }
185
+ // gdb failed, fallback to backtrace_symbols
186
+ ggml_print_backtrace_symbols();
187
+ _Exit(0);
188
+ } else { // parent
189
+ #if defined(__linux__)
190
+ prctl(PR_SET_PTRACER, child_pid);
191
+ close(lock[1]);
192
+ close(lock[0]);
193
+ #endif
194
+ waitpid(child_pid, NULL, 0);
164
195
  }
165
196
  }
166
197
  #else
@@ -1068,9 +1099,10 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1068
1099
  "HARDSWISH",
1069
1100
  "HARDSIGMOID",
1070
1101
  "EXP",
1102
+ "GELU_ERF",
1071
1103
  };
1072
1104
 
1073
- static_assert(GGML_UNARY_OP_COUNT == 14, "GGML_UNARY_OP_COUNT != 14");
1105
+ static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
1074
1106
 
1075
1107
 
1076
1108
  static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
@@ -2280,6 +2312,26 @@ struct ggml_tensor * ggml_repeat(
2280
2312
  return result;
2281
2313
  }
2282
2314
 
2315
+ struct ggml_tensor * ggml_repeat_4d(
2316
+ struct ggml_context * ctx,
2317
+ struct ggml_tensor * a,
2318
+ int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2319
+ const bool can_repeat = ggml_is_empty(a) || (
2320
+ (ne0 % a->ne[0] == 0) &&
2321
+ (ne1 % a->ne[1] == 0) &&
2322
+ (ne2 % a->ne[2] == 0) &&
2323
+ (ne3 % a->ne[3] == 0)
2324
+ );
2325
+ GGML_ASSERT(can_repeat);
2326
+
2327
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2328
+
2329
+ result->op = GGML_OP_REPEAT;
2330
+ result->src[0] = a;
2331
+
2332
+ return result;
2333
+ }
2334
+
2283
2335
  // ggml_repeat_back
2284
2336
 
2285
2337
  struct ggml_tensor * ggml_repeat_back(
@@ -2470,6 +2522,20 @@ struct ggml_tensor * ggml_gelu_inplace(
2470
2522
  return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2471
2523
  }
2472
2524
 
2525
+ // ggml_gelu_erf
2526
+
2527
+ struct ggml_tensor * ggml_gelu_erf(
2528
+ struct ggml_context * ctx,
2529
+ struct ggml_tensor * a) {
2530
+ return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2531
+ }
2532
+
2533
+ struct ggml_tensor * ggml_gelu_erf_inplace(
2534
+ struct ggml_context * ctx,
2535
+ struct ggml_tensor * a) {
2536
+ return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2537
+ }
2538
+
2473
2539
  // ggml_gelu_quick
2474
2540
 
2475
2541
  struct ggml_tensor * ggml_gelu_quick(
@@ -5499,7 +5565,7 @@ static void ggml_compute_backward(
5499
5565
  // tensor = src0 * 1 + src1 * 0
5500
5566
  if (src0_needs_grads) {
5501
5567
  // dsrc0 = dtensor * 1
5502
- ggml_add_or_set(ctx, cgraph, isrc0, grad);
5568
+ ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
5503
5569
  }
5504
5570
  if (src1_needs_grads) {
5505
5571
  // dsrc1 = dtensor * 0 -> noop
@@ -5780,10 +5846,9 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
5780
5846
  }
5781
5847
 
5782
5848
  void ggml_build_backward_expand(
5783
- struct ggml_context * ctx_static,
5784
- struct ggml_context * ctx_compute,
5785
- struct ggml_cgraph * cgraph,
5786
- bool accumulate) {
5849
+ struct ggml_context * ctx,
5850
+ struct ggml_cgraph * cgraph,
5851
+ struct ggml_tensor ** grad_accs) {
5787
5852
  GGML_ASSERT(cgraph->n_nodes > 0);
5788
5853
  GGML_ASSERT(cgraph->grads);
5789
5854
  GGML_ASSERT(cgraph->grad_accs);
@@ -5856,21 +5921,24 @@ void ggml_build_backward_expand(
5856
5921
  GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
5857
5922
  node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
5858
5923
 
5859
- const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
5860
- GGML_ASSERT(igrad != GGML_HASHSET_FULL);
5861
- GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, igrad));
5862
- if ((accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) || (node->flags & GGML_TENSOR_FLAG_LOSS)) {
5863
- cgraph->grad_accs[igrad] = ggml_dup_tensor(ctx_static, node);
5864
- cgraph->grads[igrad] = cgraph->grad_accs[igrad];
5865
- ggml_format_name(cgraph->grad_accs[igrad], "grad acc for %s", node->name);
5924
+ const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
5925
+ GGML_ASSERT(ihash != GGML_HASHSET_FULL);
5926
+ GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
5927
+ if (grad_accs && grad_accs[i]) {
5928
+ cgraph->grad_accs[ihash] = grad_accs[i];
5929
+ cgraph->grads[ihash] = cgraph->grad_accs[ihash];
5930
+ } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
5931
+ // loss tensors always need a gradient accumulator
5932
+ cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
5933
+ cgraph->grads[ihash] = cgraph->grad_accs[ihash];
5866
5934
  }
5867
- grads_needed[igrad] = true;
5935
+ grads_needed[ihash] = true;
5868
5936
  }
5869
5937
 
5870
5938
  for (int i = n_nodes_f - 1; i >= 0; --i) {
5871
5939
  // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
5872
5940
  // use allocator to automatically make inplace operations
5873
- ggml_compute_backward(ctx_compute, cgraph, i, grads_needed);
5941
+ ggml_compute_backward(ctx, cgraph, i, grads_needed);
5874
5942
  }
5875
5943
 
5876
5944
  free(grads_needed);
@@ -6016,8 +6084,8 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
6016
6084
  }
6017
6085
  }
6018
6086
 
6019
- struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
6020
- struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
6087
+ struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
6088
+ struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
6021
6089
  ggml_graph_cpy(cgraph, result);
6022
6090
  return result;
6023
6091
  }
@@ -6036,6 +6104,9 @@ struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
6036
6104
  }
6037
6105
 
6038
6106
  void ggml_graph_reset(struct ggml_cgraph * cgraph) {
6107
+ if (!cgraph) {
6108
+ return;
6109
+ }
6039
6110
  GGML_ASSERT(cgraph->grads != NULL);
6040
6111
 
6041
6112
  for (int i = 0; i < cgraph->n_nodes; i++) {
@@ -6345,8 +6416,8 @@ void ggml_set_output(struct ggml_tensor * tensor) {
6345
6416
  tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
6346
6417
  }
6347
6418
 
6348
- void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
6349
- GGML_UNUSED(ctx); // TODO: remove this parameter
6419
+ void ggml_set_param(struct ggml_tensor * tensor) {
6420
+ GGML_ASSERT(tensor->op == GGML_OP_NONE);
6350
6421
  tensor->flags |= GGML_TENSOR_FLAG_PARAM;
6351
6422
  }
6352
6423
 
@@ -299,10 +299,10 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
299
299
  return false;
300
300
  }
301
301
  } catch (std::length_error &) {
302
- fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
302
+ GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
303
303
  return false;
304
304
  } catch (std::bad_alloc &) {
305
- fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
305
+ GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
306
306
  return false;
307
307
  }
308
308
  kv.emplace_back(key, value);
@@ -328,14 +328,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
328
328
  ok = ok && gr.read(magic, 4);
329
329
 
330
330
  if (!ok) {
331
- fprintf(stderr, "%s: failed to read magic\n", __func__);
331
+ GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
332
332
  gguf_free(ctx);
333
333
  return nullptr;
334
334
  }
335
335
 
336
336
  for (uint32_t i = 0; i < magic.size(); i++) {
337
337
  if (magic[i] != GGUF_MAGIC[i]) {
338
- fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
338
+ GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
339
339
  gguf_free(ctx);
340
340
  return nullptr;
341
341
  }
@@ -348,11 +348,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
348
348
 
349
349
  if (ok && gr.read(ctx->version)) {
350
350
  if (ctx->version == 1) {
351
- fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
351
+ GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
352
352
  ok = false;
353
353
  }
354
354
  if (ctx->version > GGUF_VERSION) {
355
- fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
355
+ GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
356
356
  __func__, ctx->version, GGUF_VERSION);
357
357
  ok = false;
358
358
  }
@@ -363,7 +363,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
363
363
  if (ok && gr.read(n_tensors)) {
364
364
  static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
365
365
  if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
366
- fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
366
+ GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
367
367
  __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
368
368
  ok = false;
369
369
  }
@@ -374,7 +374,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
374
374
  if (ok && gr.read(n_kv)) {
375
375
  static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
376
376
  if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
377
- fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
377
+ GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
378
378
  __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
379
379
  ok = false;
380
380
  }
@@ -383,7 +383,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
383
383
  }
384
384
 
385
385
  if (!ok) {
386
- fprintf(stderr, "%s: failed to read header\n", __func__);
386
+ GGML_LOG_ERROR("%s: failed to read header\n", __func__);
387
387
  gguf_free(ctx);
388
388
  return nullptr;
389
389
  }
@@ -399,15 +399,15 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
399
399
  try {
400
400
  ok = ok && gr.read(key);
401
401
  } catch (std::length_error &) {
402
- fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
402
+ GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
403
403
  ok = false;
404
404
  } catch (std::bad_alloc &) {
405
- fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
405
+ GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
406
406
  ok = false;
407
407
  }
408
408
  for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
409
409
  if (key == ctx->kv[j].key) {
410
- fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
410
+ GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
411
411
  ok = false;
412
412
  }
413
413
  }
@@ -441,14 +441,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
441
441
  case GGUF_TYPE_ARRAY:
442
442
  default:
443
443
  {
444
- fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
444
+ GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
445
445
  ok = false;
446
446
  } break;
447
447
  }
448
448
  }
449
449
 
450
450
  if (!ok) {
451
- fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
451
+ GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
452
452
  gguf_free(ctx);
453
453
  return nullptr;
454
454
  }
@@ -458,7 +458,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
458
458
  ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);
459
459
 
460
460
  if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
461
- fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
461
+ GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
462
462
  gguf_free(ctx);
463
463
  return nullptr;
464
464
  }
@@ -474,14 +474,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
474
474
  try {
475
475
  ok = ok && gr.read(name);
476
476
  } catch (std::length_error &) {
477
- fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
477
+ GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
478
478
  ok = false;
479
479
  } catch (std::bad_alloc &) {
480
- fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
480
+ GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
481
481
  ok = false;
482
482
  }
483
483
  if (name.length() >= GGML_MAX_NAME) {
484
- fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
484
+ GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
485
485
  ok = false;
486
486
  break;
487
487
  }
@@ -490,7 +490,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
490
490
  // make sure there are no duplicate tensor names
491
491
  for (int64_t j = 0; ok && j < i; ++j) {
492
492
  if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
493
- fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
493
+ GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
494
494
  ok = false;
495
495
  break;
496
496
  }
@@ -505,7 +505,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
505
505
  uint32_t n_dims = -1;
506
506
  ok = ok && gr.read(n_dims);
507
507
  if (n_dims > GGML_MAX_DIMS) {
508
- fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
508
+ GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
509
509
  __func__, info.t.name, n_dims, GGML_MAX_DIMS);
510
510
  ok = false;
511
511
  break;
@@ -518,7 +518,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
518
518
 
519
519
  // check that all ne are non-negative
520
520
  if (info.t.ne[j] < 0) {
521
- fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
521
+ GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
522
522
  __func__, info.t.name, j, info.t.ne[j]);
523
523
  ok = false;
524
524
  break;
@@ -530,7 +530,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
530
530
  (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
531
531
  (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
532
532
 
533
- fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
533
+ GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
534
534
  "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
535
535
  __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
536
536
  ok = false;
@@ -547,7 +547,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
547
547
 
548
548
  // check that tensor type is within defined range
549
549
  if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
550
- fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
550
+ GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
551
551
  __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
552
552
  ok = false;
553
553
  break;
@@ -557,7 +557,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
557
557
 
558
558
  // check that row size is divisible by block size
559
559
  if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
560
- fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
560
+ GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
561
561
  "not a multiple of block size (%" PRId64 ")\n",
562
562
  __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
563
563
  ok = false;
@@ -582,7 +582,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
582
582
  }
583
583
 
584
584
  if (!ok) {
585
- fprintf(stderr, "%s: failed to read tensor info\n", __func__);
585
+ GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
586
586
  gguf_free(ctx);
587
587
  return nullptr;
588
588
  }
@@ -590,7 +590,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
590
590
 
591
591
  // we require the data section to be aligned, so take into account any padding
592
592
  if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
593
- fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
593
+ GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
594
594
  gguf_free(ctx);
595
595
  return nullptr;
596
596
  }
@@ -604,9 +604,9 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
604
604
  for (size_t i = 0; i < ctx->info.size(); ++i) {
605
605
  const gguf_tensor_info & ti = ctx->info[i];
606
606
  if (ti.offset != ctx->size) {
607
- fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
607
+ GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
608
608
  __func__, ti.t.name, ti.offset, ctx->size);
609
- fprintf(stderr, "%s: failed to read tensor data\n", __func__);
609
+ GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
610
610
  gguf_free(ctx);
611
611
  return nullptr;
612
612
  }
@@ -634,7 +634,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
634
634
 
635
635
  *params.ctx = ggml_init(pdata);
636
636
  if (*params.ctx == nullptr) {
637
- fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
637
+ GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
638
638
  gguf_free(ctx);
639
639
  return nullptr;
640
640
  }
@@ -656,7 +656,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
656
656
  ok = ok && gr.read(data->data, ctx->size);
657
657
 
658
658
  if (!ok) {
659
- fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
659
+ GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
660
660
  ggml_free(ctx_data);
661
661
  *params.ctx = nullptr;
662
662
  gguf_free(ctx);
@@ -689,7 +689,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
689
689
  }
690
690
 
691
691
  if (!ok) {
692
- fprintf(stderr, "%s: failed to create tensors\n", __func__);
692
+ GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
693
693
  ggml_free(ctx_data);
694
694
  *params.ctx = nullptr;
695
695
  gguf_free(ctx);
@@ -706,7 +706,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
706
706
  FILE * file = ggml_fopen(fname, "rb");
707
707
 
708
708
  if (!file) {
709
- fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
709
+ GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
710
710
  return nullptr;
711
711
  }
712
712
 
@@ -1305,7 +1305,7 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
1305
1305
  FILE * file = ggml_fopen(fname, "wb");
1306
1306
 
1307
1307
  if (!file) {
1308
- fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1308
+ GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1309
1309
  return false;
1310
1310
  }
1311
1311