@novastera-oss/llamarn 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/build-info.cpp +2 -2
  14. package/cpp/llama.cpp/README.md +11 -3
  15. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  16. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  17. package/cpp/llama.cpp/common/arg.cpp +153 -113
  18. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  19. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  20. package/cpp/llama.cpp/common/chat.cpp +847 -699
  21. package/cpp/llama.cpp/common/chat.h +73 -6
  22. package/cpp/llama.cpp/common/common.cpp +50 -82
  23. package/cpp/llama.cpp/common/common.h +21 -17
  24. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  25. package/cpp/llama.cpp/common/json-partial.h +37 -0
  26. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  27. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  28. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  29. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  30. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  31. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  32. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  33. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  34. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  35. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  36. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  37. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  74. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  120. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  121. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  122. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  123. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  124. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  125. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  126. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  127. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  128. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  129. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  130. package/cpp/llama.cpp/include/llama.h +62 -125
  131. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  132. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  133. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  150. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  159. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  160. package/cpp/llama.cpp/models/templates/README.md +2 -0
  161. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  162. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  163. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  164. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  165. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  166. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  167. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  168. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  169. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  170. package/cpp/llama.cpp/src/llama-context.h +30 -0
  171. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  172. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  173. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  174. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  175. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  176. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  177. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  178. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  179. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  180. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  181. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  182. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  183. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  184. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  185. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  186. package/cpp/llama.cpp/src/llama-model.h +6 -1
  187. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  188. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  189. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  190. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  191. package/cpp/llama.cpp/src/llama.cpp +14 -0
  192. package/cpp/rn-completion.cpp +4 -2
  193. package/ios/include/chat.h +73 -6
  194. package/ios/include/common/minja/chat-template.hpp +9 -5
  195. package/ios/include/common/minja/minja.hpp +69 -36
  196. package/ios/include/common.h +21 -17
  197. package/ios/include/llama.h +62 -125
  198. package/ios/libs/llama.xcframework/Info.plist +19 -19
  199. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  200. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  201. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  205. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  206. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  227. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  228. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  229. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  240. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  241. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  242. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  246. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  247. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  253. package/package.json +1 -1
  254. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  255. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  256. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  257. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -1,28 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- # This script downloads the tokenizer models of the specified models from Huggingface and
5
- # generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
6
- #
7
- # This is necessary in order to analyze the type of pre-tokenizer used by the model and
8
- # provide the necessary information to llama.cpp via the GGUF header in order to implement
9
- # the same pre-tokenizer.
10
- #
11
- # ref: https://github.com/ggml-org/llama.cpp/pull/6920
12
- #
13
- # Instructions:
14
- #
15
- # - Add a new model to the "models" list
16
- # - Run the script with your huggingface token:
17
- #
18
- # python3 convert_hf_to_gguf_update.py <huggingface_token>
19
- #
20
- # - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
21
- # - Update llama.cpp with the new pre-tokenizer if necessary
22
- #
23
- # TODO: generate tokenizer tests for llama.cpp
24
- #
25
-
26
4
  import logging
27
5
  import os
28
6
  import pathlib
@@ -32,6 +10,7 @@ import requests
32
10
  import sys
33
11
  import json
34
12
  import shutil
13
+ import argparse
35
14
 
36
15
  from hashlib import sha256
37
16
  from enum import IntEnum, auto
@@ -41,6 +20,11 @@ logging.basicConfig(level=logging.DEBUG)
41
20
  logger = logging.getLogger("convert_hf_to_gguf_update")
42
21
  sess = requests.Session()
43
22
 
23
+ convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
24
+ convert_py = convert_py_pth.read_text(encoding="utf-8")
25
+ hf_token_pth = pathlib.Path.home() / ".cache" / "huggingface" / "token"
26
+ hf_token = hf_token_pth.read_text(encoding="utf-8").strip() if hf_token_pth.exists() else None
27
+
44
28
 
45
29
  class TOKENIZER_TYPE(IntEnum):
46
30
  SPM = auto()
@@ -49,20 +33,49 @@ class TOKENIZER_TYPE(IntEnum):
49
33
  UGM = auto()
50
34
 
51
35
 
36
+ DOC_STRING = """
37
+ This script downloads the tokenizer models of the specified models from Huggingface and
38
+ generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
39
+
40
+ /!\\ It is intended to be used by contributors and is not meant to be run by end users
41
+
42
+ This is necessary in order to analyze the type of pre-tokenizer used by the model and
43
+ provide the necessary information to llama.cpp via the GGUF header in order to implement
44
+ the same pre-tokenizer.
45
+
46
+ ref: https://github.com/ggml-org/llama.cpp/pull/6920
47
+
48
+ Instructions:
49
+
50
+ - Add a new model to the "models" list
51
+ - Run the script with your huggingface token
52
+ By default, token will be read from ~/.cache/huggingface/token
53
+ - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
54
+ - Update llama.cpp with the new pre-tokenizer if necessary
55
+ """
56
+ # TODO: generate tokenizer tests for llama.cpp
57
+
58
+ parser = argparse.ArgumentParser(description=DOC_STRING, formatter_class=argparse.RawTextHelpFormatter)
59
+ parser.add_argument(
60
+ "--full", action="store_true",
61
+ help="download full list of models - make sure you have access to all of them",
62
+ )
63
+ parser.add_argument(
64
+ "hf_token",
65
+ help="optional HF token",
66
+ nargs="?",
67
+ )
68
+ args = parser.parse_args()
69
+ hf_token = args.hf_token if args.hf_token is not None else hf_token
70
+
71
+ if hf_token is None:
72
+ logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
73
+ sys.exit(1)
74
+
52
75
  # TODO: this string has to exercise as much pre-tokenizer functionality as possible
53
76
  # will be updated with time - contributions welcome
54
77
  CHK_TXT = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
55
78
 
56
- if len(sys.argv) == 2:
57
- token = sys.argv[1]
58
- if not token.startswith("hf_"):
59
- logger.info("Huggingface token seems invalid")
60
- logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
61
- sys.exit(1)
62
- else:
63
- logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
64
- sys.exit(1)
65
-
66
79
  # TODO: add models here, base models preferred
67
80
  models = [
68
81
  {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
@@ -103,7 +116,6 @@ models = [
103
116
  {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
104
117
  {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
105
118
  {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
106
- {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
107
119
  {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
108
120
  {"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
109
121
  {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
@@ -114,11 +126,19 @@ models = [
114
126
  {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
115
127
  {"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
116
128
  {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
117
- {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", },
118
129
  {"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
119
130
  {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
120
131
  ]
121
132
 
133
+ # some models are known to be broken upstream, so we will skip them as exceptions
134
+ pre_computed_hashes = [
135
+ # chatglm-bpe has 2 hashes, why?
136
+ {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b"},
137
+ {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
138
+ {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
139
+ {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
140
+ ]
141
+
122
142
 
123
143
  def download_file_with_auth(url, token, save_path):
124
144
  headers = {"Authorization": f"Bearer {token}"}
@@ -169,9 +189,29 @@ def download_model(model):
169
189
  if os.path.isfile(save_path):
170
190
  logger.info(f"{name}: File {save_path} already exists - skipping")
171
191
  continue
172
- download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)
192
+ download_file_with_auth(f"{repo}/resolve/main/{file}", hf_token, save_path)
193
+
194
+
195
+ # get list of existing models and chkhsh from the convert_hf_to_gguf.py file
196
+ # returns mapping res --> chkhsh
197
+ def get_existing_models(convert_py):
198
+ pattern = r'if chkhsh == "([a-f0-9]{64})":\s*\n\s*.*\s*res = "([^"]+)"'
199
+ matches = re.findall(pattern, convert_py)
200
+ output = {}
201
+ for chkhsh, res in matches:
202
+ output[res] = chkhsh
203
+ return output
204
+
173
205
 
206
+ existing_models = {}
207
+ all_models = models.copy()
208
+ if not args.full:
209
+ # Filter out models that already exist in convert_hf_to_gguf.py
210
+ existing_models = get_existing_models(convert_py)
211
+ all_models = models.copy()
212
+ models = [model for model in all_models if model["name"] not in existing_models]
174
213
 
214
+ logging.info(f"Downloading {len(models)} models...")
175
215
  for model in models:
176
216
  try:
177
217
  download_model(model)
@@ -182,9 +222,10 @@ for model in models:
182
222
  # generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
183
223
 
184
224
  src_ifs = ""
185
- for model in models:
225
+ for model in [*all_models, *pre_computed_hashes]:
186
226
  name = model["name"]
187
227
  tokt = model["tokt"]
228
+ chkhsh = model.get("chkhsh")
188
229
 
189
230
  if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
190
231
  continue
@@ -195,35 +236,44 @@ for model in models:
195
236
  continue
196
237
 
197
238
  # create the tokenizer
198
- try:
199
- if name == "t5":
200
- tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
201
- else:
202
- tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
203
- except OSError as e:
204
- logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
205
- continue # Skip to the next model if the tokenizer can't be loaded
206
-
207
- chktok = tokenizer.encode(CHK_TXT)
208
- chkhsh = sha256(str(chktok).encode()).hexdigest()
209
-
210
- logger.info(f"model: {name}")
211
- logger.info(f"tokt: {tokt}")
212
- logger.info(f"repo: {model['repo']}")
213
- logger.info(f"chktok: {chktok}")
214
- logger.info(f"chkhsh: {chkhsh}")
215
-
216
- # print the "pre_tokenizer" content from the tokenizer.json
217
- with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
218
- cfg = json.load(f)
219
- normalizer = cfg["normalizer"]
220
- logger.info("normalizer: " + json.dumps(normalizer, indent=4))
221
- pre_tokenizer = cfg["pre_tokenizer"]
222
- logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
223
- if "ignore_merges" in cfg["model"]:
224
- logger.info("ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4))
225
-
226
- logger.info("")
239
+ if chkhsh is not None:
240
+ # if the model has a pre-computed hash, use it
241
+ logger.info(f"Using pre-computed hash for model {name}: {chkhsh}")
242
+ elif name in existing_models:
243
+ # if the model already exists in convert_hf_to_gguf.py, skip compute hash
244
+ chkhsh = existing_models[name]
245
+ else:
246
+ # otherwise, compute the hash of the tokenizer
247
+ try:
248
+ logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
249
+ if name == "t5":
250
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
251
+ else:
252
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
253
+ except OSError as e:
254
+ logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
255
+ continue # Skip to the next model if the tokenizer can't be loaded
256
+
257
+ chktok = tokenizer.encode(CHK_TXT)
258
+ chkhsh = sha256(str(chktok).encode()).hexdigest()
259
+
260
+ logger.info(f"model: {name}")
261
+ logger.info(f"tokt: {tokt}")
262
+ logger.info(f"repo: {model['repo']}")
263
+ logger.info(f"chktok: {chktok}")
264
+ logger.info(f"chkhsh: {chkhsh}")
265
+
266
+ # print the "pre_tokenizer" content from the tokenizer.json
267
+ with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
268
+ cfg = json.load(f)
269
+ normalizer = cfg["normalizer"]
270
+ logger.info("normalizer: " + json.dumps(normalizer, indent=4))
271
+ pre_tokenizer = cfg["pre_tokenizer"]
272
+ logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
273
+ if "ignore_merges" in cfg["model"]:
274
+ logger.info("ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4))
275
+
276
+ logger.info("")
227
277
 
228
278
  src_ifs += f" if chkhsh == \"{chkhsh}\":\n"
229
279
  src_ifs += f" # ref: {model['repo']}\n"
@@ -271,8 +321,6 @@ src_func = f"""
271
321
  return res
272
322
  """
273
323
 
274
- convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
275
- convert_py = convert_py_pth.read_text(encoding="utf-8")
276
324
  convert_py = re.sub(
277
325
  r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
278
326
  lambda m: m.group(1) + src_func + m.group(3),
@@ -288,7 +336,7 @@ logger.info("+++ convert_hf_to_gguf.py was updated")
288
336
 
289
337
  tests = [
290
338
  "ied 4 ½ months",
291
- "Führer",
339
+ "Äpfel",
292
340
  "",
293
341
  " ",
294
342
  " ",
@@ -367,6 +415,10 @@ for model in models:
367
415
  logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
368
416
  continue # Skip this model and continue with the next one in the loop
369
417
 
418
+ if not os.path.exists(f"models/ggml-vocab-{name}.gguf"):
419
+ logger.info(f"Skip vocab files for model {name}, no GGUF file found")
420
+ continue
421
+
370
422
  with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
371
423
  for text in tests:
372
424
  f.write(f"{text}")
@@ -129,6 +129,7 @@ option(GGML_LASX "ggml: enable lasx" ON)
129
129
  option(GGML_LSX "ggml: enable lsx" ON)
130
130
  option(GGML_RVV "ggml: enable rvv" ON)
131
131
  option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
132
+ option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
132
133
  option(GGML_VXE "ggml: enable vxe" ON)
133
134
 
134
135
  option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
@@ -176,7 +177,6 @@ option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks"
176
177
  option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
177
178
  option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
178
179
  option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
179
- option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
180
180
  option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
181
181
  option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
182
182
  option(GGML_KOMPUTE "ggml: use Kompute" OFF)
@@ -193,6 +193,7 @@ option(GGML_RPC "ggml: use RPC"
193
193
  option(GGML_SYCL "ggml: use SYCL" OFF)
194
194
  option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
195
195
  option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
196
+ option(GGML_SYCL_DNN "ggml: enable oneDNN in the SYCL backend" ON)
196
197
  set (GGML_SYCL_TARGET "INTEL" CACHE STRING
197
198
  "ggml: sycl target device")
198
199
  set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
@@ -24,3 +24,28 @@ function(ggml_get_flags CCID CCVER)
24
24
  set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
25
25
  set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
26
26
  endfunction()
27
+
28
+ function(ggml_get_system_arch)
29
+ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
30
+ CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
31
+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
32
+ CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
33
+ set(GGML_SYSTEM_ARCH "ARM" PARENT_SCOPE)
34
+ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR
35
+ CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
36
+ (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
37
+ CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
38
+ set(GGML_SYSTEM_ARCH "x86" PARENT_SCOPE)
39
+ elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR
40
+ "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
41
+ set(GGML_SYSTEM_ARCH "PowerPC" PARENT_SCOPE)
42
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
43
+ set(GGML_SYSTEM_ARCH "loongarch64" PARENT_SCOPE)
44
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
45
+ set(GGML_SYSTEM_ARCH "riscv64" PARENT_SCOPE)
46
+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
47
+ set(GGML_SYSTEM_ARCH "s390x" PARENT_SCOPE)
48
+ else()
49
+ set(GGML_SYSTEM_ARCH "UNKNOWN" PARENT_SCOPE)
50
+ endif()
51
+ endfunction()
@@ -37,13 +37,16 @@ extern "C" {
37
37
  // ====== Dataset ======
38
38
 
39
39
  GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
40
- int64_t ne_datapoint, // number of elements per datapoint
41
- int64_t ne_label, // number of elements per label
42
- int64_t ndata, // total number of datapoints/labels
43
- int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
40
+ enum ggml_type type_data, // the type for the internal data tensor
41
+ enum ggml_type type_label, // the type for the internal labels tensor
42
+ int64_t ne_datapoint, // number of elements per datapoint
43
+ int64_t ne_label, // number of elements per label
44
+ int64_t ndata, // total number of datapoints/labels
45
+ int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
44
46
  GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
45
47
 
46
48
  // get underlying tensors that store the data
49
+ GGML_API int64_t ggml_opt_dataset_ndata (ggml_opt_dataset_t dataset);
47
50
  GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
48
51
  GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [nd_label, ndata]
49
52
 
@@ -56,13 +59,19 @@ extern "C" {
56
59
  struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
57
60
  struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
58
61
  int64_t ibatch);
62
+ GGML_API void ggml_opt_dataset_get_batch_host(
63
+ ggml_opt_dataset_t dataset,
64
+ void * data_batch,
65
+ size_t nb_data_batch,
66
+ void * labels_batch,
67
+ int64_t ibatch);
59
68
 
60
69
  // ====== Model / Context ======
61
70
 
62
71
  enum ggml_opt_build_type {
63
- GGML_OPT_BUILD_TYPE_FORWARD,
64
- GGML_OPT_BUILD_TYPE_GRAD,
65
- GGML_OPT_BUILD_TYPE_OPT,
72
+ GGML_OPT_BUILD_TYPE_FORWARD = 10,
73
+ GGML_OPT_BUILD_TYPE_GRAD = 20,
74
+ GGML_OPT_BUILD_TYPE_OPT = 30,
66
75
  };
67
76
 
68
77
  // parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
@@ -81,20 +90,22 @@ extern "C" {
81
90
  // userdata can be used to pass arbitrary data
82
91
  typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
83
92
 
84
- // returns the default optimizer params (constant)
93
+ // returns the default optimizer params (constant, hard-coded values)
85
94
  // userdata is not used
86
95
  GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
87
96
 
97
+ // casts userdata to ggml_opt_optimizer_params and returns it
98
+ GGML_API struct ggml_opt_optimizer_params ggml_opt_get_constant_optimizer_params(void * userdata);
99
+
88
100
  // parameters for initializing a new optimization context
89
101
  struct ggml_opt_params {
90
102
  ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
91
103
 
92
- struct ggml_context * ctx_compute; // created in user code, holds non-static tensors
93
-
94
- // the forward graph is defined by inputs and outputs
95
- // those tensors and all tensors inbetween are not intended to be reusable between multiple optimization contexts
96
- struct ggml_tensor * inputs;
97
- struct ggml_tensor * outputs;
104
+ // by default the forward graph needs to be reconstructed for each eval
105
+ // if ctx_compute, inputs, and outputs are set the graphs are instead allocated statically
106
+ struct ggml_context * ctx_compute;
107
+ struct ggml_tensor * inputs;
108
+ struct ggml_tensor * outputs;
98
109
 
99
110
  enum ggml_opt_loss_type loss_type;
100
111
  enum ggml_opt_build_type build_type;
@@ -107,12 +118,9 @@ extern "C" {
107
118
 
108
119
  // get parameters for an optimization context with defaults set where possible
109
120
  // parameters for which no sensible defaults exist are supplied as arguments to this function
110
- GGML_API ggml_opt_params ggml_opt_default_params(
111
- ggml_backend_sched_t backend_sched,
112
- struct ggml_context * ctx_compute,
113
- struct ggml_tensor * inputs,
114
- struct ggml_tensor * outputs,
115
- enum ggml_opt_loss_type loss_type);
121
+ GGML_API struct ggml_opt_params ggml_opt_default_params(
122
+ ggml_backend_sched_t backend_sched,
123
+ enum ggml_opt_loss_type loss_type);
116
124
 
117
125
  GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
118
126
  GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
@@ -120,7 +128,10 @@ extern "C" {
120
128
  // set gradients to zero, initilize loss, and optionally reset the optimizer
121
129
  GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
122
130
 
131
+ GGML_API bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx); // whether the graphs are allocated_statically
132
+
123
133
  // get underlying tensors that store data
134
+ // if not using static graphs these pointers become invalid with the next call to ggml_opt_alloc
124
135
  GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
125
136
  GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
126
137
  GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
@@ -128,11 +139,12 @@ extern "C" {
128
139
  GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
129
140
  GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
130
141
 
142
+ // get the gradient accumulator for a node from the forward graph
131
143
  GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
132
144
 
133
145
  // ====== Optimization Result ======
134
146
 
135
- GGML_API ggml_opt_result_t ggml_opt_result_init();
147
+ GGML_API ggml_opt_result_t ggml_opt_result_init(void);
136
148
  GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
137
149
  GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
138
150
 
@@ -144,11 +156,20 @@ extern "C" {
144
156
 
145
157
  // ====== Computation ======
146
158
 
147
- // do forward pass, increment result if not NULL
148
- GGML_API void ggml_opt_forward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
159
+ // if not using static graphs, this function must be called prior to ggml_opt_alloc
160
+ GGML_API void ggml_opt_prepare_alloc(
161
+ ggml_opt_context_t opt_ctx,
162
+ struct ggml_context * ctx_compute,
163
+ struct ggml_cgraph * gf,
164
+ struct ggml_tensor * inputs,
165
+ struct ggml_tensor * outputs);
166
+
167
+ // allocate the next graph for evaluation, either forward or forward + backward
168
+ // must be called exactly once prior to calling ggml_opt_eval
169
+ GGML_API void ggml_opt_alloc(ggml_opt_context_t opt_ctx, bool backward);
149
170
 
150
- // do forward pass, increment result if not NULL, do backward pass
151
- GGML_API void ggml_opt_forward_backward(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
171
+ // do forward pass, increment result if not NULL, do backward pass if allocated
172
+ GGML_API void ggml_opt_eval(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
152
173
 
153
174
  // ############################################################################
154
175
  // ## The high-level functions start here. They do not depend on any private ##
@@ -200,9 +221,9 @@ extern "C" {
200
221
  // fit model defined by inputs and outputs to dataset
201
222
  GGML_API void ggml_opt_fit(
202
223
  ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
203
- ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
204
- ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
205
- ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
224
+ struct ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
225
+ struct ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
226
+ struct ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
206
227
  ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
207
228
  enum ggml_opt_loss_type loss_type, // loss to minimize
208
229
  ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
@@ -536,6 +536,7 @@ extern "C" {
536
536
  GGML_UNARY_OP_HARDSWISH,
537
537
  GGML_UNARY_OP_HARDSIGMOID,
538
538
  GGML_UNARY_OP_EXP,
539
+ GGML_UNARY_OP_GELU_ERF,
539
540
 
540
541
  GGML_UNARY_OP_COUNT,
541
542
  };
@@ -768,7 +769,7 @@ extern "C" {
768
769
  // Tensor flags
769
770
  GGML_API void ggml_set_input(struct ggml_tensor * tensor);
770
771
  GGML_API void ggml_set_output(struct ggml_tensor * tensor);
771
- GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
772
+ GGML_API void ggml_set_param(struct ggml_tensor * tensor);
772
773
  GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
773
774
 
774
775
  //
@@ -934,11 +935,20 @@ extern "C" {
934
935
  struct ggml_tensor * a,
935
936
  struct ggml_tensor * b);
936
937
 
938
+ // repeat a to the specified shape
939
+ GGML_API struct ggml_tensor * ggml_repeat_4d(
940
+ struct ggml_context * ctx,
941
+ struct ggml_tensor * a,
942
+ int64_t ne0,
943
+ int64_t ne1,
944
+ int64_t ne2,
945
+ int64_t ne3);
946
+
937
947
  // sums repetitions in a into shape of b
938
948
  GGML_API struct ggml_tensor * ggml_repeat_back(
939
949
  struct ggml_context * ctx,
940
950
  struct ggml_tensor * a,
941
- struct ggml_tensor * b);
951
+ struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
942
952
 
943
953
  // concat a and b along dim
944
954
  // used in stable-diffusion
@@ -1024,6 +1034,16 @@ extern "C" {
1024
1034
  struct ggml_context * ctx,
1025
1035
  struct ggml_tensor * a);
1026
1036
 
1037
+ // GELU using erf (error function) when possible
1038
+ // some backends may fallback to approximation based on Abramowitz and Stegun formula
1039
+ GGML_API struct ggml_tensor * ggml_gelu_erf(
1040
+ struct ggml_context * ctx,
1041
+ struct ggml_tensor * a);
1042
+
1043
+ GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
1044
+ struct ggml_context * ctx,
1045
+ struct ggml_tensor * a);
1046
+
1027
1047
  GGML_API struct ggml_tensor * ggml_gelu_quick(
1028
1048
  struct ggml_context * ctx,
1029
1049
  struct ggml_tensor * a);
@@ -2049,15 +2069,14 @@ extern "C" {
2049
2069
 
2050
2070
  GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
2051
2071
  GGML_API void ggml_build_backward_expand(
2052
- struct ggml_context * ctx_static, // context for static gradients (loss + gradient accumulation)
2053
- struct ggml_context * ctx_compute, // context for gradient computation
2054
- struct ggml_cgraph * cgraph,
2055
- bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
2072
+ struct ggml_context * ctx, // context for gradient computation
2073
+ struct ggml_cgraph * cgraph,
2074
+ struct ggml_tensor ** grad_accs);
2056
2075
 
2057
2076
  // graph allocation in a context
2058
2077
  GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
2059
2078
  GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
2060
- GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
2079
+ GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
2061
2080
  GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
2062
2081
  GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
2063
2082
  GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
@@ -109,6 +109,8 @@ if (MSVC)
109
109
  else ()
110
110
  set(CMAKE_GENERATOR_PLATFORM_LWR "")
111
111
  endif ()
112
+ ggml_get_system_arch()
113
+ message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")
112
114
 
113
115
  if (NOT MSVC)
114
116
  if (GGML_STATIC)
@@ -287,16 +289,20 @@ if (GGML_CPU_ALL_VARIANTS)
287
289
  if (NOT GGML_BACKEND_DL)
288
290
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
289
291
  endif()
290
- ggml_add_cpu_backend_variant(x64)
291
- ggml_add_cpu_backend_variant(sse42 SSE42)
292
- ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
293
- ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
294
- ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
295
- ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
296
- ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
297
- if (NOT MSVC)
298
- # MSVC doesn't support AMX
299
- ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
292
+ if (GGML_SYSTEM_ARCH STREQUAL "x86")
293
+ ggml_add_cpu_backend_variant(x64)
294
+ ggml_add_cpu_backend_variant(sse42 SSE42)
295
+ ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
296
+ ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
297
+ ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
298
+ ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
299
+ ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
300
+ if (NOT MSVC)
301
+ # MSVC doesn't support AMX
302
+ ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
303
+ endif()
304
+ else()
305
+ message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported on ${GGML_SYSTEM_ARCH}")
300
306
  endif()
301
307
  elseif (GGML_CPU)
302
308
  ggml_add_cpu_backend_variant_impl("")
@@ -1111,7 +1111,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
1111
1111
 
1112
1112
  const int node_backend_id = tensor_backend_id(node);
1113
1113
 
1114
- assert(node_backend_id != -1); // all nodes should be assigned by now
1114
+ assert(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
1115
1115
 
1116
1116
  // check if we should start a new split based on the sources of the current node
1117
1117
  bool need_new_split = false;
@@ -1598,6 +1598,9 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
1598
1598
  for (int i = 0; i < sched->n_backends; i++) {
1599
1599
  ggml_backend_synchronize(sched->backends[i]);
1600
1600
  }
1601
+ // reset the current copy to 0 so that the graphs will be similar during generation
1602
+ // necessary for CUDA graphs
1603
+ sched->cur_copy = 0;
1601
1604
  }
1602
1605
 
1603
1606
  void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
@@ -30,6 +30,7 @@ string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower
30
30
  string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
31
31
  set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
32
32
  string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)
33
+ message(STATUS "CANN: SOC_VERSION = ${SOC_VERSION}")
33
34
 
34
35
  if (CANN_INSTALL_DIR)
35
36
  # Only Support Linux.
@@ -31,6 +31,8 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
31
31
  return ACL_FLOAT;
32
32
  case GGML_TYPE_F16:
33
33
  return ACL_FLOAT16;
34
+ case GGML_TYPE_BF16:
35
+ return ACL_BF16;
34
36
  case GGML_TYPE_I8:
35
37
  return ACL_INT8;
36
38
  case GGML_TYPE_I16: