@novastera-oss/llamarn 0.2.1 → 0.2.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (268)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/PureCppImpl.cpp +9 -27
  14. package/cpp/SystemUtils.h +2 -2
  15. package/cpp/build-info.cpp +2 -2
  16. package/cpp/llama.cpp/README.md +11 -3
  17. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  18. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  19. package/cpp/llama.cpp/common/arg.cpp +153 -113
  20. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  21. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  22. package/cpp/llama.cpp/common/chat.cpp +847 -699
  23. package/cpp/llama.cpp/common/chat.h +73 -6
  24. package/cpp/llama.cpp/common/common.cpp +50 -82
  25. package/cpp/llama.cpp/common/common.h +21 -17
  26. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  27. package/cpp/llama.cpp/common/json-partial.h +37 -0
  28. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  29. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  30. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  31. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  32. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  33. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  34. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  35. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  36. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  37. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  38. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  39. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  40. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  75. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  120. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  121. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  122. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  123. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  124. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  125. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  126. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  127. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  128. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  129. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  130. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  131. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  132. package/cpp/llama.cpp/include/llama.h +62 -125
  133. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  150. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  152. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  159. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  160. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  161. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  162. package/cpp/llama.cpp/models/templates/README.md +2 -0
  163. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  164. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  165. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  166. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  167. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  168. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  169. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  170. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  171. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  172. package/cpp/llama.cpp/src/llama-context.h +30 -0
  173. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  174. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  175. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  176. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  177. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  178. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  179. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  180. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  181. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  182. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  183. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  184. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  185. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  186. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  187. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  188. package/cpp/llama.cpp/src/llama-model.h +6 -1
  189. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  190. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  191. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  192. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  193. package/cpp/llama.cpp/src/llama.cpp +14 -0
  194. package/cpp/rn-completion.cpp +60 -5
  195. package/ios/include/chat.h +73 -6
  196. package/ios/include/common/minja/chat-template.hpp +9 -5
  197. package/ios/include/common/minja/minja.hpp +69 -36
  198. package/ios/include/common.h +21 -17
  199. package/ios/include/llama.h +62 -125
  200. package/ios/libs/llama.xcframework/Info.plist +19 -19
  201. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  205. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  206. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  212. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  213. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  227. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  228. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  229. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  233. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  234. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  240. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  241. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  242. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  246. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  247. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  253. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  254. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  255. package/package.json +1 -1
  256. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  257. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  267. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  268. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -4,6 +4,7 @@
4
4
  #include "ggml.h"
5
5
  #include "ggml-cpu.h"
6
6
  #include "ggml-backend.h"
7
+ #include "ggml-opt.h"
7
8
 
8
9
  #include <stddef.h>
9
10
  #include <stdint.h>
@@ -344,7 +345,7 @@ extern "C" {
344
345
  float yarn_beta_fast; // YaRN low correction dim
345
346
  float yarn_beta_slow; // YaRN high correction dim
346
347
  uint32_t yarn_orig_ctx; // YaRN original context size
347
- float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
348
+ float defrag_thold; // defragment the KV cache if holes/size > thold, <= 0 disabled (default)
348
349
 
349
350
  ggml_backend_sched_eval_callback cb_eval;
350
351
  void * cb_eval_user_data;
@@ -360,10 +361,11 @@ extern "C" {
360
361
 
361
362
  // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
362
363
  bool embeddings; // if true, extract embeddings (together with logits)
363
- bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
364
- bool flash_attn; // whether to use flash attention [EXPERIMENTAL]
365
- bool no_perf; // whether to measure performance timings
366
- bool op_offload; // whether to offload host tensor operations to device
364
+ bool offload_kqv; // offload the KQV ops (including the KV cache) to GPU
365
+ bool flash_attn; // use flash attention [EXPERIMENTAL]
366
+ bool no_perf; // measure performance timings
367
+ bool op_offload; // offload host tensor operations to device
368
+ bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
367
369
  };
368
370
 
369
371
  // model quantization parameters
@@ -445,6 +447,10 @@ extern "C" {
445
447
  size_t n_paths,
446
448
  struct llama_model_params params);
447
449
 
450
+ LLAMA_API void llama_model_save_to_file(
451
+ const struct llama_model * model,
452
+ const char * path_model);
453
+
448
454
  DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
449
455
  "use llama_model_free instead");
450
456
 
@@ -465,6 +471,7 @@ extern "C" {
465
471
  LLAMA_API int64_t llama_time_us(void);
466
472
 
467
473
  LLAMA_API size_t llama_max_devices(void);
474
+ LLAMA_API size_t llama_max_parallel_sequences(void);
468
475
 
469
476
  LLAMA_API bool llama_supports_mmap (void);
470
477
  LLAMA_API bool llama_supports_mlock (void);
@@ -602,71 +609,14 @@ extern "C" {
602
609
  // KV cache
603
610
  //
604
611
 
605
- // TODO: start using struct llama_kv_cache
606
-
607
- // Information associated with an individual cell in the KV cache view.
608
- struct llama_kv_cache_view_cell {
609
- // The position for this cell. Takes KV cache shifts into account.
610
- // May be negative if the cell is not populated.
611
- llama_pos pos;
612
- };
613
-
614
- // An updateable view of the KV cache.
615
- struct llama_kv_cache_view {
616
- // Number of KV cache cells. This will be the same as the context size.
617
- int32_t n_cells;
618
-
619
- // Maximum number of sequences that can exist in a cell. It's not an error
620
- // if there are more sequences in a cell than this value, however they will
621
- // not be visible in the view cells_sequences.
622
- int32_t n_seq_max;
623
-
624
- // Number of tokens in the cache. For example, if there are two populated
625
- // cells, the first with 1 sequence id in it and the second with 2 sequence
626
- // ids then you'll have 3 tokens.
627
- int32_t token_count;
628
-
629
- // Number of populated cache cells.
630
- int32_t used_cells;
631
-
632
- // Maximum contiguous empty slots in the cache.
633
- int32_t max_contiguous;
634
-
635
- // Index to the start of the max_contiguous slot range. Can be negative
636
- // when cache is full.
637
- int32_t max_contiguous_idx;
638
-
639
- // Information for an individual cell.
640
- struct llama_kv_cache_view_cell * cells;
641
-
642
- // The sequences for each cell. There will be n_seq_max items per cell.
643
- llama_seq_id * cells_sequences;
644
- };
645
-
646
- // Create an empty KV cache view. (use only for debugging purposes)
647
- LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
648
-
649
- // Free a KV cache view. (use only for debugging purposes)
650
- LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
651
-
652
- // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
653
- // TODO: change signature to llama_kv_cache_view_update(struct llama_kv_cache_view * view, const struct llama_context * ctx)
654
- LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
655
-
656
- ///
657
-
658
612
  // Returns the number of tokens in the KV cache (slow, use only for debug)
659
613
  // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
660
- LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx);
661
-
662
- DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx),
663
- "use llama_kv_self_n_tokens instead");
614
+ DEPRECATED(LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx),
615
+ "Use llama_kv_self_seq_pos_max() and llama_kv_self_seq_pos_min() instead (https://github.com/ggml-org/llama.cpp/issues/13793)");
664
616
 
665
617
  // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
666
- LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx);
667
-
668
- DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx),
669
- "use llama_kv_self_used_cells instead");
618
+ DEPRECATED(LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx),
619
+ "Use llama_kv_self_seq_pos_max() and llama_kv_self_seq_pos_min() instead (https://github.com/ggml-org/llama.cpp/issues/13793)");
670
620
 
671
621
  // Clear the KV cache - both cell info is erased and KV data is zeroed
672
622
  LLAMA_API void llama_kv_self_clear(
@@ -725,10 +675,18 @@ extern "C" {
725
675
  llama_pos p1,
726
676
  int d);
727
677
 
678
+ // Returns the smallest position present in the KV cache for the specified sequence
679
+ // This is typically non-zero only for SWA caches
680
+ // Return -1 if the sequence is empty
681
+ LLAMA_API llama_pos llama_kv_self_seq_pos_min(
682
+ struct llama_context * ctx,
683
+ llama_seq_id seq_id);
684
+
728
685
  // Returns the largest position present in the KV cache for the specified sequence
686
+ // Return -1 if the sequence is empty
729
687
  LLAMA_API llama_pos llama_kv_self_seq_pos_max(
730
688
  struct llama_context * ctx,
731
- llama_seq_id seq_id);
689
+ llama_seq_id seq_id);
732
690
 
733
691
  // Defragment the KV cache
734
692
  // This will be applied:
@@ -742,61 +700,6 @@ extern "C" {
742
700
  // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
743
701
  LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
744
702
 
745
- DEPRECATED(LLAMA_API void llama_kv_cache_clear(
746
- struct llama_context * ctx),
747
- "use llama_kv_self_clear instead");
748
-
749
- DEPRECATED(LLAMA_API bool llama_kv_cache_seq_rm(
750
- struct llama_context * ctx,
751
- llama_seq_id seq_id,
752
- llama_pos p0,
753
- llama_pos p1),
754
- "use llama_kv_self_seq_rm instead");
755
-
756
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_cp(
757
- struct llama_context * ctx,
758
- llama_seq_id seq_id_src,
759
- llama_seq_id seq_id_dst,
760
- llama_pos p0,
761
- llama_pos p1),
762
- "use llama_kv_self_seq_cp instead");
763
-
764
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_keep(
765
- struct llama_context * ctx,
766
- llama_seq_id seq_id),
767
- "use llama_kv_self_seq_keep instead");
768
-
769
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_add(
770
- struct llama_context * ctx,
771
- llama_seq_id seq_id,
772
- llama_pos p0,
773
- llama_pos p1,
774
- llama_pos delta),
775
- "use llama_kv_self_seq_add instead");
776
-
777
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_div(
778
- struct llama_context * ctx,
779
- llama_seq_id seq_id,
780
- llama_pos p0,
781
- llama_pos p1,
782
- int d),
783
- "use llama_kv_self_seq_div instead");
784
-
785
- DEPRECATED(LLAMA_API llama_pos llama_kv_cache_seq_pos_max(
786
- struct llama_context * ctx,
787
- llama_seq_id seq_id),
788
- "use llama_kv_self_seq_pos_max instead");
789
-
790
- DEPRECATED(LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx),
791
- "use llama_kv_self_defrag instead");
792
-
793
- DEPRECATED(LLAMA_API bool llama_kv_cache_can_shift(const struct llama_context * ctx),
794
- "use llama_kv_self_can_shift instead");
795
-
796
- DEPRECATED(LLAMA_API void llama_kv_cache_update(struct llama_context * ctx),
797
- "use llama_kv_self_update instead");
798
-
799
-
800
703
  //
801
704
  // State / sessions
802
705
  //
@@ -938,9 +841,12 @@ extern "C" {
938
841
  // Requires KV cache.
939
842
  // For encode-decoder contexts, processes the batch using the decoder.
940
843
  // Positive return values does not mean a fatal error, but rather a warning.
941
- // 0 - success
942
- // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
943
- // < 0 - error. the KV cache state is restored to the state before this call
844
+ // Upon non-zero return values, the KV cache state is restored to the state before this call
845
+ // 0 - success
846
+ // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
847
+ // 2 - aborted
848
+ // -1 - invalid input batch
849
+ // < -1 - error
944
850
  LLAMA_API int32_t llama_decode(
945
851
  struct llama_context * ctx,
946
852
  struct llama_batch batch);
@@ -1433,6 +1339,37 @@ extern "C" {
1433
1339
  LLAMA_API void llama_perf_sampler_print(const struct llama_sampler * chain);
1434
1340
  LLAMA_API void llama_perf_sampler_reset( struct llama_sampler * chain);
1435
1341
 
1342
+ //
1343
+ // training
1344
+ //
1345
+
1346
+ // function that returns whether or not a given tensor contains trainable parameters
1347
+ typedef bool (*llama_opt_param_filter)(const struct ggml_tensor * tensor, void * userdata);
1348
+
1349
+ // always returns true
1350
+ LLAMA_API bool llama_opt_param_filter_all(const struct ggml_tensor * tensor, void * userdata);
1351
+
1352
+ struct llama_opt_params {
1353
+ uint32_t n_ctx_train; // assumed context size post training, use context size specified in llama_context if 0
1354
+
1355
+ llama_opt_param_filter param_filter; // callback for determining which tensors contain trainable parameters
1356
+ void * param_filter_ud; // userdata for determining which tensors contain trainable parameters
1357
+
1358
+ ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
1359
+ void * get_opt_pars_ud; // userdata for calculating optimizer parameters
1360
+ };
1361
+
1362
+ LLAMA_API void llama_opt_init(struct llama_context * lctx, struct llama_model * model, struct llama_opt_params lopt_params);
1363
+
1364
+ LLAMA_API void llama_opt_epoch(
1365
+ struct llama_context * lctx,
1366
+ ggml_opt_dataset_t dataset,
1367
+ ggml_opt_result_t result_train,
1368
+ ggml_opt_result_t result_eval,
1369
+ int64_t idata_split,
1370
+ ggml_opt_epoch_callback callback_train,
1371
+ ggml_opt_epoch_callback callback_eval);
1372
+
1436
1373
  #ifdef __cplusplus
1437
1374
  }
1438
1375
  #endif
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  29464 2094 1018 1092 2706
2
- 11865 17875
2
+ 9706 7959 2140
3
3
 
4
4
 
5
5
 
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  2536 228 27 228 22957 6983
2
- 45 193433
2
+ 90711 87 20910
3
3
 
4
4
  228
5
5
  1667
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  1050 207 19 207 19192 4217
2
- 37 32009 71 6247
2
+ 125 213 26862 282
3
3
 
4
4
  207
5
5
  243
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  1052 207 19 207 19109 4223
2
- 37 100014 71 6245
2
+ 82077 26723 282
3
3
 
4
4
  207
5
5
  243
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  878 204 31 3068 133 2137
2
- 28611 132 30042
2
+ 34502 18614 286
3
3
 
4
4
  204
5
5
  258
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  798 604 25208 1933
2
- 37 9116 71 11751
2
+ 127 226 79 69 417
3
3
 
4
4
  220
5
5
  220 220
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  1142 220 19 220 27154 4038
2
- 37 51853 261
2
+ 88075 16276 301
3
3
 
4
4
  220
5
5
  256
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  474 287 29871 29946 29871 30226 7378
2
- 383 4000 261
2
+ 11585 7810 295
3
3
 
4
4
  259
5
5
  1678
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  728 577 24142 2607
2
- 39 26288 6554
2
+ 37515 18569 293
3
3
 
4
4
  209
5
5
  50276
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  474 287 29871 29946 29871 30226 7378
2
- 383 4000 261
2
+ 11585 7810 295
3
3
 
4
4
  259
5
5
  1678
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  1122 220 19 220 26062 3951
2
- 37 50753 261
2
+ 86975 15897 301
3
3
 
4
4
  220
5
5
  256
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  4833 225 38 225 143 140 17723
2
- 56 2006 3935 265
2
+ 144 231 7132 342
3
3
 
4
4
  225
5
5
  261
@@ -1,6 +1,6 @@
1
1
  ied 4 ½ months
2
2
  __ggml_vocab_test__
3
- Führer
3
+ Äpfel
4
4
  __ggml_vocab_test__
5
5
 
6
6
  __ggml_vocab_test__
@@ -1,5 +1,5 @@
1
1
  4850 244 57 244 162 159 17722
2
- 75 2022 3943 284
2
+ 163 250 7146 361
3
3
 
4
4
  244
5
5
  280
@@ -0,0 +1,62 @@
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- '' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- endif %}
18
+ {%- endif %}
19
+ {%- for message in messages %}
20
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
21
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
22
+ {%- elif message.role == "assistant" and not message.tool_calls %}
23
+ {%- set content = message.content %}
24
+ {%- if not loop.last %}
25
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
26
+ {%- endif %}
27
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- if not loop.last %}
31
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
32
+ {%- endif %}
33
+ {{- '<|im_start|>' + message.role }}
34
+ {%- if message.content %}
35
+ {{- '\n' + content }}
36
+ {%- endif %}
37
+ {%- for tool_call in message.tool_calls %}
38
+ {%- if tool_call.function is defined %}
39
+ {%- set tool_call = tool_call.function %}
40
+ {%- endif %}
41
+ {{- '\n<tool_call>\n{"name": "' }}
42
+ {{- tool_call.name }}
43
+ {{- '", "arguments": ' }}
44
+ {{- tool_call.arguments | tojson }}
45
+ {{- '}\n</tool_call>' }}
46
+ {%- endfor %}
47
+ {{- '<|im_end|>\n' }}
48
+ {%- elif message.role == "tool" %}
49
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
50
+ {{- '<|im_start|>user' }}
51
+ {%- endif %}
52
+ {{- '\n<tool_response>\n' }}
53
+ {{- message.content }}
54
+ {{- '\n</tool_response>' }}
55
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
56
+ {{- '<|im_end|>\n' }}
57
+ {%- endif %}
58
+ {%- endif %}
59
+ {%- endfor %}
60
+ {%- if add_generation_prompt %}
61
+ {{- '<|im_start|>assistant\n<think>\n' }}
62
+ {%- endif %}
@@ -0,0 +1,85 @@
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
+ {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
+ {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
+ {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
+ {%- endfor %}
66
+ {%- endif %}
67
+ {{- '<|im_end|>\n' }}
68
+ {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
+ {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
+ {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
+ {{- '<|im_end|>\n' }}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if add_generation_prompt %}
81
+ {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
@@ -19,4 +19,6 @@ These templates can be updated with the following commands:
19
19
  ./scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use > models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
20
20
  ./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use > models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
21
21
  ./scripts/get_chat_template.py Qwen/Qwen2.5-7B-Instruct > models/templates/Qwen-Qwen2.5-7B-Instruct.jinja
22
+ ./scripts/get_chat_template.py Qwen/QwQ-32B > models/templates/Qwen-QwQ-32B.jinja
23
+ ./scripts/get_chat_template.py Qwen/Qwen3-0.6B > models/templates/Qwen-Qwen3-0.6B.jinja
22
24
  ```
@@ -1,3 +1,7 @@
1
1
  -r ./requirements-convert_legacy_llama.txt
2
2
  --extra-index-url https://download.pytorch.org/whl/cpu
3
- torch~=2.2.1
3
+ torch~=2.2.1; platform_machine != "s390x"
4
+
5
+ # torch s390x packages can only be found from nightly builds
6
+ --extra-index-url https://download.pytorch.org/whl/nightly
7
+ torch>=0.0.0.dev0; platform_machine == "s390x"