@novastera-oss/llamarn 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/build-info.cpp +2 -2
  14. package/cpp/llama.cpp/README.md +11 -3
  15. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  16. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  17. package/cpp/llama.cpp/common/arg.cpp +153 -113
  18. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  19. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  20. package/cpp/llama.cpp/common/chat.cpp +847 -699
  21. package/cpp/llama.cpp/common/chat.h +73 -6
  22. package/cpp/llama.cpp/common/common.cpp +50 -82
  23. package/cpp/llama.cpp/common/common.h +21 -17
  24. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  25. package/cpp/llama.cpp/common/json-partial.h +37 -0
  26. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  27. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  28. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  29. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  30. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  31. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  32. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  33. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  34. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  35. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  36. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  37. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  74. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  120. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  121. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  122. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  123. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  124. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  125. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  126. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  127. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  128. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  129. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  130. package/cpp/llama.cpp/include/llama.h +62 -125
  131. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  132. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  133. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  150. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  159. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  160. package/cpp/llama.cpp/models/templates/README.md +2 -0
  161. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  162. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  163. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  164. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  165. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  166. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  167. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  168. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  169. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  170. package/cpp/llama.cpp/src/llama-context.h +30 -0
  171. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  172. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  173. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  174. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  175. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  176. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  177. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  178. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  179. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  180. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  181. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  182. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  183. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  184. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  185. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  186. package/cpp/llama.cpp/src/llama-model.h +6 -1
  187. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  188. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  189. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  190. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  191. package/cpp/llama.cpp/src/llama.cpp +14 -0
  192. package/cpp/rn-completion.cpp +4 -2
  193. package/ios/include/chat.h +73 -6
  194. package/ios/include/common/minja/chat-template.hpp +9 -5
  195. package/ios/include/common/minja/minja.hpp +69 -36
  196. package/ios/include/common.h +21 -17
  197. package/ios/include/llama.h +62 -125
  198. package/ios/libs/llama.xcframework/Info.plist +19 -19
  199. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  200. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  201. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  205. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  206. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  227. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  228. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  229. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  240. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  241. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  242. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  246. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  247. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  253. package/package.json +1 -1
  254. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  255. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  256. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  257. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -3,6 +3,8 @@
3
3
  #pragma once
4
4
 
5
5
  #include "common.h"
6
+ #include <functional>
7
+ #include <chrono>
6
8
  #include <string>
7
9
  #include <vector>
8
10
 
@@ -12,11 +14,19 @@ struct common_chat_tool_call {
12
14
  std::string name;
13
15
  std::string arguments;
14
16
  std::string id;
17
+
18
+ bool operator==(const common_chat_tool_call & other) const {
19
+ return name == other.name && arguments == other.arguments && id == other.id;
20
+ }
15
21
  };
16
22
 
17
23
  struct common_chat_msg_content_part {
18
24
  std::string type;
19
25
  std::string text;
26
+
27
+ bool operator==(const common_chat_msg_content_part & other) const {
28
+ return type == other.type && text == other.text;
29
+ }
20
30
  };
21
31
 
22
32
  struct common_chat_msg {
@@ -27,6 +37,51 @@ struct common_chat_msg {
27
37
  std::string reasoning_content;
28
38
  std::string tool_name;
29
39
  std::string tool_call_id;
40
+
41
+ template <class T> T to_json_oaicompat() const;
42
+
43
+ bool empty() const {
44
+ return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
45
+ }
46
+ void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
47
+ for (auto i = 0u; i < tool_calls.size(); i++) {
48
+ if (ids_cache.size() <= i) {
49
+ auto id = tool_calls[i].id;
50
+ if (id.empty()) {
51
+ id = gen_tool_call_id();
52
+ }
53
+ ids_cache.push_back(id);
54
+ }
55
+ tool_calls[i].id = ids_cache[i];
56
+ }
57
+ }
58
+ bool operator==(const common_chat_msg & other) const {
59
+ return role == other.role
60
+ && content == other.content
61
+ && content_parts == other.content_parts
62
+ && tool_calls == other.tool_calls
63
+ && reasoning_content == other.reasoning_content
64
+ && tool_name == other.tool_name
65
+ && tool_call_id == other.tool_call_id;
66
+ }
67
+ bool operator!=(const common_chat_msg & other) const {
68
+ return !(*this == other);
69
+ }
70
+ };
71
+
72
+ struct common_chat_msg_diff {
73
+ // std::string reasoning_content_delta;
74
+ std::string content_delta;
75
+ size_t tool_call_index = std::string::npos;
76
+ common_chat_tool_call tool_call_delta;
77
+
78
+ static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
79
+
80
+ bool operator==(const common_chat_msg_diff & other) const {
81
+ return content_delta == other.content_delta
82
+ && tool_call_index == other.tool_call_index
83
+ && tool_call_delta == other.tool_call_delta;
84
+ }
30
85
  };
31
86
 
32
87
  struct common_chat_tool {
@@ -48,14 +103,11 @@ enum common_chat_format {
48
103
  COMMON_CHAT_FORMAT_LLAMA_3_X,
49
104
  COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
50
105
  COMMON_CHAT_FORMAT_DEEPSEEK_R1,
51
- COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
52
106
  COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
53
107
  COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
54
108
  COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
55
109
  COMMON_CHAT_FORMAT_HERMES_2_PRO,
56
- COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
57
110
  COMMON_CHAT_FORMAT_COMMAND_R7B,
58
- COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
59
111
 
60
112
  COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
61
113
  };
@@ -70,7 +122,9 @@ struct common_chat_templates_inputs {
70
122
  std::vector<common_chat_tool> tools;
71
123
  common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
72
124
  bool parallel_tool_calls = false;
73
- bool extract_reasoning = true;
125
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
126
+ bool enable_thinking = true;
127
+ std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
74
128
  };
75
129
 
76
130
  struct common_chat_params {
@@ -78,11 +132,21 @@ struct common_chat_params {
78
132
  std::string prompt;
79
133
  std::string grammar;
80
134
  bool grammar_lazy = false;
135
+ bool thinking_forced_open = false;
81
136
  std::vector<common_grammar_trigger> grammar_triggers;
82
137
  std::vector<std::string> preserved_tokens;
83
138
  std::vector<std::string> additional_stops;
84
139
  };
85
140
 
141
+ struct common_chat_syntax {
142
+ common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
143
+ common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
144
+ // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
145
+ bool reasoning_in_content = false;
146
+ bool thinking_forced_open = false;
147
+ bool parse_tool_calls = true;
148
+ };
149
+
86
150
  // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
87
151
  bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
88
152
 
@@ -119,8 +183,9 @@ std::string common_chat_format_example(
119
183
  const struct common_chat_templates * tmpls,
120
184
  bool use_jinja);
121
185
 
122
- std::string common_chat_format_name(common_chat_format format);
123
- common_chat_msg common_chat_parse( const std::string & input, common_chat_format format);
186
+ const char* common_chat_format_name(common_chat_format format);
187
+ const char* common_reasoning_format_name(common_reasoning_format format);
188
+ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
124
189
 
125
190
  common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
126
191
 
@@ -133,3 +198,5 @@ template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common
133
198
  // T can be std::string containing JSON or nlohmann::ordered_json
134
199
  template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
135
200
  template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
201
+
202
+ template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
@@ -443,6 +443,25 @@ void string_replace_all(std::string & s, const std::string & search, const std::
443
443
  s = std::move(builder);
444
444
  }
445
445
 
446
+ bool string_ends_with(const std::string_view & str, const std::string_view & suffix) {
447
+ return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
448
+ }
449
+ size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop) {
450
+ if (!str.empty() && !stop.empty()) {
451
+ const char text_last_char = str.back();
452
+ for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--) {
453
+ if (stop[char_index] == text_last_char) {
454
+ const auto current_partial = stop.substr(0, char_index + 1);
455
+ if (string_ends_with(str, current_partial)) {
456
+ return str.size() - char_index - 1;
457
+ }
458
+ }
459
+ }
460
+ }
461
+
462
+ return std::string::npos;
463
+ }
464
+
446
465
  std::string regex_escape(const std::string & s) {
447
466
  static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
448
467
  return std::regex_replace(s, special_chars, "\\$0");
@@ -830,7 +849,7 @@ std::string fs_get_cache_directory() {
830
849
  if (getenv("LLAMA_CACHE")) {
831
850
  cache_directory = std::getenv("LLAMA_CACHE");
832
851
  } else {
833
- #if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
852
+ #if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
834
853
  if (std::getenv("XDG_CACHE_HOME")) {
835
854
  cache_directory = std::getenv("XDG_CACHE_HOME");
836
855
  } else {
@@ -884,13 +903,16 @@ struct common_init_result common_init_from_params(common_params & params) {
884
903
  ok = false;
885
904
  }
886
905
 
887
- if (llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
888
- LOG_WRN("%s: warning: vocab does not have an EOS token, reranking will not work\n", __func__);
889
- ok = false;
890
- }
906
+ bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
907
+ bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
891
908
 
892
- if (llama_vocab_sep(vocab) == LLAMA_TOKEN_NULL) {
893
- LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
909
+ if (!has_eos && !has_sep) {
910
+ LOG_WRN("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__);
911
+ ok = false;
912
+ } else if (!has_eos) {
913
+ LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
914
+ } else if (!has_sep) {
915
+ LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
894
916
  ok = false;
895
917
  }
896
918
 
@@ -1083,6 +1105,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
1083
1105
  mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
1084
1106
  }
1085
1107
 
1108
+ mparams.progress_callback = params.load_progress_callback;
1109
+ mparams.progress_callback_user_data = params.load_progress_callback_user_data;
1110
+
1086
1111
  return mparams;
1087
1112
  }
1088
1113
 
@@ -1114,6 +1139,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
1114
1139
  cparams.flash_attn = params.flash_attn;
1115
1140
  cparams.no_perf = params.no_perf;
1116
1141
  cparams.op_offload = !params.no_op_offload;
1142
+ cparams.swa_full = params.swa_full;
1117
1143
 
1118
1144
  if (params.reranking) {
1119
1145
  cparams.embeddings = true;
@@ -1306,81 +1332,6 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
1306
1332
  return text;
1307
1333
  }
1308
1334
 
1309
- //
1310
- // KV cache utils
1311
- //
1312
-
1313
- void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
1314
- static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
1315
-
1316
- printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
1317
- view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
1318
-
1319
- llama_kv_cache_view_cell * c_curr = view.cells;
1320
- llama_seq_id * cs_curr = view.cells_sequences;
1321
-
1322
- for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
1323
- if (i % row_size == 0) {
1324
- printf("\n%5d: ", i);
1325
- }
1326
- int seq_count = 0;
1327
- for (int j = 0; j < view.n_seq_max; j++) {
1328
- if (cs_curr[j] >= 0) { seq_count++; }
1329
- }
1330
- putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
1331
- }
1332
-
1333
- printf("\n=== Done dumping\n");
1334
- }
1335
-
1336
- void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
1337
- static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
1338
-
1339
- printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
1340
- view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
1341
-
1342
- std::unordered_map<llama_seq_id, size_t> seqs;
1343
- llama_kv_cache_view_cell * c_curr = view.cells;
1344
- llama_seq_id * cs_curr = view.cells_sequences;
1345
-
1346
- for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
1347
- for (int j = 0; j < view.n_seq_max; j++) {
1348
- if (cs_curr[j] < 0) { continue; }
1349
- if (seqs.find(cs_curr[j]) == seqs.end()) {
1350
- if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
1351
- const size_t sz = seqs.size();
1352
- seqs[cs_curr[j]] = sz;
1353
- }
1354
- }
1355
- if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
1356
- }
1357
-
1358
- printf("=== Sequence legend: ");
1359
- for (const auto & it : seqs) {
1360
- printf("%zu=%d, ", it.second, it.first);
1361
- }
1362
- printf("'+'=other sequence ids");
1363
-
1364
- c_curr = view.cells;
1365
- cs_curr = view.cells_sequences;
1366
- for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
1367
- if (i % row_size == 0) {
1368
- printf("\n%5d: ", i);
1369
- }
1370
- for (int j = 0; j < view.n_seq_max; j++) {
1371
- if (cs_curr[j] >= 0) {
1372
- const auto & it = seqs.find(cs_curr[j]);
1373
- putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
1374
- } else {
1375
- putchar('.');
1376
- }
1377
- }
1378
- putchar(' ');
1379
- }
1380
-
1381
- printf("\n=== Done dumping\n");
1382
- }
1383
-
1384
1335
  //
1385
1336
  // Embedding utils
1386
1337
  //
@@ -1565,3 +1516,20 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
1565
1516
 
1566
1517
  return result;
1567
1518
  }
1519
+
1520
+ ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride) {
1521
+ const int64_t ne_datapoint = llama_n_ctx(ctx);
1522
+ const int64_t ndata = (tokens.size() - ne_datapoint - 1) / stride;
1523
+ ggml_opt_dataset_t result = ggml_opt_dataset_init(
1524
+ GGML_TYPE_I32, GGML_TYPE_I32, ne_datapoint, ne_datapoint, ndata, /*ndata_shard =*/ 1);
1525
+
1526
+ llama_token * data = (llama_token *) ggml_opt_dataset_data(result)->data;
1527
+ llama_token * labels = (llama_token *) ggml_opt_dataset_labels(result)->data;
1528
+
1529
+ for (int64_t idata = 0; idata < ndata; ++idata) {
1530
+ memcpy(data + idata*ne_datapoint, tokens.data() + idata*stride + 0, ne_datapoint*sizeof(llama_token));
1531
+ memcpy(labels + idata*ne_datapoint, tokens.data() + idata*stride + 1, ne_datapoint*sizeof(llama_token));
1532
+ }
1533
+
1534
+ return result;
1535
+ }
@@ -6,6 +6,7 @@
6
6
 
7
7
  #include <set>
8
8
  #include <string>
9
+ #include <string_view>
9
10
  #include <vector>
10
11
  #include <sstream>
11
12
 
@@ -75,7 +76,7 @@ enum llama_example {
75
76
  LLAMA_EXAMPLE_SERVER,
76
77
  LLAMA_EXAMPLE_CVECTOR_GENERATOR,
77
78
  LLAMA_EXAMPLE_EXPORT_LORA,
78
- LLAMA_EXAMPLE_LLAVA,
79
+ LLAMA_EXAMPLE_MTMD,
79
80
  LLAMA_EXAMPLE_LOOKUP,
80
81
  LLAMA_EXAMPLE_PARALLEL,
81
82
  LLAMA_EXAMPLE_TTS,
@@ -114,7 +115,7 @@ enum common_grammar_trigger_type {
114
115
  COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,
115
116
  COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
116
117
  COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
117
- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
118
+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
118
119
  };
119
120
 
120
121
  struct common_grammar_trigger {
@@ -290,6 +291,7 @@ struct common_params {
290
291
  int32_t verbosity = 0;
291
292
  int32_t control_vector_layer_start = -1; // layer range for control vector
292
293
  int32_t control_vector_layer_end = -1; // layer range for control vector
294
+ bool offline = false;
293
295
 
294
296
  int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
295
297
  int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
@@ -322,13 +324,13 @@ struct common_params {
322
324
  bool flash_attn = false; // flash attention
323
325
  bool no_perf = false; // disable performance metrics
324
326
  bool ctx_shift = true; // context shift on inifinite text generation
327
+ bool swa_full = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
325
328
 
326
329
  bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
327
330
  bool use_mmap = true; // use mmap for faster loads
328
331
  bool use_mlock = false; // use mlock to keep model in memory
329
332
  bool verbose_prompt = false; // print prompt tokens before generation
330
333
  bool display_prompt = true; // print prompt before generation
331
- bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
332
334
  bool no_kv_offload = false; // disable KV offloading
333
335
  bool warmup = true; // warmup run
334
336
  bool check_tensors = false; // validate tensor data
@@ -367,6 +369,8 @@ struct common_params {
367
369
  bool use_jinja = false; // NOLINT
368
370
  bool enable_chat_template = true;
369
371
  common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
372
+ int reasoning_budget = -1;
373
+ bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
370
374
 
371
375
  std::vector<std::string> api_keys;
372
376
 
@@ -426,6 +430,11 @@ struct common_params {
426
430
 
427
431
  // common params
428
432
  std::string out_file; // output filename for all example programs
433
+ // optional callback for model loading progress and cancellation:
434
+ // called with a progress value between 0.0 and 1.0.
435
+ // return false from callback to abort model loading or true to continue
436
+ llama_progress_callback load_progress_callback = NULL;
437
+ void * load_progress_callback_user_data = NULL;
429
438
  };
430
439
 
431
440
  // call once at the start of a program if it uses libcommon
@@ -503,10 +512,9 @@ static bool string_starts_with(const std::string & str,
503
512
  return str.rfind(prefix, 0) == 0;
504
513
  }
505
514
 
506
- static bool string_ends_with(const std::string & str,
507
- const std::string & suffix) { // While we wait for C++20's std::string::ends_with...
508
- return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
509
- }
515
+ // While we wait for C++20's std::string::ends_with...
516
+ bool string_ends_with(const std::string_view & str, const std::string_view & suffix);
517
+ size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop);
510
518
 
511
519
  bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
512
520
  void string_process_escapes(std::string & input);
@@ -615,16 +623,6 @@ std::string common_detokenize(
615
623
  const std::vector<llama_token> & tokens,
616
624
  bool special = true);
617
625
 
618
- //
619
- // KV cache utils
620
- //
621
-
622
- // Dump the KV cache view with the number of sequences per cell.
623
- void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
624
-
625
- // Dump the KV cache view showing individual sequences in each cell (long output).
626
- void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
627
-
628
626
  //
629
627
  // Embedding utils
630
628
  //
@@ -666,3 +664,9 @@ const char * const LLM_KV_SPLIT_COUNT = "split.count";
666
664
  const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
667
665
 
668
666
  }
667
+
668
+ //
669
+ // training utils
670
+ //
671
+
672
+ ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride);