@novastera-oss/llamarn 0.1.5-beta.3 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/LlamaCppModel.cpp +148 -5
  14. package/cpp/LlamaCppModel.h +11 -2
  15. package/cpp/PureCppImpl.cpp +3 -3
  16. package/cpp/PureCppImpl.h +3 -0
  17. package/cpp/build-info.cpp +2 -2
  18. package/cpp/llama.cpp/README.md +11 -3
  19. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  20. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  21. package/cpp/llama.cpp/common/arg.cpp +153 -113
  22. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  23. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  24. package/cpp/llama.cpp/common/chat.cpp +847 -699
  25. package/cpp/llama.cpp/common/chat.h +73 -6
  26. package/cpp/llama.cpp/common/common.cpp +50 -82
  27. package/cpp/llama.cpp/common/common.h +21 -17
  28. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  29. package/cpp/llama.cpp/common/json-partial.h +37 -0
  30. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  31. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  32. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  33. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  34. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  35. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  36. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  37. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  38. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  39. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  40. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  41. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  42. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  46. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  47. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  75. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  77. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  78. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  79. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  88. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  112. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  113. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  122. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  123. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  124. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  125. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  126. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  127. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  128. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  129. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  130. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  131. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  132. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  133. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  134. package/cpp/llama.cpp/include/llama.h +62 -125
  135. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  150. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  154. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  159. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  160. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  161. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  162. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  163. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  164. package/cpp/llama.cpp/models/templates/README.md +2 -0
  165. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  166. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  167. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  168. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  169. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  170. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  171. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  172. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  173. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  174. package/cpp/llama.cpp/src/llama-context.h +30 -0
  175. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  176. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  177. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  178. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  179. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  180. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  181. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  182. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  183. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  184. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  185. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  186. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  187. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  188. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  189. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  190. package/cpp/llama.cpp/src/llama-model.h +6 -1
  191. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  192. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  193. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  194. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  195. package/cpp/llama.cpp/src/llama.cpp +14 -0
  196. package/cpp/rn-completion.cpp +4 -2
  197. package/ios/include/chat.h +73 -6
  198. package/ios/include/common/minja/chat-template.hpp +9 -5
  199. package/ios/include/common/minja/minja.hpp +69 -36
  200. package/ios/include/common.h +21 -17
  201. package/ios/include/llama.h +62 -125
  202. package/ios/libs/llama.xcframework/Info.plist +19 -19
  203. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  204. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  205. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  206. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  207. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  208. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  212. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  213. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  214. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  215. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  227. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  228. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  229. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  230. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  231. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  232. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  233. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  235. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  236. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  240. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  241. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  242. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  243. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  244. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  245. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  246. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  247. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  248. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  249. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  253. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  254. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  255. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  256. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  257. package/lib/module/NativeRNLlamaCpp.js.map +1 -1
  258. package/lib/typescript/src/NativeRNLlamaCpp.d.ts +3 -0
  259. package/lib/typescript/src/NativeRNLlamaCpp.d.ts.map +1 -1
  260. package/package.json +2 -1
  261. package/src/NativeRNLlamaCpp.ts +1 -0
  262. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  263. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  267. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  268. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  269. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  270. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  271. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  272. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  273. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  274. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
package/cpp/llama.cpp/src/llama-kv-cells.h
@@ -0,0 +1,379 @@
+#pragma once
+
+#include "llama.h"
+#include "llama-cparams.h"
+
+#include <bitset>
+#include <cassert>
+#include <vector>
+#include <set>
+
+// meta information about KV cells that can be part of multiple sequences at the same time
+// TODO: add unit tests
+class llama_kv_cells_unified {
+public:
+    void reset() {
+        for (uint32_t i = 0; i < pos.size(); ++i) {
+            pos[i] = -1;
+            shift[i] = 0;
+            seq[i].reset();
+        }
+
+        has_shift = false;
+
+        used.clear();
+
+        for (uint32_t s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+            seq_pos[s].clear();
+        }
+    }
+
+    void reset_shift() {
+        has_shift = false;
+
+        for (uint32_t i = 0; i < shift.size(); ++i) {
+            shift[i] = 0;
+        }
+    }
+
+    uint32_t size() const {
+        return pos.size();
+    }
+
+    void resize(uint32_t n) {
+        pos.resize(n);
+        shift.resize(n);
+        seq.resize(n);
+
+        reset();
+    }
+
+    bool is_empty(uint32_t i) const {
+        assert(i < pos.size());
+        assert((pos[i] < 0 && pos[i] == -1) || pos[i] >= 0);
+
+        return pos[i] == -1;
+    }
+
+    uint32_t get_used() const {
+        return used.size();
+    }
+
+    // the index of the first cell that is used
+    // return 0 if no cells are used
+    uint32_t used_min() const {
+        return used.empty() ? 0 : *used.begin();
+    }
+
+    // the index of the last cell that is used + 1
+    // return 0 if no cells are used
+    uint32_t used_max_p1() const {
+#if 0
+        if (!seq_pos[0].empty()) printf("kv_cells: min[0] = %5d, max[0] = %5d\n", *seq_pos[0].begin(), *seq_pos[0].rbegin());
+        if (!seq_pos[1].empty()) printf("kv_cells: min[1] = %5d, max[1] = %5d\n", *seq_pos[1].begin(), *seq_pos[1].rbegin());
+        if (!seq_pos[2].empty()) printf("kv_cells: min[2] = %5d, max[2] = %5d\n", *seq_pos[2].begin(), *seq_pos[2].rbegin());
+#endif
+
+        return used.empty() ? 0 : *used.rbegin() + 1;
+    }
+
+    bool get_has_shift() const {
+        return has_shift;
+    }
+
+    // move cell isrc to idst (used during defrag)
+    void mv(uint32_t isrc, uint32_t idst) {
+        assert(isrc < pos.size());
+        assert(idst < pos.size());
+
+        pos  [idst] = pos  [isrc];
+        shift[idst] = shift[isrc];
+        seq  [idst] = seq  [isrc];
+
+        pos  [isrc] = -1;
+        shift[isrc] = 0;
+        seq  [isrc].reset();
+
+        used.erase (isrc);
+        used.insert(idst);
+    }
+
+    // copy the state of cells [i, i + n) (used for save/restore the state of the cells)
+    llama_kv_cells_unified cp(uint32_t i, uint32_t n) const {
+        assert(i + n <= pos.size());
+
+        llama_kv_cells_unified res;
+
+        res.resize(n);
+
+        for (uint32_t j = 0; j < n; ++j) {
+            res.pos[j] = pos[i + j];
+            res.seq[j] = seq[i + j];
+
+            assert(shift[i + j] == 0);
+        }
+
+        return res;
+    }
+
+    // set the state of cells [i, i + other.pos.size()) (used for save/restore the state of the cells)
+    void set(uint32_t i, const llama_kv_cells_unified & other) {
+        assert(i + other.pos.size() <= pos.size());
+
+        for (uint32_t j = 0; j < other.pos.size(); ++j) {
+            if (pos[i + j] == -1 && other.pos[j] != -1) {
+                used.insert(i + j);
+            }
+
+            if (pos[i + j] != -1 && other.pos[j] == -1) {
+                used.erase(i + j);
+            }
+
+            if (pos[i + j] != -1) {
+                seq_pos_rm(i + j);
+            }
+
+            pos[i + j] = other.pos[j];
+            seq[i + j] = other.seq[j];
+
+            if (pos[i + j] != -1) {
+                seq_pos_add(i + j);
+            }
+
+            assert(shift[i + j] == 0);
+        }
+    }
+
+    // note: call only if the cell has seq_id
+    // return true if the cell becomes empty
+    bool seq_rm(uint32_t i, llama_seq_id seq_id) {
+        assert(i < pos.size());
+        assert(seq[i].test(seq_id));
+        assert(pos[i] != -1);
+        assert(seq_id >= 0);
+
+        seq[i].reset(seq_id);
+        seq_pos[seq_id].erase(pos[i]);
+
+        if (seq[i].none()) {
+            pos[i] = -1;
+
+            used.erase(i);
+
+            return true;
+        }
+
+        return false;
+    }
+
+    // return true if the cell becomes empty (i.e. it did not contain seq_id before the call)
+    bool seq_keep(uint32_t i, llama_seq_id seq_id) {
+        assert(i < pos.size());
+
+        if (seq[i].test(seq_id)) {
+            seq_pos_rm(i);
+            seq[i].reset();
+
+            seq[i].set(seq_id);
+            seq_pos[seq_id].insert(pos[i]);
+
+            return false;
+        }
+
+        if (seq[i].any()) {
+            seq_pos_rm(i);
+            seq[i].reset();
+
+            pos[i] = -1;
+
+            used.erase(i);
+
+            return true;
+        }
+
+        assert(pos[i] == -1);
+
+        return false;
+    }
+
+    bool seq_has(uint32_t i, llama_seq_id seq_id) const {
+        assert(i < pos.size());
+        assert(seq_id >= 0);
+
+        return seq[i].test(seq_id);
+    }
+
+    // note: call only if the cell is not empty and the seq_id is not in the cell
+    void seq_add(uint32_t i, llama_seq_id seq_id) {
+        assert(i < pos.size());
+        assert(pos[i] != -1);
+        assert(!seq[i].test(seq_id));
+
+        seq[i].set(seq_id);
+        seq_pos[seq_id].insert(pos[i]);
+    }
+
+    // the minimum position of sequence seq_id currently present in any of the cells
+    // return -1 if the sequence is not present
+    llama_pos seq_pos_min(llama_seq_id seq_id) const {
+        assert(seq_id >= 0);
+        assert(seq_id < LLAMA_MAX_PARALLEL_SEQUENCES);
+
+        if (seq_pos[seq_id].empty()) {
+            return -1;
+        }
+
+        return *seq_pos[seq_id].begin();
+    }
+
+    // the maximum position of sequence seq_id currently present in any of the cells
+    // return -1 if the sequence is not present
+    llama_pos seq_pos_max(llama_seq_id seq_id) const {
+        assert(seq_id >= 0);
+        assert(seq_id < LLAMA_MAX_PARALLEL_SEQUENCES);
+
+        if (seq_pos[seq_id].empty()) {
+            return -1;
+        }
+
+        return *seq_pos[seq_id].rbegin();
+    }
+
+    // note: call only if the cell is not empty
+    llama_pos pos_get(uint32_t i) const {
+        assert(i < pos.size());
+        assert(pos[i] != -1);
+
+        return pos[i];
+    }
+
+    // note: call only if the cell is not empty
+    llama_pos get_shift(uint32_t i) const {
+        assert(i < pos.size());
+        assert(pos[i] != -1);
+
+        return shift[i];
+    }
+
+    // check if a cell is not empty and its position is within [p0, p1)
+    bool pos_in(uint32_t i, llama_pos p0, llama_pos p1) const {
+        assert(i < pos.size());
+
+        return pos[i] >= p0 && pos[i] < p1;
+    }
+
+    // set the position of an empty cell
+    // does not modify "has_shift"
+    // note: call only if the cell is empty
+    void pos_set(uint32_t i, llama_pos p) {
+        assert(i < pos.size());
+        assert(pos[i] == -1);
+
+        pos[i] = p;
+
+        used.insert(i);
+    }
+
+    // pos[i] = pos[i] + d
+    // sets "has_shift" to true
+    // note: call only if the cell is not empty
+    bool pos_add(uint32_t i, llama_pos d) {
+        assert(i < pos.size());
+        assert(pos[i] != -1);
+
+        seq_pos_rm(i);
+
+        pos[i]   += d;
+        shift[i] += d;
+
+        seq_pos_add(i);
+
+        has_shift = true;
+
+        if (pos[i] < 0) {
+            seq_pos_rm(i);
+
+            seq[i].reset();
+            pos[i] = -1;
+
+            used.erase(i);
+
+            return true;
+        }
+
+        return false;
+    }
+
+    // pos[i] = pos[i] / d
+    // sets "has_shift" to true
+    // note: call only if the cell is not empty
+    void pos_div(uint32_t i, int d) {
+        assert(i < pos.size());
+        assert(pos[i] != -1);
+
+        const llama_pos p_old = pos[i];
+
+        seq_pos_rm(i);
+
+        pos[i]   /= d;
+        shift[i] += p_old - pos[i];
+
+        seq_pos_add(i);
+
+        has_shift = true;
+    }
+
+private:
+    bool has_shift = false;
+
+    // set of indices of used cells (i.e. pos[i] != -1, allowed to not have any seq_id)
+    std::set<uint32_t> used;
+
+    std::vector<llama_pos> pos;
+
+    // this array accumulates any applied shifts to the pos array since the last reset_shift() call
+    // this is used to queue multiple updates to the pos array, which in the end can be applied in one go:
+    //
+    //   cells.pos_add(x, shift_x);
+    //   cells.pos_div(y, shift_y);
+    //   ...
+    //
+    //   if (cells.has_shift()) {
+    //       for (int i = 0; i < n; ++i) {
+    //           auto shift_i = cells.get_shift(i);
+    //           ...
+    //       }
+    //       cells.reset_shift();
+    //   }
+    //
+    std::vector<llama_pos> shift;
+
+    using bits_t = std::bitset<LLAMA_MAX_PARALLEL_SEQUENCES>;
+
+    // the bitset seq[i] tells us which sequences are currently occupying the i-th cell
+    std::vector<bits_t> seq;
+
+    // the set seq_pos[s] tells us which positions are currently present for sequence s
+    // this way seq_pos[s].begin() and seq_pos[s].rbegin() give us the min/max positions currently in the cache
+    std::set<llama_pos> seq_pos[LLAMA_MAX_PARALLEL_SEQUENCES];
+
+    // helper functions for updating `seq_pos`, once cell at a time:
+
+    // remove cell i
+    void seq_pos_rm(uint32_t i) {
+        for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+            if (seq[i].test(s)) {
+                seq_pos[s].erase(pos[i]);
+            }
+        }
+    }
+
+    // add cell i
+    void seq_pos_add(uint32_t i) {
+        for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
+            if (seq[i].test(s)) {
+                seq_pos[s].insert(pos[i]);
+            }
+        }
+    }
+};
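
The comment on the shift array above describes the intended workflow: position edits are queued per cell via pos_add()/pos_div(), then consumed in a single pass over the cache. Below is a minimal usage sketch of that flow, using only the methods defined in this header; the cache size, positions, and shift value are illustrative, and it assumes llama-kv-cells.h and its LLAMA_MAX_PARALLEL_SEQUENCES dependency are available.

    // sketch: queue K-shifts on a few cells, then apply them in one go
    llama_kv_cells_unified cells;
    cells.resize(8);        // hypothetical cache of 8 cells

    cells.pos_set(0, 10);   // occupy two empty cells at positions 10 and 11
    cells.pos_set(1, 11);

    cells.pos_add(0, -4);   // queue a shift of -4; sets has_shift internally
    cells.pos_add(1, -4);

    if (cells.get_has_shift()) {
        for (uint32_t i = 0; i < cells.size(); ++i) {
            if (!cells.is_empty(i)) {
                const llama_pos d = cells.get_shift(i); // accumulated shift for cell i
                // ... apply the RoPE shift d to the K data of cell i ...
                (void) d;
            }
        }
        cells.reset_shift(); // shifts consumed; clear the queue
    }
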
package/cpp/llama.cpp/src/llama-memory.h
@@ -7,8 +7,8 @@ struct llama_memory_params {
     ggml_type type_k;
     ggml_type type_v;
 
-    // parameters for other types of memory
-    // ...
+    // use full-size SWA cache
+    bool swa_full;
 };
 
 // general concept of LLM memory
@@ -22,9 +22,10 @@ public:
     virtual bool seq_rm  (llama_seq_id seq_id, llama_pos p0, llama_pos p1) = 0;
     virtual void seq_cp  (llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) = 0;
     virtual void seq_keep(llama_seq_id seq_id) = 0;
-    virtual void seq_add (llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta) = 0;
+    virtual void seq_add (llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) = 0;
     virtual void seq_div (llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) = 0;
 
+    virtual llama_pos seq_pos_min(llama_seq_id seq_id) const = 0;
    virtual llama_pos seq_pos_max(llama_seq_id seq_id) const = 0;
 
    virtual bool get_can_edit() const = 0;
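
seq_pos_min() is new in this interface and mirrors the existing seq_pos_max(); the unified KV cache implements both on top of llama_kv_cells_unified::seq_pos_min/seq_pos_max shown above. A short sketch of how a caller might use the pair; it assumes the interface pointer is held as llama_memory_i * mem (the class name comes from llama-memory.h upstream), and the n_swa window check is purely illustrative.

    // sketch: query the position range a sequence occupies in memory
    const llama_pos p_min = mem->seq_pos_min(seq_id); // -1 if seq_id has no cells
    const llama_pos p_max = mem->seq_pos_max(seq_id); // -1 if seq_id has no cells

    if (p_min >= 0) {
        // e.g. a sliding-window (SWA) cache could check that the window
        // [p_max - n_swa, p_max] is still fully resident before reusing it
        const bool window_ok = p_min <= p_max - n_swa;
        (void) window_ok;
    }
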
package/cpp/llama.cpp/src/llama-model-loader.cpp
@@ -301,12 +301,12 @@ namespace GGUFMeta {
         GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta.get(), kid);
 
     switch (arr_info.gt) {
-        case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
-        case GGUF_TYPE_INT32:   GGML_ASSERT(
-            (std::is_same<T,  int32_t>::value) ||
-            (std::is_same<T, uint32_t>::value)); break;
+        case GGUF_TYPE_UINT32:
+        case GGUF_TYPE_INT32:   GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
+                                            (std::is_same<T, uint32_t>::value)); break;
+        case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
         default:
-            throw std::runtime_error(format("%s is not a float32, int32 array", key.c_str()));
+            throw std::runtime_error(format("%s is not a float32/uint32/int32 array", key.c_str()));
     }
 
     result.resize(arr_info.length);
@@ -330,12 +330,12 @@ namespace GGUFMeta {
         GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta.get(), kid);
 
     switch (arr_info.gt) {
-        case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
-        case GGUF_TYPE_INT32:   GGML_ASSERT(
-            (std::is_same<T,  int32_t>::value) ||
-            (std::is_same<T, uint32_t>::value)); break;
+        case GGUF_TYPE_UINT32:
+        case GGUF_TYPE_INT32:   GGML_ASSERT((std::is_same<T,  int32_t>::value) ||
+                                            (std::is_same<T, uint32_t>::value)); break;
+        case GGUF_TYPE_FLOAT32: GGML_ASSERT((std::is_same<T, float>::value)); break;
         default:
-            throw std::runtime_error(format("%s is not a float32, int32 array", key.c_str()));
+            throw std::runtime_error(format("%s is not a float32/uint32/int32 array", key.c_str()));
     }
 
     if (arr_info.length > N_MAX) {
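
Both get_arr() overloads above now accept GGUF_TYPE_UINT32 alongside GGUF_TYPE_INT32 and GGUF_TYPE_FLOAT32, so metadata arrays stored as uint32 no longer trip the type assert during model load. A hedged sketch of the same accept-both check, written against the public gguf C API rather than the loader's internal templates (the helper name is made up; the function signatures are from ggml's gguf.h):

    #include "gguf.h"

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    // read an integer array key, accepting both int32 and uint32 storage
    static std::vector<int32_t> read_i32_arr(struct gguf_context * ctx, const char * key) {
        const int64_t kid = gguf_find_key(ctx, key);
        if (kid < 0) {
            throw std::runtime_error("key not found");
        }

        const enum gguf_type gt = gguf_get_arr_type(ctx, kid);
        if (gt != GGUF_TYPE_INT32 && gt != GGUF_TYPE_UINT32) {
            throw std::runtime_error("not an int32/uint32 array");
        }

        const size_t n = gguf_get_arr_n(ctx, kid);
        const int32_t * data = (const int32_t *) gguf_get_arr_data(ctx, kid);

        return std::vector<int32_t>(data, data + n);
    }
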
@@ -469,7 +469,7 @@ llama_model_loader::llama_model_loader(
 
     meta.reset(gguf_init_from_file(fname.c_str(), params));
     if (!meta) {
-        throw std::runtime_error(format("%s: failed to load model from %s\n", __func__, fname.c_str()));
+        throw std::runtime_error(format("%s: failed to load model from %s", __func__, fname.c_str()));
     }
 
     get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
@@ -528,7 +528,7 @@ llama_model_loader::llama_model_loader(
             };
             gguf_context_ptr ctx_gguf { gguf_init_from_file(fname_split, split_params) };
             if (!ctx_gguf) {
-                throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, fname_split));
+                throw std::runtime_error(format("%s: failed to load GGUF split from %s", __func__, fname_split));
             }
 
             // check idx
@@ -822,13 +822,18 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
     mappings.reserve(files.size());
     mmaps_used.reserve(files.size());
     for (const auto & file : files) {
-        auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
-        if (!reg) {
-            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        bool is_numa = false;
+
+        auto * dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        if (dev) {
+            auto * reg = ggml_backend_dev_backend_reg(dev);
+            auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
+            if (is_numa_fn) {
+                is_numa = is_numa_fn();
+            }
         }
 
-        auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
-        std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
+        std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa);
         mmaps_used.emplace_back(mapping->size(), 0);
         if (mlock_mmaps) {
             std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());
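
The init_mappings() hunk above turns NUMA detection from a hard requirement into an optional capability: instead of throwing when no CPU backend is registered, the loader now resolves ggml_backend_cpu_is_numa by name and falls back to is_numa = false when either the device or the entry point is missing. The same lookup pattern, isolated into a standalone helper as a sketch; it assumes ggml_is_numa is declared in ggml-cpu.h, as in recent ggml versions.

    #include "ggml-backend.h"
    #include "ggml-cpu.h" // declares ggml_is_numa; used here only for its function type

    // return true only when a CPU backend exists AND it exports the NUMA query
    static bool cpu_reports_numa(void) {
        bool is_numa = false; // safe default when the capability is absent

        ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
        if (dev) {
            ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
            // optional entry points are exported by name; a null result means
            // this backend build does not provide the function
            auto * fn = (decltype(ggml_is_numa) *)
                ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
            if (fn) {
                is_numa = fn();
            }
        }

        return is_numa;
    }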