@novastera-oss/llamarn 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/build-info.cpp +2 -2
  14. package/cpp/llama.cpp/README.md +11 -3
  15. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  16. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  17. package/cpp/llama.cpp/common/arg.cpp +153 -113
  18. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  19. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  20. package/cpp/llama.cpp/common/chat.cpp +847 -699
  21. package/cpp/llama.cpp/common/chat.h +73 -6
  22. package/cpp/llama.cpp/common/common.cpp +50 -82
  23. package/cpp/llama.cpp/common/common.h +21 -17
  24. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  25. package/cpp/llama.cpp/common/json-partial.h +37 -0
  26. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  27. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  28. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  29. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  30. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  31. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  32. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  33. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  34. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  35. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  36. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  37. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  74. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  120. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  121. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  122. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  123. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  124. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  125. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  126. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  127. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  128. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  129. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  130. package/cpp/llama.cpp/include/llama.h +62 -125
  131. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  132. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  133. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  150. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  159. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  160. package/cpp/llama.cpp/models/templates/README.md +2 -0
  161. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  162. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  163. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  164. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  165. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  166. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  167. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  168. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  169. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  170. package/cpp/llama.cpp/src/llama-context.h +30 -0
  171. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  172. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  173. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  174. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  175. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  176. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  177. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  178. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  179. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  180. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  181. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  182. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  183. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  184. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  185. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  186. package/cpp/llama.cpp/src/llama-model.h +6 -1
  187. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  188. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  189. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  190. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  191. package/cpp/llama.cpp/src/llama.cpp +14 -0
  192. package/cpp/rn-completion.cpp +4 -2
  193. package/ios/include/chat.h +73 -6
  194. package/ios/include/common/minja/chat-template.hpp +9 -5
  195. package/ios/include/common/minja/minja.hpp +69 -36
  196. package/ios/include/common.h +21 -17
  197. package/ios/include/llama.h +62 -125
  198. package/ios/libs/llama.xcframework/Info.plist +19 -19
  199. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  200. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  201. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  205. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  206. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  227. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  228. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  229. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  240. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  241. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  242. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  246. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  247. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  253. package/package.json +1 -1
  254. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  255. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  256. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  257. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
package/cpp/llama.cpp/src/llama-model-saver.cpp (new file)
@@ -0,0 +1,281 @@
+#include "llama-model-saver.h"
+
+#include "gguf.h"
+
+#include "llama.h"
+#include "llama-hparams.h"
+#include "llama-model.h"
+#include "llama-vocab.h"
+
+#include <string>
+
+llama_model_saver::llama_model_saver(const struct llama_model & model) : model(model), llm_kv(model.arch) {
+    gguf_ctx = gguf_init_empty();
+}
+
+llama_model_saver::~llama_model_saver() {
+    gguf_free(gguf_ctx);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const uint32_t value) {
+    gguf_set_val_u32(gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const int32_t value) {
+    gguf_set_val_i32(gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const float value) {
+    gguf_set_val_f32(gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const bool value) {
+    gguf_set_val_bool(gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const char * value) {
+    gguf_set_val_str(gguf_ctx, llm_kv(key).c_str(), value);
+}
+
+[[noreturn]]
+void llama_model_saver::add_kv(const enum llm_kv key, const char value) {
+    GGML_UNUSED(key);
+    GGML_UNUSED(value);
+    GGML_ABORT("fatal error"); // this should never be called, only needed to make the template below compile
+}
+
+template <typename Container>
+void llama_model_saver::add_kv(const enum llm_kv key, const Container & value, const bool per_layer) {
+    const size_t n_values = per_layer ? size_t(model.hparams.n_layer) : value.size();
+    GGML_ASSERT(n_values <= value.size());
+
+    if (n_values == 0) {
+        return;
+    }
+
+    if (per_layer) {
+        bool all_values_the_same = true;
+        for (size_t i = 1; i < n_values; ++i) {
+            if (value[i] != value[0]) {
+                all_values_the_same = false;
+                break;
+            }
+        }
+        if (all_values_the_same) {
+            add_kv(key, value[0]);
+            return;
+        }
+    }
+
+    if (std::is_same<typename Container::value_type, uint8_t>::value) {
+        gguf_set_arr_data(gguf_ctx, llm_kv(key).c_str(), GGUF_TYPE_UINT8, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, int8_t>::value) {
+        gguf_set_arr_data(gguf_ctx, llm_kv(key).c_str(), GGUF_TYPE_INT8, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, uint32_t>::value) {
+        gguf_set_arr_data(gguf_ctx, llm_kv(key).c_str(), GGUF_TYPE_UINT32, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, int32_t>::value) {
+        gguf_set_arr_data(gguf_ctx, llm_kv(key).c_str(), GGUF_TYPE_INT32, value.data(), n_values);
+    } else if (std::is_same<typename Container::value_type, float>::value) {
+        gguf_set_arr_data(gguf_ctx, llm_kv(key).c_str(), GGUF_TYPE_FLOAT32, value.data(), n_values);
+    } else if (std::is_same<Container, std::string>::value) {
+        gguf_set_val_str(gguf_ctx, llm_kv(key).c_str(), reinterpret_cast<const char *>(value.data()));
+    } else {
+        GGML_ABORT("fatal error");
+    }
+}
+
+void llama_model_saver::add_kv(const enum llm_kv key, const std::vector<std::string> & value) {
+    std::vector<const char *> tmp(value.size());
+    for (size_t i = 0; i < value.size(); ++i) {
+        tmp[i] = value[i].c_str();
+    }
+    gguf_set_arr_str(gguf_ctx, llm_kv(key).c_str(), tmp.data(), tmp.size());
+}
+
+void llama_model_saver::add_tensor(const struct ggml_tensor * tensor) {
+    if (!tensor) {
+        return;
+    }
+    if (gguf_find_tensor(gguf_ctx, tensor->name) >= 0) {
+        GGML_ASSERT(std::string(tensor->name) == "rope_freqs.weight"); // FIXME
+        return;
+    }
+    gguf_add_tensor(gguf_ctx, tensor);
+}
+
+void llama_model_saver::add_kv_from_model() {
+    const llama_hparams & hparams = model.hparams;
+    const llama_vocab & vocab = model.vocab;
+
+    const int32_t n_vocab = vocab.n_tokens();
+    std::vector<std::string> tokens(n_vocab);
+    std::vector<float> scores(n_vocab);
+    std::vector<int32_t> token_types(n_vocab);
+
+    for (int32_t id = 0; id < n_vocab; ++id) {
+        const llama_vocab::token_data & token_data = vocab.get_token_data(id);
+
+        tokens[id] = token_data.text;
+        scores[id] = token_data.score;
+
+        switch(token_data.attr) {
+            case LLAMA_TOKEN_ATTR_UNKNOWN:      token_types[id] = LLAMA_TOKEN_TYPE_UNKNOWN;      break;
+            case LLAMA_TOKEN_ATTR_UNUSED:       token_types[id] = LLAMA_TOKEN_TYPE_UNUSED;       break;
+            case LLAMA_TOKEN_ATTR_NORMAL:       token_types[id] = LLAMA_TOKEN_TYPE_NORMAL;       break;
+            case LLAMA_TOKEN_ATTR_CONTROL:      token_types[id] = LLAMA_TOKEN_TYPE_CONTROL;      break;
+            case LLAMA_TOKEN_ATTR_USER_DEFINED: token_types[id] = LLAMA_TOKEN_TYPE_USER_DEFINED; break;
+            case LLAMA_TOKEN_ATTR_BYTE:         token_types[id] = LLAMA_TOKEN_TYPE_BYTE;         break;
+            case LLAMA_TOKEN_ATTR_UNDEFINED:
+            default:                            token_types[id] = LLAMA_TOKEN_TYPE_UNDEFINED;    break;
+        }
+    }
+
+    // add_kv(LLM_KV_GENERAL_TYPE, ???);
+    add_kv(LLM_KV_GENERAL_ARCHITECTURE, model.arch_name());
+    // add_kv(LLM_KV_GENERAL_QUANTIZATION_VERSION, ???);
+    // add_kv(LLM_KV_GENERAL_ALIGNMENT, ???);
+    add_kv(LLM_KV_GENERAL_NAME, model.name);
+    // add_kv(LLM_KV_GENERAL_AUTHOR, ???);
+    // add_kv(LLM_KV_GENERAL_VERSION, ???);
+    // add_kv(LLM_KV_GENERAL_URL, ???);
+    // add_kv(LLM_KV_GENERAL_DESCRIPTION, ???);
+    // add_kv(LLM_KV_GENERAL_LICENSE, ???);
+    // add_kv(LLM_KV_GENERAL_SOURCE_URL, ???);
+    // add_kv(LLM_KV_GENERAL_SOURCE_HF_REPO, ???);
+
+    add_kv(LLM_KV_VOCAB_SIZE, vocab.n_tokens());
+    add_kv(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train);
+    add_kv(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd);
+    add_kv(LLM_KV_BLOCK_COUNT, hparams.n_layer);
+    add_kv(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
+    add_kv(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff_arr, true);
+    add_kv(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
+    add_kv(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
+    add_kv(LLM_KV_USE_PARALLEL_RESIDUAL, hparams.use_par_res);
+    // add_kv(LLM_KV_TENSOR_DATA_LAYOUT, ???);
+    add_kv(LLM_KV_EXPERT_COUNT, hparams.n_expert);
+    add_kv(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used);
+    add_kv(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
+    add_kv(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+    add_kv(LLM_KV_POOLING_TYPE, uint32_t(hparams.pooling_type));
+    add_kv(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
+    add_kv(LLM_KV_DECODER_START_TOKEN_ID, hparams.dec_start_token_id);
+    add_kv(LLM_KV_ATTN_LOGIT_SOFTCAPPING, hparams.f_attn_logit_softcapping);
+    add_kv(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping);
+    add_kv(LLM_KV_SWIN_NORM, hparams.swin_norm);
+    add_kv(LLM_KV_RESCALE_EVERY_N_LAYERS, hparams.rescale_every_n_layers);
+    add_kv(LLM_KV_TIME_MIX_EXTRA_DIM, hparams.time_mix_extra_dim);
+    add_kv(LLM_KV_TIME_DECAY_EXTRA_DIM, hparams.time_decay_extra_dim);
+    add_kv(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale);
+    add_kv(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale);
+
+    add_kv(LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head_arr, true);
+    add_kv(LLM_KV_ATTENTION_HEAD_COUNT_KV, hparams.n_head_kv_arr, true);
+    add_kv(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);
+    add_kv(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv);
+    add_kv(LLM_KV_ATTENTION_KEY_LENGTH, hparams.n_embd_head_k);
+    add_kv(LLM_KV_ATTENTION_VALUE_LENGTH, hparams.n_embd_head_v);
+    add_kv(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
+    add_kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+    add_kv(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
+    add_kv(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
+    add_kv(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
+    add_kv(LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, hparams.n_rel_attn_bkts);
+    add_kv(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
+    add_kv(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
+
+    const float rope_scaling_factor = hparams.rope_freq_scale_train == 1.0f ? 0.0f : 1.0f/hparams.rope_freq_scale_train;
+
+    add_kv(LLM_KV_ROPE_DIMENSION_COUNT, hparams.n_rot);
+    add_kv(LLM_KV_ROPE_FREQ_BASE, hparams.rope_freq_base_train);
+    // add_kv(LLM_KV_ROPE_SCALE_LINEAR, rope_scaling_factor); // old name
+    add_kv(LLM_KV_ROPE_SCALING_TYPE, llama_rope_scaling_type_name(hparams.rope_scaling_type_train));
+    add_kv(LLM_KV_ROPE_SCALING_FACTOR, rope_scaling_factor);
+    add_kv(LLM_KV_ROPE_SCALING_ATTN_FACTOR, hparams.rope_attn_factor);
+    add_kv(LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, hparams.n_ctx_orig_yarn);
+    add_kv(LLM_KV_ROPE_SCALING_FINETUNED, hparams.rope_finetuned);
+    add_kv(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
+
+    // TODO: implement split file support
+    // add_kv(LLM_KV_SPLIT_NO, ???);
+    // add_kv(LLM_KV_SPLIT_COUNT, ???);
+    // add_kv(LLM_KV_SPLIT_TENSORS_COUNT, ???);
+
+    add_kv(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner);
+    add_kv(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv);
+    add_kv(LLM_KV_SSM_STATE_SIZE, hparams.ssm_d_state);
+    add_kv(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank);
+    add_kv(LLM_KV_SSM_DT_B_C_RMS, hparams.ssm_dt_b_c_rms);
+
+    add_kv(LLM_KV_WKV_HEAD_SIZE, hparams.wkv_head_size);
+
+    add_kv(LLM_KV_TOKENIZER_MODEL, vocab.get_tokenizer_model());
+    add_kv(LLM_KV_TOKENIZER_PRE, vocab.get_tokenizer_pre());
+    add_kv(LLM_KV_TOKENIZER_LIST, tokens);
+    add_kv(LLM_KV_TOKENIZER_TOKEN_TYPE, token_types);
+    add_kv(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, vocab.n_token_types());
+    add_kv(LLM_KV_TOKENIZER_SCORES, scores);
+    add_kv(LLM_KV_TOKENIZER_MERGES, vocab.get_bpe_merges());
+    // FIXME llama_token is type i32 but when reading in a GGUF file u32 is expected, not an issue for writing though
+    add_kv(LLM_KV_TOKENIZER_BOS_ID, uint32_t(vocab.token_bos()));
+    add_kv(LLM_KV_TOKENIZER_EOS_ID, uint32_t(vocab.token_eos()));
+    add_kv(LLM_KV_TOKENIZER_EOT_ID, uint32_t(vocab.token_eot()));
+    add_kv(LLM_KV_TOKENIZER_EOM_ID, uint32_t(vocab.token_eom()));
+    add_kv(LLM_KV_TOKENIZER_UNK_ID, uint32_t(vocab.token_unk()));
+    add_kv(LLM_KV_TOKENIZER_SEP_ID, uint32_t(vocab.token_sep()));
+    add_kv(LLM_KV_TOKENIZER_PAD_ID, uint32_t(vocab.token_pad()));
+    // add_kv(LLM_KV_TOKENIZER_CLS_ID, uint32_t(vocab.token_bos())); // deprecated
+    // add_kv(LLM_KV_TOKENIZER_MASK_ID, ???);
+    add_kv(LLM_KV_TOKENIZER_ADD_BOS, vocab.get_add_bos());
+    add_kv(LLM_KV_TOKENIZER_ADD_EOS, vocab.get_add_eos());
+    add_kv(LLM_KV_TOKENIZER_ADD_PREFIX, vocab.get_add_space_prefix());
+    add_kv(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, vocab.get_remove_extra_whitespaces());
+    add_kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, vocab.get_precompiled_charsmap());
+    // add_kv(LLM_KV_TOKENIZER_HF_JSON, ???);
+    // add_kv(LLM_KV_TOKENIZER_RWKV, ???);
+    add_kv(LLM_KV_TOKENIZER_FIM_PRE_ID, uint32_t(vocab.token_fim_pre()));
+    add_kv(LLM_KV_TOKENIZER_FIM_SUF_ID, uint32_t(vocab.token_fim_suf()));
+    add_kv(LLM_KV_TOKENIZER_FIM_MID_ID, uint32_t(vocab.token_fim_mid()));
+    add_kv(LLM_KV_TOKENIZER_FIM_PAD_ID, uint32_t(vocab.token_fim_pad()));
+    add_kv(LLM_KV_TOKENIZER_FIM_REP_ID, uint32_t(vocab.token_fim_rep()));
+    add_kv(LLM_KV_TOKENIZER_FIM_SEP_ID, uint32_t(vocab.token_fim_sep()));
+
+    // TODO: implement LoRA support
+    // add_kv(LLM_KV_ADAPTER_TYPE, ???);
+    // add_kv(LLM_KV_ADAPTER_LORA_ALPHA, ???);
+
+    // deprecated
+    // add_kv(LLM_KV_TOKENIZER_PREFIX_ID, ???);
+    // add_kv(LLM_KV_TOKENIZER_SUFFIX_ID, ???);
+    // add_kv(LLM_KV_TOKENIZER_MIDDLE_ID, ???);
+}
+
+void llama_model_saver::add_tensors_from_model() {
+    if (std::string(model.output->name) != std::string(model.tok_embd->name)) {
+        add_tensor(model.tok_embd); // some models use the same tensor for tok_embd and output
+    }
+    add_tensor(model.type_embd);
+    add_tensor(model.pos_embd);
+    add_tensor(model.tok_norm);
+    add_tensor(model.tok_norm_b);
+    add_tensor(model.output_norm);
+    add_tensor(model.output_norm_b);
+    add_tensor(model.output);
+    add_tensor(model.output_b);
+    add_tensor(model.output_norm_enc);
+    add_tensor(model.cls);
+    add_tensor(model.cls_b);
+    add_tensor(model.cls_out);
+    add_tensor(model.cls_out_b);
+
+    for (const struct llama_layer & layer : model.layers) {
+        for (size_t i = 0; i < sizeof(layer)/sizeof(struct ggml_tensor *); ++i) {
+            add_tensor(reinterpret_cast<const struct ggml_tensor * const *>(&layer)[i]);
+        }
+    }
+}
+
+void llama_model_saver::save(const std::string & path_model) {
+    gguf_write_to_file(gguf_ctx, path_model.c_str(), false);
+}
+
package/cpp/llama.cpp/src/llama-model-saver.h (new file)
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "llama.h"
+#include "llama-arch.h"
+
+#include <vector>
+
+struct llama_model_saver {
+    struct gguf_context * gguf_ctx = nullptr;
+    const struct llama_model & model;
+    const struct LLM_KV llm_kv;
+
+    llama_model_saver(const struct llama_model & model);
+    ~llama_model_saver();
+
+    void add_kv(enum llm_kv key, uint32_t value);
+    void add_kv(enum llm_kv key, int32_t value);
+    void add_kv(enum llm_kv key, float value);
+    void add_kv(enum llm_kv key, bool value);
+    void add_kv(enum llm_kv key, const char * value);
+
+    [[noreturn]]
+    void add_kv(enum llm_kv key, char value); // needed to make the template below compile
+
+    template <typename Container>
+    void add_kv(enum llm_kv key, const Container & value, bool per_layer = false);
+
+    void add_kv(enum llm_kv key, const std::vector<std::string> & value);
+
+    void add_tensor(const struct ggml_tensor * tensor);
+
+    void add_kv_from_model();
+
+    void add_tensors_from_model();
+
+    void save(const std::string & path_model);
+};
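
For context on what these two new files add: llama_model_saver is an internal llama.cpp helper that writes an in-memory llama_model back out as a GGUF file. A minimal usage sketch, assuming access to llama.cpp internals (the class is not part of the public C API, and my_model stands in for an already-loaded internal model reference):

    #include "llama-model-saver.h"

    // Sketch only: `my_model` is a hypothetical, already-loaded internal
    // llama_model reference; llama_model_saver is internal to llama.cpp.
    void write_model_copy(const llama_model & my_model) {
        llama_model_saver saver(my_model); // allocates an empty gguf_context
        saver.add_kv_from_model();         // hparams, RoPE/SSM, and tokenizer metadata as GGUF KV pairs
        saver.add_tensors_from_model();    // registers every model tensor with the gguf_context
        saver.save("model-copy.gguf");     // writes metadata and tensor data via gguf_write_to_file
    }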