@novastera-oss/llamarn 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (266)
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/build-info.cpp +2 -2
  14. package/cpp/llama.cpp/README.md +11 -3
  15. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  16. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  17. package/cpp/llama.cpp/common/arg.cpp +153 -113
  18. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  19. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  20. package/cpp/llama.cpp/common/chat.cpp +847 -699
  21. package/cpp/llama.cpp/common/chat.h +73 -6
  22. package/cpp/llama.cpp/common/common.cpp +50 -82
  23. package/cpp/llama.cpp/common/common.h +21 -17
  24. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  25. package/cpp/llama.cpp/common/json-partial.h +37 -0
  26. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  27. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  28. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  29. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  30. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  31. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  32. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  33. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  34. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  35. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  36. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  37. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  73. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  74. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  79. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  86. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  87. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  112. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  120. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  121. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  122. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  123. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  124. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  125. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  126. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  127. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  128. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  129. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  130. package/cpp/llama.cpp/include/llama.h +62 -125
  131. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  132. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  133. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  134. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  135. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  150. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  154. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  159. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  160. package/cpp/llama.cpp/models/templates/README.md +2 -0
  161. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  162. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  163. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  164. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  165. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  166. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  167. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  168. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  169. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  170. package/cpp/llama.cpp/src/llama-context.h +30 -0
  171. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  172. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  173. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  174. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  175. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  176. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  177. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  178. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  179. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  180. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  181. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  182. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  183. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  184. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  185. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  186. package/cpp/llama.cpp/src/llama-model.h +6 -1
  187. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  188. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  189. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  190. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  191. package/cpp/llama.cpp/src/llama.cpp +14 -0
  192. package/cpp/rn-completion.cpp +4 -2
  193. package/ios/include/chat.h +73 -6
  194. package/ios/include/common/minja/chat-template.hpp +9 -5
  195. package/ios/include/common/minja/minja.hpp +69 -36
  196. package/ios/include/common.h +21 -17
  197. package/ios/include/llama.h +62 -125
  198. package/ios/libs/llama.xcframework/Info.plist +19 -19
  199. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  200. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  201. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  202. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  203. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  204. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  205. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  206. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  207. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  208. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  227. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  228. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  229. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  231. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  232. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  233. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  235. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  236. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  240. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  241. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  242. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  243. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  244. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  245. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  246. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  247. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  248. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  249. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  253. package/package.json +1 -1
  254. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  255. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  256. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  257. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  258. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  259. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  260. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  261. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  262. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  263. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -177,6 +177,9 @@ class Keys:
177
177
  EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
178
178
  BLOCK_COUNT = "{arch}.convnext.block_count"
179
179
 
180
+ class Classifier:
181
+ OUTPUT_LABELS = "{arch}.classifier.output_labels"
182
+
180
183
  class Tokenizer:
181
184
  MODEL = "tokenizer.ggml.model"
182
185
  PRE = "tokenizer.ggml.pre"
@@ -219,10 +222,13 @@ class Keys:
219
222
  TYPE = "adapter.type"
220
223
  LORA_ALPHA = "adapter.lora.alpha"
221
224
 
222
- class ClipVision:
225
+ class Clip:
223
226
  PROJECTOR_TYPE = "clip.projector_type"
224
227
  HAS_VISION_ENCODER = "clip.has_vision_encoder"
228
+ HAS_AUDIO_ENCODER = "clip.has_audio_encoder"
225
229
  HAS_LLAVA_PROJECTOR = "clip.has_llava_projector"
230
+
231
+ class ClipVision:
226
232
  IMAGE_SIZE = "clip.vision.image_size"
227
233
  PATCH_SIZE = "clip.vision.patch_size"
228
234
  EMBEDDING_LENGTH = "clip.vision.embedding_length"
@@ -243,19 +249,33 @@ class Keys:
243
249
  class Projector:
244
250
  SCALE_FACTOR = "clip.vision.projector.scale_factor"
245
251
 
252
+ class ClipAudio:
253
+ NUM_MEL_BINS = "clip.audio.num_mel_bins"
254
+ EMBEDDING_LENGTH = "clip.audio.embedding_length"
255
+ FEED_FORWARD_LENGTH = "clip.audio.feed_forward_length"
256
+ PROJECTION_DIM = "clip.audio.projection_dim"
257
+ BLOCK_COUNT = "clip.audio.block_count"
258
+
259
+ class Attention:
260
+ HEAD_COUNT = "clip.audio.attention.head_count"
261
+ LAYERNORM_EPS = "clip.audio.attention.layer_norm_epsilon"
262
+
263
+ class Projector:
264
+ STACK_FACTOR = "clip.audio.projector.stack_factor"
265
+
246
266
  #
247
267
  # recommended mapping of model tensor names for storage in gguf
248
268
  #
249
269
 
250
270
 
251
271
  class GGUFType:
252
- MODEL = "model"
253
- ADAPTER = "adapter"
254
- CLIP_VISION = "clip-vision"
272
+ MODEL = "model"
273
+ ADAPTER = "adapter"
274
+ MMPROJ = "mmproj" # dummy, unused for now
255
275
 
256
276
 
257
277
  class MODEL_ARCH(IntEnum):
258
- CLIP_VISION = auto() # dummy arch for clip.cpp
278
+ MMPROJ = auto() # dummy arch for clip.cpp
259
279
  LLAMA = auto()
260
280
  LLAMA4 = auto()
261
281
  DECI = auto()
@@ -482,14 +502,15 @@ class MODEL_TENSOR(IntEnum):
482
502
  V_ENC_EMBD_CLS = auto()
483
503
  V_ENC_EMBD_PATCH = auto()
484
504
  V_ENC_EMBD_POS = auto()
505
+ V_ENC_INPUT_NORM = auto()
485
506
  V_ENC_ATTN_Q = auto()
486
507
  V_ENC_ATTN_Q_NORM = auto()
487
508
  V_ENC_ATTN_K = auto()
488
509
  V_ENC_ATTN_K_NORM = auto()
489
510
  V_ENC_ATTN_V = auto()
490
- V_ENC_INPUT_NORM = auto()
491
- V_ENC_OUTPUT = auto()
492
- V_ENC_OUTPUT_NORM = auto()
511
+ V_ENC_ATTN_O = auto()
512
+ V_ENC_ATTN_O_NORM = auto()
513
+ V_ENC_POST_ATTN_NORM = auto()
493
514
  V_ENC_FFN_UP = auto()
494
515
  V_ENC_FFN_GATE = auto()
495
516
  V_ENC_FFN_DOWN = auto()
@@ -513,10 +534,28 @@ class MODEL_TENSOR(IntEnum):
513
534
  V_RESMPL_QUERY = auto() # minicpmv
514
535
  V_TOK_EMBD_IMG_BREAK = auto() # pixtral
515
536
  V_MM_PATCH_MERGER = auto() # mistral small 3.1
537
+ # audio (mtmd)
538
+ A_ENC_EMBD_POS = auto()
539
+ A_ENC_CONV1D = auto()
540
+ A_PRE_NORM = auto()
541
+ A_POST_NORM = auto()
542
+ A_ENC_ATTN_Q = auto()
543
+ A_ENC_ATTN_K = auto()
544
+ A_ENC_ATTN_V = auto()
545
+ A_ENC_INPUT_NORM = auto()
546
+ A_ENC_OUTPUT = auto()
547
+ A_ENC_OUTPUT_NORM = auto()
548
+ A_ENC_FFN_UP = auto()
549
+ A_ENC_FFN_GATE = auto()
550
+ A_ENC_FFN_DOWN = auto()
551
+ A_MMPROJ = auto()
552
+ A_MMPROJ_FC = auto()
553
+ A_MM_NORM_PRE = auto()
554
+ A_MM_NORM_MID = auto()
516
555
 
517
556
 
518
557
  MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
519
- MODEL_ARCH.CLIP_VISION: "clip", # dummy arch for clip.cpp
558
+ MODEL_ARCH.MMPROJ: "clip", # dummy arch for clip.cpp
520
559
  MODEL_ARCH.LLAMA: "llama",
521
560
  MODEL_ARCH.LLAMA4: "llama4",
522
561
  MODEL_ARCH.DECI: "deci",
@@ -749,8 +788,9 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
749
788
  MODEL_TENSOR.V_ENC_ATTN_K_NORM: "v.blk.{bid}.attn_k_norm",
750
789
  MODEL_TENSOR.V_ENC_ATTN_V: "v.blk.{bid}.attn_v",
751
790
  MODEL_TENSOR.V_ENC_INPUT_NORM: "v.blk.{bid}.ln1",
752
- MODEL_TENSOR.V_ENC_OUTPUT: "v.blk.{bid}.attn_out",
753
- MODEL_TENSOR.V_ENC_OUTPUT_NORM: "v.blk.{bid}.ln2",
791
+ MODEL_TENSOR.V_ENC_ATTN_O: "v.blk.{bid}.attn_out",
792
+ MODEL_TENSOR.V_ENC_ATTN_O_NORM: "v.blk.{bid}.attn_out_norm",
793
+ MODEL_TENSOR.V_ENC_POST_ATTN_NORM: "v.blk.{bid}.ln2",
754
794
  MODEL_TENSOR.V_ENC_FFN_UP: "v.blk.{bid}.ffn_up",
755
795
  MODEL_TENSOR.V_ENC_FFN_GATE: "v.blk.{bid}.ffn_gate",
756
796
  MODEL_TENSOR.V_ENC_FFN_DOWN: "v.blk.{bid}.ffn_down",
@@ -774,10 +814,28 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
774
814
  MODEL_TENSOR.V_RESMPL_QUERY: "resampler.query",
775
815
  MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK: "v.token_embd.img_break", # pixtral
776
816
  MODEL_TENSOR.V_MM_PATCH_MERGER: "mm.patch_merger", # mistral small 3.1
817
+ # audio (mtmd)
818
+ MODEL_TENSOR.A_ENC_EMBD_POS: "a.position_embd",
819
+ MODEL_TENSOR.A_ENC_CONV1D: "a.conv1d.{bid}",
820
+ MODEL_TENSOR.A_PRE_NORM: "a.pre_ln",
821
+ MODEL_TENSOR.A_POST_NORM: "a.post_ln",
822
+ MODEL_TENSOR.A_ENC_ATTN_Q: "a.blk.{bid}.attn_q",
823
+ MODEL_TENSOR.A_ENC_ATTN_K: "a.blk.{bid}.attn_k",
824
+ MODEL_TENSOR.A_ENC_ATTN_V: "a.blk.{bid}.attn_v",
825
+ MODEL_TENSOR.A_ENC_INPUT_NORM: "a.blk.{bid}.ln1",
826
+ MODEL_TENSOR.A_ENC_OUTPUT: "a.blk.{bid}.attn_out",
827
+ MODEL_TENSOR.A_ENC_OUTPUT_NORM: "a.blk.{bid}.ln2",
828
+ MODEL_TENSOR.A_ENC_FFN_UP: "a.blk.{bid}.ffn_up",
829
+ MODEL_TENSOR.A_ENC_FFN_GATE: "a.blk.{bid}.ffn_gate",
830
+ MODEL_TENSOR.A_ENC_FFN_DOWN: "a.blk.{bid}.ffn_down",
831
+ MODEL_TENSOR.A_MMPROJ: "mm.a.mlp.{bid}",
832
+ MODEL_TENSOR.A_MMPROJ_FC: "mm.a.fc",
833
+ MODEL_TENSOR.A_MM_NORM_PRE: "mm.a.norm_pre",
834
+ MODEL_TENSOR.A_MM_NORM_MID: "mm.a.norm_mid",
777
835
  }
778
836
 
779
837
  MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
780
- MODEL_ARCH.CLIP_VISION: [
838
+ MODEL_ARCH.MMPROJ: [
781
839
  MODEL_TENSOR.V_MMPROJ,
782
840
  MODEL_TENSOR.V_MMPROJ_FC,
783
841
  MODEL_TENSOR.V_MMPROJ_MLP,
@@ -785,14 +843,15 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
785
843
  MODEL_TENSOR.V_ENC_EMBD_CLS,
786
844
  MODEL_TENSOR.V_ENC_EMBD_PATCH,
787
845
  MODEL_TENSOR.V_ENC_EMBD_POS,
846
+ MODEL_TENSOR.V_ENC_INPUT_NORM,
788
847
  MODEL_TENSOR.V_ENC_ATTN_Q,
789
848
  MODEL_TENSOR.V_ENC_ATTN_Q_NORM,
790
849
  MODEL_TENSOR.V_ENC_ATTN_K,
791
850
  MODEL_TENSOR.V_ENC_ATTN_K_NORM,
792
851
  MODEL_TENSOR.V_ENC_ATTN_V,
793
- MODEL_TENSOR.V_ENC_INPUT_NORM,
794
- MODEL_TENSOR.V_ENC_OUTPUT,
795
- MODEL_TENSOR.V_ENC_OUTPUT_NORM,
852
+ MODEL_TENSOR.V_ENC_ATTN_O,
853
+ MODEL_TENSOR.V_ENC_ATTN_O_NORM,
854
+ MODEL_TENSOR.V_ENC_POST_ATTN_NORM,
796
855
  MODEL_TENSOR.V_ENC_FFN_UP,
797
856
  MODEL_TENSOR.V_ENC_FFN_GATE,
798
857
  MODEL_TENSOR.V_ENC_FFN_DOWN,
@@ -816,6 +875,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
816
875
  MODEL_TENSOR.V_RESMPL_QUERY,
817
876
  MODEL_TENSOR.V_TOK_EMBD_IMG_BREAK,
818
877
  MODEL_TENSOR.V_MM_PATCH_MERGER,
878
+ # audio
879
+ MODEL_TENSOR.A_ENC_EMBD_POS,
880
+ MODEL_TENSOR.A_ENC_CONV1D,
881
+ MODEL_TENSOR.A_PRE_NORM,
882
+ MODEL_TENSOR.A_POST_NORM,
883
+ MODEL_TENSOR.A_ENC_ATTN_Q,
884
+ MODEL_TENSOR.A_ENC_ATTN_K,
885
+ MODEL_TENSOR.A_ENC_ATTN_V,
886
+ MODEL_TENSOR.A_ENC_INPUT_NORM,
887
+ MODEL_TENSOR.A_ENC_OUTPUT,
888
+ MODEL_TENSOR.A_ENC_OUTPUT_NORM,
889
+ MODEL_TENSOR.A_ENC_FFN_UP,
890
+ MODEL_TENSOR.A_ENC_FFN_GATE,
891
+ MODEL_TENSOR.A_ENC_FFN_DOWN,
892
+ MODEL_TENSOR.A_MMPROJ,
893
+ MODEL_TENSOR.A_MMPROJ_FC,
894
+ MODEL_TENSOR.A_MM_NORM_PRE,
895
+ MODEL_TENSOR.A_MM_NORM_MID,
819
896
  ],
820
897
  MODEL_ARCH.LLAMA: [
821
898
  MODEL_TENSOR.TOKEN_EMBD,
@@ -959,6 +1036,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
959
1036
  MODEL_TENSOR.POS_EMBD,
960
1037
  MODEL_TENSOR.OUTPUT_NORM,
961
1038
  MODEL_TENSOR.ATTN_OUT_NORM,
1039
+ MODEL_TENSOR.ATTN_QKV,
962
1040
  MODEL_TENSOR.ATTN_Q,
963
1041
  MODEL_TENSOR.ATTN_K,
964
1042
  MODEL_TENSOR.ATTN_V,
@@ -1905,6 +1983,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
1905
1983
  MODEL_TENSOR.FFN_GATE_EXP,
1906
1984
  MODEL_TENSOR.FFN_DOWN_EXP,
1907
1985
  MODEL_TENSOR.FFN_UP_EXP,
1986
+ MODEL_TENSOR.FFN_GATE_SHEXP,
1987
+ MODEL_TENSOR.FFN_UP_SHEXP,
1988
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
1908
1989
  ],
1909
1990
  MODEL_ARCH.CHAMELEON: [
1910
1991
  MODEL_TENSOR.TOKEN_EMBD,
@@ -2177,9 +2258,13 @@ class VisionProjectorType:
2177
2258
  GEMMA3 = "gemma3"
2178
2259
  IDEFICS3 = "idefics3"
2179
2260
  PIXTRAL = "pixtral"
2261
+ LLAMA4 = "llama4"
2180
2262
  QWEN2VL = "qwen2vl_merger"
2181
2263
  QWEN25VL = "qwen2.5vl_merger"
2264
+ ULTRAVOX = "ultravox"
2182
2265
  INTERNVL = "internvl"
2266
+ QWEN2A = "qwen2a" # audio
2267
+ QWEN25O = "qwen2.5o" # omni
2183
2268
 
2184
2269
 
2185
2270
  # Items here are (block size, type size)
@@ -251,7 +251,7 @@ class GGUFReader:
251
251
  offs += curr_size
252
252
  return offs - orig_offs, aparts, data_idxs, types
253
253
  # We can't deal with this one.
254
- raise ValueError('Unknown/unhandled field type {gtype}')
254
+ raise ValueError(f'Unknown/unhandled field type {gtype}')
255
255
 
256
256
  def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
257
257
  offs = orig_offs
@@ -49,6 +49,7 @@ class TensorInfo:
49
49
  class GGUFValue:
50
50
  value: Any
51
51
  type: GGUFValueType
52
+ sub_type: GGUFValueType | None = None
52
53
 
53
54
 
54
55
  class WriterState(Enum):
@@ -238,7 +239,7 @@ class GGUFWriter:
238
239
 
239
240
  for key, val in kv_data.items():
240
241
  kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
241
- kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
242
+ kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
242
243
 
243
244
  fout.write(kv_bytes)
244
245
 
@@ -268,11 +269,11 @@ class GGUFWriter:
268
269
  fout.flush()
269
270
  self.state = WriterState.TI_DATA
270
271
 
271
- def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
272
+ def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
272
273
  if any(key in kv_data for kv_data in self.kv_data):
273
274
  raise ValueError(f'Duplicated key name {key!r}')
274
275
 
275
- self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
276
+ self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
276
277
 
277
278
  def add_uint8(self, key: str, val: int) -> None:
278
279
  self.add_key_value(key,val, GGUFValueType.UINT8)
@@ -896,7 +897,7 @@ class GGUFWriter:
896
897
  def add_remove_extra_whitespaces(self, value: bool) -> None:
897
898
  self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
898
899
 
899
- def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
900
+ def add_precompiled_charsmap(self, charsmap: bytes) -> None:
900
901
  self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
901
902
 
902
903
  def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
@@ -936,12 +937,18 @@ class GGUFWriter:
936
937
 
937
938
  # for vision models
938
939
 
940
+ def add_clip_has_vision_encoder(self, value: bool) -> None:
941
+ self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
942
+
943
+ def add_clip_has_audio_encoder(self, value: bool) -> None:
944
+ self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
945
+
946
+ def add_clip_projector_type(self, value: str) -> None:
947
+ self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
948
+
939
949
  def add_vision_projection_dim(self, value: int) -> None:
940
950
  self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
941
951
 
942
- def add_vision_has_vision_encoder(self, value: bool) -> None:
943
- self.add_bool(Keys.ClipVision.HAS_VISION_ENCODER, value)
944
-
945
952
  def add_vision_patch_size(self, value: int) -> None:
946
953
  self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
947
954
 
@@ -957,9 +964,6 @@ class GGUFWriter:
957
964
  def add_vision_head_count(self, value: int) -> None:
958
965
  self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
959
966
 
960
- def add_vision_projector_type(self, value: str) -> None:
961
- self.add_string(Keys.ClipVision.PROJECTOR_TYPE, value)
962
-
963
967
  def add_vision_attention_layernorm_eps(self, value: float) -> None:
964
968
  self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
965
969
 
@@ -987,13 +991,39 @@ class GGUFWriter:
987
991
  def add_vision_n_wa_pattern(self, value: int) -> None:
988
992
  self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
989
993
 
994
+ # audio models
995
+
996
+ def add_audio_projection_dim(self, value: int) -> None:
997
+ self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
998
+
999
+ def add_audio_embedding_length(self, value: int) -> None:
1000
+ self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
1001
+
1002
+ def add_audio_feed_forward_length(self, value: int) -> None:
1003
+ self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
1004
+
1005
+ def add_audio_block_count(self, value: int) -> None:
1006
+ self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
1007
+
1008
+ def add_audio_head_count(self, value: int) -> None:
1009
+ self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
1010
+
1011
+ def add_audio_attention_layernorm_eps(self, value: float) -> None:
1012
+ self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
1013
+
1014
+ def add_audio_num_mel_bins(self, value: int) -> None:
1015
+ self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
1016
+
1017
+ def add_audio_stack_factor(self, value: int) -> None:
1018
+ self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
1019
+
990
1020
  def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
991
1021
  pack_prefix = ''
992
1022
  if not skip_pack_prefix:
993
1023
  pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
994
1024
  return struct.pack(f'{pack_prefix}{fmt}', value)
995
1025
 
996
- def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
1026
+ def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
997
1027
  kv_data = bytearray()
998
1028
 
999
1029
  if add_vtype:
@@ -1014,7 +1044,9 @@ class GGUFWriter:
1014
1044
  if len(val) == 0:
1015
1045
  raise ValueError("Invalid GGUF metadata array. Empty array")
1016
1046
 
1017
- if isinstance(val, bytes):
1047
+ if sub_type is not None:
1048
+ ltype = sub_type
1049
+ elif isinstance(val, bytes):
1018
1050
  ltype = GGUFValueType.UINT8
1019
1051
  else:
1020
1052
  ltype = GGUFValueType.get_type(val[0])
@@ -823,6 +823,7 @@ class GGUFEditorWindow(QMainWindow):
823
823
  self.modified = False
824
824
  self.metadata_changes = {} # Store changes to apply when saving
825
825
  self.metadata_to_remove = set() # Store keys to remove when saving
826
+ self.on_metadata_changed_is_connected = False
826
827
 
827
828
  self.setup_ui()
828
829
 
@@ -941,9 +942,11 @@ class GGUFEditorWindow(QMainWindow):
941
942
  return
942
943
 
943
944
  # Disconnect to prevent triggering during loading
944
- with warnings.catch_warnings():
945
- warnings.filterwarnings('ignore')
946
- self.metadata_table.itemChanged.disconnect(self.on_metadata_changed)
945
+ if self.on_metadata_changed_is_connected:
946
+ with warnings.catch_warnings():
947
+ warnings.filterwarnings('ignore')
948
+ self.metadata_table.itemChanged.disconnect(self.on_metadata_changed)
949
+ self.on_metadata_changed_is_connected = False
947
950
 
948
951
  for i, (key, field) in enumerate(self.reader.fields.items()):
949
952
  self.metadata_table.insertRow(i)
@@ -1021,6 +1024,7 @@ class GGUFEditorWindow(QMainWindow):
1021
1024
 
1022
1025
  # Reconnect after loading
1023
1026
  self.metadata_table.itemChanged.connect(self.on_metadata_changed)
1027
+ self.on_metadata_changed_is_connected = True
1024
1028
 
1025
1029
  def extract_array_values(self, field: ReaderField) -> list:
1026
1030
  """Extract all values from an array field."""
@@ -1517,19 +1521,21 @@ class GGUFEditorWindow(QMainWindow):
1517
1521
  continue
1518
1522
 
1519
1523
  # Apply changes if any
1524
+ sub_type = None
1520
1525
  if field.name in self.metadata_changes:
1521
1526
  value_type, value = self.metadata_changes[field.name]
1522
1527
  if value_type == GGUFValueType.ARRAY:
1523
1528
  # Handle array values
1524
- element_type, array_values = value
1525
- writer.add_array(field.name, array_values)
1526
- else:
1527
- writer.add_key_value(field.name, value, value_type)
1529
+ sub_type, value = value
1528
1530
  else:
1529
1531
  # Copy original value
1530
1532
  value = field.contents()
1531
- if value is not None and field.types:
1532
- writer.add_key_value(field.name, value, field.types[0])
1533
+ value_type = field.types[0]
1534
+ if value_type == GGUFValueType.ARRAY:
1535
+ sub_type = field.types[-1]
1536
+
1537
+ if value is not None:
1538
+ writer.add_key_value(field.name, value, value_type, sub_type=sub_type)
1533
1539
 
1534
1540
  # Add new metadata
1535
1541
  for key, (value_type, value) in self.metadata_changes.items():
@@ -1537,7 +1543,12 @@ class GGUFEditorWindow(QMainWindow):
1537
1543
  if self.reader.get_field(key) is not None:
1538
1544
  continue
1539
1545
 
1540
- writer.add_key_value(key, value, value_type)
1546
+ sub_type = None
1547
+ if value_type == GGUFValueType.ARRAY:
1548
+ # Handle array values
1549
+ sub_type, value = value
1550
+
1551
+ writer.add_key_value(key, value, value_type, sub_type=sub_type)
1541
1552
 
1542
1553
  # Add tensors (including data)
1543
1554
  for tensor in self.reader.tensors:
@@ -24,6 +24,7 @@ class MetadataDetails(NamedTuple):
24
24
  type: gguf.GGUFValueType
25
25
  value: Any
26
26
  description: str = ''
27
+ sub_type: gguf.GGUFValueType | None = None
27
28
 
28
29
 
29
30
  def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
@@ -57,7 +58,9 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
57
58
  logger.debug(f'Removing {field.name}')
58
59
  continue
59
60
 
60
- old_val = MetadataDetails(field.types[0], field.contents())
61
+ val_type = field.types[0]
62
+ sub_type = field.types[-1] if val_type == gguf.GGUFValueType.ARRAY else None
63
+ old_val = MetadataDetails(val_type, field.contents(), sub_type=sub_type)
61
64
  val = new_metadata.get(field.name, old_val)
62
65
 
63
66
  if field.name in new_metadata:
@@ -67,7 +70,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
67
70
  logger.debug(f'Copying {field.name}')
68
71
 
69
72
  if val.value is not None:
70
- writer.add_key_value(field.name, val.value, val.type)
73
+ writer.add_key_value(field.name, val.value, val.type, sub_type=sub_type if val.sub_type is None else val.sub_type)
71
74
 
72
75
  if gguf.Keys.Tokenizer.CHAT_TEMPLATE in new_metadata:
73
76
  logger.debug('Adding chat template(s)')