@novastera-oss/llamarn 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253)
  1. package/android/src/main/cpp/include/llama.h +141 -38
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  10. package/cpp/LlamaCppModel.cpp +58 -24
  11. package/cpp/LlamaCppModel.h +3 -3
  12. package/cpp/PureCppImpl.cpp +1 -1
  13. package/cpp/PureCppImpl.h +2 -2
  14. package/cpp/build-info.cpp +2 -2
  15. package/cpp/llama.cpp/CMakeLists.txt +15 -4
  16. package/cpp/llama.cpp/Makefile +2 -2
  17. package/cpp/llama.cpp/README.md +32 -13
  18. package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
  19. package/cpp/llama.cpp/common/arg.cpp +37 -6
  20. package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
  21. package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
  22. package/cpp/llama.cpp/common/chat-parser.h +2 -0
  23. package/cpp/llama.cpp/common/chat.cpp +12 -9
  24. package/cpp/llama.cpp/common/chat.h +1 -1
  25. package/cpp/llama.cpp/common/common.cpp +53 -40
  26. package/cpp/llama.cpp/common/common.h +6 -2
  27. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  28. package/cpp/llama.cpp/common/speculative.cpp +6 -4
  29. package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
  30. package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
  31. package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
  32. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  33. package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
  34. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
  35. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
  36. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  38. package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
  39. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
  40. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  67. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
  70. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
  72. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  88. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
  90. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  91. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
  93. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
  94. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  95. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
  97. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
  105. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
  112. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
  113. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  115. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  116. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
  117. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  133. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
  134. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
  135. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
  136. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
  138. package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
  139. package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
  140. package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
  141. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
  142. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
  143. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
  144. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  145. package/cpp/llama.cpp/include/llama.h +141 -38
  146. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  147. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
  148. package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
  149. package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
  150. package/cpp/llama.cpp/src/llama-arch.h +25 -1
  151. package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
  152. package/cpp/llama.cpp/src/llama-batch.h +110 -57
  153. package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
  154. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  155. package/cpp/llama.cpp/src/llama-context.cpp +360 -266
  156. package/cpp/llama.cpp/src/llama-context.h +27 -23
  157. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
  158. package/cpp/llama.cpp/src/llama-cparams.h +1 -1
  159. package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
  160. package/cpp/llama.cpp/src/llama-graph.h +126 -58
  161. package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
  162. package/cpp/llama.cpp/src/llama-hparams.h +16 -2
  163. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
  164. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
  165. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
  166. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
  167. package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
  168. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
  169. package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
  170. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
  171. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
  172. package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
  173. package/cpp/llama.cpp/src/llama-memory.h +73 -36
  174. package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
  175. package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
  176. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  177. package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
  178. package/cpp/llama.cpp/src/llama-model.h +26 -0
  179. package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
  180. package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
  181. package/cpp/llama.cpp/src/llama-vocab.h +1 -0
  182. package/cpp/llama.cpp/src/llama.cpp +11 -7
  183. package/cpp/llama.cpp/src/unicode.cpp +5 -0
  184. package/cpp/rn-completion.cpp +2 -2
  185. package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
  186. package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
  187. package/ios/include/chat.h +1 -1
  188. package/ios/include/common.h +6 -2
  189. package/ios/include/llama.h +141 -38
  190. package/ios/libs/llama.xcframework/Info.plist +15 -15
  191. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  192. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  193. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  194. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
  195. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
  196. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  197. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  198. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  199. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  200. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  201. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  202. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  203. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  204. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  205. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  206. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
  207. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  208. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
  209. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
  210. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  211. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  219. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  220. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  221. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  222. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
  223. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
  224. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  225. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  226. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  227. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  228. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  229. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  231. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  232. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  233. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
  234. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  235. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
  236. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
  237. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  238. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  239. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
  240. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
  241. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  242. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  243. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  244. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  245. package/package.json +1 -2
  246. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
  247. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  248. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
  249. package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
  250. package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
  251. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  252. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  253. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
@@ -31,6 +31,7 @@ class TensorNameMap:
31
31
  "model.embeddings", # rwkv7
32
32
  "model.word_embeddings", # bailingmoe
33
33
  "language_model.model.embed_tokens", # llama4
34
+ "encoder", # neobert
34
35
  ),
35
36
 
36
37
  # Token type embeddings
@@ -134,6 +135,7 @@ class TensorNameMap:
134
135
  "rwkv.blocks.{bid}.ln1", # rwkv6
135
136
  "model.layers.{bid}.ln1", # rwkv7
136
137
  "model.layers.{bid}.input_layernorm", # llama4
138
+ "transformer_encoder.{bid}.attention_norm", # neobert
137
139
  ),
138
140
 
139
141
  # Attention norm 2
@@ -161,6 +163,7 @@ class TensorNameMap:
161
163
  "model.layers.{bid}.self_attn.qkv_proj", # phi3
162
164
  "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
163
165
  "transformer.layers.{bid}.attn.qkv_proj", # openelm
166
+ "transformer_encoder.{bid}.qkv", # neobert
164
167
  ),
165
168
 
166
169
  # Attention query
@@ -236,6 +239,7 @@ class TensorNameMap:
236
239
  "transformer.layers.{bid}.attn.out_proj", # openelm
237
240
  "transformer.h.{bid}.attn.attention.out_proj", # exaone
238
241
  "model.layers.{bid}.self_attn.o_proj", # llama4
242
+ "transformer_encoder.{bid}.wo", # neobert
239
243
  ),
240
244
 
241
245
  # Attention output norm
@@ -276,6 +280,7 @@ class TensorNameMap:
276
280
  "encoder.layers.{bid}.post_attention_layernorm", # chatglm
277
281
  "transformer.layers.{bid}.ffn_norm", # openelm
278
282
  "model.layers.{bid}.post_attention_layernorm", # llama4
283
+ "transformer_encoder.{bid}.ffn_norm", # neobert
279
284
  ),
280
285
 
281
286
  # Post feed-forward norm
@@ -305,7 +310,7 @@ class TensorNameMap:
305
310
  ),
306
311
 
307
312
  MODEL_TENSOR.FFN_EXP_PROBS_B: (
308
- "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
313
+ "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
309
314
  ),
310
315
 
311
316
  # Feed-forward up
@@ -333,11 +338,14 @@ class TensorNameMap:
333
338
  "encoder.layers.{bid}.mlp.fc11", # nomic-bert
334
339
  "encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
335
340
  "model.layers.{bid}.mlp.c_fc", # starcoder2
336
- "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
341
+ "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2 (split up/gate, no longer used)
342
+ "encoder.layer.{bid}.mlp.gated_layers", # jina-bert-v2 (GEGLU)
343
+ "encoder.layer.{bid}.mlp.up_gated_layer", # jina-v2-code (GEGLU)
337
344
  "model.layers.{bid}.residual_mlp.w3", # arctic
338
345
  "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
339
346
  "transformer.h.{bid}.mlp.c_fc_1", # exaone
340
347
  "model.layers.{bid}.feed_forward.up_proj", # llama4
348
+ "transformer_encoder.{bid}.ffn.w12", # neobert
341
349
  ),
342
350
 
343
351
  MODEL_TENSOR.FFN_UP_EXP: (
@@ -370,7 +378,7 @@ class TensorNameMap:
370
378
  "model.layers.layers.{bid}.mlp.gate_proj", # plamo
371
379
  "model.layers.{bid}.feed_forward.w1", # internlm2
372
380
  "encoder.layers.{bid}.mlp.fc12", # nomic-bert
373
- "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
381
+ "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2 (split up/gate, no longer used)
374
382
  "transformer.h.{bid}.mlp.linear_1", # refact
375
383
  "model.layers.{bid}.residual_mlp.w1", # arctic
376
384
  "transformer.h.{bid}.mlp.c_fc_0", # exaone
@@ -420,6 +428,7 @@ class TensorNameMap:
420
428
  "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
421
429
  "model.layers.h.{bid}.mlp.c_proj", # exaone
422
430
  "model.layers.{bid}.feed_forward.down_proj", # llama4
431
+ "transformer_encoder.{bid}.ffn.w3", # neobert
423
432
  ),
424
433
 
425
434
  MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -471,6 +480,70 @@ class TensorNameMap:
471
480
  "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
472
481
  ),
473
482
 
483
+ MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
484
+ "model.embed_tokens_per_layer", # gemma3n
485
+ ),
486
+
487
+ MODEL_TENSOR.PER_LAYER_MODEL_PROJ: (
488
+ "model.per_layer_model_projection", # gemma3n
489
+ ),
490
+
491
+ MODEL_TENSOR.PER_LAYER_PROJ_NORM: (
492
+ "model.per_layer_projection_norm", # gemma3n
493
+ ),
494
+
495
+ MODEL_TENSOR.ALTUP_PROJ: (
496
+ "model.altup_projections", # gemma3n
497
+ ),
498
+
499
+ MODEL_TENSOR.ALTUP_UNEMBD_PROJ: (
500
+ "model.altup_unembed_projections", # gemma3n
501
+ ),
502
+
503
+ MODEL_TENSOR.PER_LAYER_INP_GATE: (
504
+ "model.layers.{bid}.per_layer_input_gate", # gemma3n
505
+ ),
506
+
507
+ MODEL_TENSOR.PER_LAYER_PROJ: (
508
+ "model.layers.{bid}.per_layer_projection", # gemma3n
509
+ ),
510
+
511
+ MODEL_TENSOR.PER_LAYER_POST_NORM: (
512
+ "model.layers.{bid}.post_per_layer_input_norm", # gemma3n
513
+ ),
514
+
515
+ MODEL_TENSOR.ALTUP_CORRECT_COEF: (
516
+ "model.layers.{bid}.altup.correction_coefs", # gemma3n
517
+ ),
518
+
519
+ MODEL_TENSOR.ALTUP_CORRECT_SCALE: (
520
+ "model.layers.{bid}.altup.correct_output_scale", # gemma3n
521
+ ),
522
+
523
+ MODEL_TENSOR.ALTUP_PREDICT_COEF: (
524
+ "model.layers.{bid}.altup.prediction_coefs", # gemma3n
525
+ ),
526
+
527
+ MODEL_TENSOR.ALTUP_ROUTER: (
528
+ "model.layers.{bid}.altup.modality_router", # gemma3n
529
+ ),
530
+
531
+ MODEL_TENSOR.ALTUP_ROUTER_NORM: (
532
+ "model.layers.{bid}.altup.router_norm", # gemma3n
533
+ ),
534
+
535
+ MODEL_TENSOR.LAUREL_L: (
536
+ "model.layers.{bid}.laurel.linear_left", # gemma3n
537
+ ),
538
+
539
+ MODEL_TENSOR.LAUREL_R: (
540
+ "model.layers.{bid}.laurel.linear_right", # gemma3n
541
+ ),
542
+
543
+ MODEL_TENSOR.LAUREL_POST_NORM: (
544
+ "model.layers.{bid}.laurel.post_laurel_norm", # gemma3n
545
+ ),
546
+
474
547
  MODEL_TENSOR.SSM_IN: (
475
548
  "model.layers.{bid}.in_proj",
476
549
  "backbone.layers.{bid}.mixer.in_proj",
@@ -830,12 +903,14 @@ class TensorNameMap:
830
903
  # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
831
904
  MODEL_TENSOR.ENC_OUTPUT_NORM: (
832
905
  "encoder.final_layer_norm", # t5
906
+ "layer_norm", # neobert
833
907
  ),
834
908
 
835
909
  MODEL_TENSOR.CLS: (
836
910
  "classifier", # jina
837
911
  "classifier.dense", # roberta
838
912
  "pre_classifier", # distillbert
913
+ "dense", # neobert
839
914
  ),
840
915
 
841
916
  MODEL_TENSOR.CLS_OUT: (
@@ -7,7 +7,10 @@ import os
7
7
  from pathlib import Path
8
8
  from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
9
9
 
10
- from sentencepiece import SentencePieceProcessor
10
+ try:
11
+ from sentencepiece import SentencePieceProcessor
12
+ except ImportError:
13
+ SentencePieceProcessor = None
11
14
 
12
15
  import gguf
13
16
 
@@ -116,6 +119,7 @@ class SpecialVocab:
116
119
  logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
117
120
 
118
121
  def _try_load_from_tokenizer_json(self, path: Path) -> bool:
122
+ tokenizer = None
119
123
  tokenizer_file = path / 'tokenizer.json'
120
124
  if tokenizer_file.is_file():
121
125
  with open(tokenizer_file, encoding = 'utf-8') as f:
@@ -149,11 +153,97 @@ class SpecialVocab:
149
153
  added_tokens = tokenizer.get('added_tokens', {})
150
154
  else:
151
155
  added_tokens = {}
156
+ tokenizer_config = None
152
157
  tokenizer_config_file = path / 'tokenizer_config.json'
153
- if not tokenizer_config_file.is_file():
158
+ if tokenizer_config_file.is_file():
159
+ with open(tokenizer_config_file, encoding = 'utf-8') as f:
160
+ tokenizer_config = json.load(f)
161
+ if tokenizer:
162
+ special_bos = (tokenizer_config or {}).get('bos_token')
163
+ special_cls = (tokenizer_config or {}).get('cls_token')
164
+ special_eos = (tokenizer_config or {}).get('eos_token')
165
+ special_sep = (tokenizer_config or {}).get('sep_token')
166
+ if not special_bos and special_cls and tokenizer_config:
167
+ tokenizer_config['bos_token'] = special_bos = special_cls
168
+ if not special_eos and special_sep and tokenizer_config:
169
+ tokenizer_config['eos_token'] = special_eos = special_sep
170
+ if post_processor := tokenizer.get('post_processor'):
171
+ for processor in post_processor.get('processors', [post_processor]):
172
+ if processor.get('type') == 'RobertaProcessing':
173
+ self.add_special_token['bos'] = True
174
+ self.add_special_token['eos'] = True
175
+ self.add_special_token['sep'] = True
176
+ if not special_cls and tokenizer_config:
177
+ special_cls = processor.get('cls', [special_bos])[0]
178
+ tokenizer_config['cls_token'] = special_cls
179
+ if not special_sep and tokenizer_config:
180
+ special_sep = processor.get('sep', [special_eos])[0]
181
+ tokenizer_config['sep_token'] = special_sep
182
+ continue
183
+ # Crude parsing of TemplateProcessing to determine if BOS/SEP/EOS should be added
184
+ # Only works with simple templates, **will** get it wrong on unusual sequences
185
+ if processor.get('type') == 'TemplateProcessing':
186
+ tmpl_single = processor.get('single', [])
187
+ tmpl_pair = processor.get('pair', [])
188
+ special_first = None
189
+ special_last = None
190
+ if len(tmpl_single) > 1:
191
+ if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'):
192
+ if not tokenizer_config:
193
+ special_bos = special_first
194
+ self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False
195
+ if special_first not in (special_bos, special_cls):
196
+ logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing<single>')
197
+ if special_last := tmpl_single[-1].get('SpecialToken', {}).get('id'):
198
+ if not tokenizer_config:
199
+ special_eos = special_last
200
+ elif special_last != special_eos:
201
+ if 'eot' not in self.special_token_types:
202
+ self.special_token_types = tuple(self.special_token_types) + ('eot', )
203
+ tokenizer_config['eot_token'] = special_eos
204
+ elif 'eom' not in self.special_token_types:
205
+ self.special_token_types = tuple(self.special_token_types) + ('eom', )
206
+ tokenizer_config['eom_token'] = special_eos
207
+ else:
208
+ logger.warning(f'Overriding EOS token {special_eos!r} with {special_last!r} without EOT/EOM fallback!')
209
+ tokenizer_config['eos_token'] = special_eos = special_last
210
+ self.add_special_token['eos'] = True if special_last == special_eos else False
211
+ if special_last != special_eos:
212
+ logger.warning(f'Unknown trailing special token {special_last!r} in TemplateProcessing<single>')
213
+ if tmpl_pair:
214
+ seq_start = 1 if special_first and tmpl_pair[0].get('SpecialToken', {}).get('id') == special_first else 0
215
+ seq_stop = -1 if special_last and tmpl_pair[-1].get('SpecialToken', {}).get('id') == special_last else None
216
+ if (special_first and seq_start == 0) or (special_last and seq_stop is None):
217
+ logger.warning('TemplateProcessing<single> leading/trailing special tokens do not match TemplateProcessing<pair>')
218
+ if tmpl_pair := tmpl_pair[slice(seq_start, seq_stop)]:
219
+ tmpl_a = tmpl_pair[0].get('Sequence', {}).get('id')
220
+ tmpl_b = tmpl_pair[-1].get('Sequence', {}).get('id')
221
+ if tmpl_a != 'A' or tmpl_b != 'B':
222
+ logger.warning(f'Unknown sequence {tmpl_a}...{tmpl_b} in TemplateProcessing<pair>')
223
+ # A [sep] [eos] B
224
+ if tmpl_a == 'A' and tmpl_b == 'B' and (tmpl_pair := tmpl_pair[1:-1]):
225
+ add_sep = False
226
+ if special_entry := tmpl_pair[0].get('SpecialToken', {}).get('id'):
227
+ if special_entry in (special_sep, special_eos) and not special_last:
228
+ add_sep = True
229
+ if special_entry not in (special_sep, special_eos):
230
+ logger.warning(f'Unknown separator token {special_entry!r} in TemplateProcessing<pair>')
231
+ else:
232
+ logger.warning(f'Unknown middle sequence {tmpl_pair[0]!r} in TemplateProcessing<pair>')
233
+ if len(tmpl_pair) == 2:
234
+ if special_entry := tmpl_pair[1].get('SpecialToken', {}).get('id'):
235
+ if special_entry in (special_sep, special_eos):
236
+ add_sep = True
237
+ if special_entry not in (special_sep, special_eos):
238
+ logger.warning(f'Unknown second separator token {special_entry!r} in TemplateProcessing<pair>')
239
+ else:
240
+ logger.warning(f'Unknown second middle sequence {tmpl_pair[1]!r} in TemplateProcessing<pair>')
241
+ self.add_special_token['sep'] = add_sep
242
+ if add_sep and not special_sep and tokenizer_config:
243
+ tokenizer_config['sep_token'] = special_eos
244
+ continue
245
+ if not tokenizer_config:
154
246
  return True
155
- with open(tokenizer_config_file, encoding = 'utf-8') as f:
156
- tokenizer_config = json.load(f)
157
247
  chat_template_alt = None
158
248
  chat_template_file = path / 'chat_template.json'
159
249
  if chat_template_file.is_file():
@@ -302,6 +392,9 @@ class SentencePieceVocab(Vocab):
302
392
  name = "spm"
303
393
 
304
394
  def __init__(self, base_path: Path):
395
+ if SentencePieceProcessor is None:
396
+ raise RuntimeError("sentencepiece is not installed")
397
+
305
398
  added_tokens: dict[str, int] = {}
306
399
  if (fname_tokenizer := base_path / 'tokenizer.model').exists():
307
400
  # normal location
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "gguf"
3
- version = "0.17.0"
3
+ version = "0.17.1"
4
4
  description = "Read and write ML models in GGUF for GGML"
5
5
  authors = ["GGML <ggml@ggml.ai>"]
6
6
  packages = [
@@ -22,7 +22,7 @@ python = ">=3.8"
22
22
  numpy = ">=1.17"
23
23
  tqdm = ">=4.27"
24
24
  pyyaml = ">=5.1"
25
- sentencepiece = ">=0.1.98,<=0.2.0"
25
+ sentencepiece = { version = ">=0.1.98,<=0.2.0", optional = true }
26
26
  PySide6 = { version = "^6.9", python = ">=3.9,<3.14", optional = true }
27
27
 
28
28
  [tool.poetry.dev-dependencies]