@novastera-oss/llamarn 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253)
  1. package/android/src/main/cpp/include/llama.h +141 -38
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  10. package/cpp/LlamaCppModel.cpp +58 -24
  11. package/cpp/LlamaCppModel.h +3 -3
  12. package/cpp/PureCppImpl.cpp +1 -1
  13. package/cpp/PureCppImpl.h +2 -2
  14. package/cpp/build-info.cpp +2 -2
  15. package/cpp/llama.cpp/CMakeLists.txt +15 -4
  16. package/cpp/llama.cpp/Makefile +2 -2
  17. package/cpp/llama.cpp/README.md +32 -13
  18. package/cpp/llama.cpp/common/CMakeLists.txt +10 -20
  19. package/cpp/llama.cpp/common/arg.cpp +37 -6
  20. package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
  21. package/cpp/llama.cpp/common/chat-parser.cpp +5 -0
  22. package/cpp/llama.cpp/common/chat-parser.h +2 -0
  23. package/cpp/llama.cpp/common/chat.cpp +12 -9
  24. package/cpp/llama.cpp/common/chat.h +1 -1
  25. package/cpp/llama.cpp/common/common.cpp +53 -40
  26. package/cpp/llama.cpp/common/common.h +6 -2
  27. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  28. package/cpp/llama.cpp/common/speculative.cpp +6 -4
  29. package/cpp/llama.cpp/convert_hf_to_gguf.py +215 -76
  30. package/cpp/llama.cpp/ggml/CMakeLists.txt +48 -2
  31. package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
  32. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  33. package/cpp/llama.cpp/ggml/include/ggml.h +33 -0
  34. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +64 -13
  35. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
  36. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  38. package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
  39. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +124 -26
  40. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +11 -10
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4114 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2163 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2639 -0
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2732 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2069 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +397 -0
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1300 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1481 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4311 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +4 -3
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +16 -7
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +93 -104
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +12 -8
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +58 -8
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +194 -69
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +2 -0
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1158 -0
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  67. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1571 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +213 -37
  70. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  71. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +2 -2
  72. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +45 -45
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +59 -37
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +4 -1
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +4 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +90 -39
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  88. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-impl.h +61 -183
  90. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  91. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +16 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +260 -49
  93. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +497 -282
  94. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  95. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1078 -468
  97. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  99. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  102. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
  105. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +1 -1
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +20 -48
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +117 -165
  112. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +192 -53
  113. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  115. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  116. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +99 -159
  117. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +8 -105
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +209 -92
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +158 -203
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +24 -20
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  133. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +36 -28
  134. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +487 -247
  135. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -12
  136. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
  138. package/cpp/llama.cpp/ggml/src/ggml.c +69 -19
  139. package/cpp/llama.cpp/ggml/src/gguf.cpp +5 -1
  140. package/cpp/llama.cpp/gguf-py/gguf/constants.py +133 -0
  141. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +25 -1
  142. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +78 -3
  143. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +97 -4
  144. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  145. package/cpp/llama.cpp/include/llama.h +141 -38
  146. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  147. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
  148. package/cpp/llama.cpp/src/CMakeLists.txt +2 -2
  149. package/cpp/llama.cpp/src/llama-arch.cpp +150 -3
  150. package/cpp/llama.cpp/src/llama-arch.h +25 -1
  151. package/cpp/llama.cpp/src/llama-batch.cpp +736 -274
  152. package/cpp/llama.cpp/src/llama-batch.h +110 -57
  153. package/cpp/llama.cpp/src/llama-chat.cpp +30 -8
  154. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  155. package/cpp/llama.cpp/src/llama-context.cpp +360 -266
  156. package/cpp/llama.cpp/src/llama-context.h +27 -23
  157. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
  158. package/cpp/llama.cpp/src/llama-cparams.h +1 -1
  159. package/cpp/llama.cpp/src/llama-graph.cpp +411 -344
  160. package/cpp/llama.cpp/src/llama-graph.h +126 -58
  161. package/cpp/llama.cpp/src/llama-hparams.cpp +10 -2
  162. package/cpp/llama.cpp/src/llama-hparams.h +16 -2
  163. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +103 -73
  164. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +34 -42
  165. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +345 -221
  166. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +75 -50
  167. package/cpp/llama.cpp/src/llama-kv-cells.h +51 -22
  168. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
  169. package/cpp/llama.cpp/src/llama-memory-hybrid.h +138 -0
  170. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.cpp → llama-memory-recurrent.cpp} +302 -317
  171. package/cpp/llama.cpp/src/{llama-kv-cache-recurrent.h → llama-memory-recurrent.h} +60 -68
  172. package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
  173. package/cpp/llama.cpp/src/llama-memory.h +73 -36
  174. package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
  175. package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
  176. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  177. package/cpp/llama.cpp/src/llama-model.cpp +1630 -511
  178. package/cpp/llama.cpp/src/llama-model.h +26 -0
  179. package/cpp/llama.cpp/src/llama-quant.cpp +89 -6
  180. package/cpp/llama.cpp/src/llama-vocab.cpp +58 -26
  181. package/cpp/llama.cpp/src/llama-vocab.h +1 -0
  182. package/cpp/llama.cpp/src/llama.cpp +11 -7
  183. package/cpp/llama.cpp/src/unicode.cpp +5 -0
  184. package/cpp/rn-completion.cpp +2 -2
  185. package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
  186. package/cpp/{rn-utils.hpp → rn-utils.h} +3 -0
  187. package/ios/include/chat.h +1 -1
  188. package/ios/include/common.h +6 -2
  189. package/ios/include/llama.h +141 -38
  190. package/ios/libs/llama.xcframework/Info.plist +15 -15
  191. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  192. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  193. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  194. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +33 -0
  195. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +141 -38
  196. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  197. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  198. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  199. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  200. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  201. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  202. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  203. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  204. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  205. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  206. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3766 -3624
  207. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  208. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +33 -0
  209. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +141 -38
  210. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  211. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +33 -0
  212. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +141 -38
  213. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  214. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  215. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +33 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +141 -38
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  219. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  220. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4890 -4689
  221. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  222. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +33 -0
  223. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +141 -38
  224. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  225. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  226. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4861 -4710
  227. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3764 -3622
  228. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  229. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  230. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  231. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  232. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  233. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4926 -4725
  234. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  235. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +33 -0
  236. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +141 -38
  237. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  238. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  239. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4897 -4746
  240. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3794 -3652
  241. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  242. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +33 -0
  243. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +141 -38
  244. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  245. package/package.json +1 -2
  246. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
  247. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  248. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
  249. package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -1
  250. package/cpp/llama.cpp/src/llama-kv-cache.h +0 -44
  251. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  252. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  253. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
@@ -20,6 +20,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
20
20
  { LLM_ARCH_BERT, "bert" },
21
21
  { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
22
22
  { LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
23
+ { LLM_ARCH_NEO_BERT, "neo-bert" },
23
24
  { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
24
25
  { LLM_ARCH_BLOOM, "bloom" },
25
26
  { LLM_ARCH_STABLELM, "stablelm" },
@@ -41,6 +42,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
41
42
  { LLM_ARCH_GEMMA, "gemma" },
42
43
  { LLM_ARCH_GEMMA2, "gemma2" },
43
44
  { LLM_ARCH_GEMMA3, "gemma3" },
45
+ { LLM_ARCH_GEMMA3N, "gemma3n" },
44
46
  { LLM_ARCH_STARCODER2, "starcoder2" },
45
47
  { LLM_ARCH_MAMBA, "mamba" },
46
48
  { LLM_ARCH_XVERSE, "xverse" },
@@ -72,6 +74,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
72
74
  { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
73
75
  { LLM_ARCH_PLM, "plm" },
74
76
  { LLM_ARCH_BAILINGMOE, "bailingmoe" },
77
+ { LLM_ARCH_DOTS1, "dots1" },
78
+ { LLM_ARCH_ARCEE, "arcee" },
75
79
  { LLM_ARCH_UNKNOWN, "(unknown)" },
76
80
  };
77
81
 
@@ -144,6 +148,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
144
148
  { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
145
149
  { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
146
150
  { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
151
+ { LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
147
152
 
148
153
  { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
149
154
  { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -194,13 +199,13 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
194
199
  { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" },
195
200
  { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" },
196
201
  { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" },
202
+ { LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" },
197
203
  { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" },
198
204
  { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" },
199
205
  { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" },
200
206
  { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" },
201
207
  { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" },
202
208
  { LLM_KV_TOKENIZER_CHAT_TEMPLATE, "tokenizer.chat_template" },
203
- { LLM_KV_TOKENIZER_CHAT_TEMPLATE_N, "tokenizer.chat_template.%s" },
204
209
  { LLM_KV_TOKENIZER_FIM_PRE_ID, "tokenizer.ggml.fim_pre_token_id" },
205
210
  { LLM_KV_TOKENIZER_FIM_SUF_ID, "tokenizer.ggml.fim_suf_token_id" },
206
211
  { LLM_KV_TOKENIZER_FIM_MID_ID, "tokenizer.ggml.fim_mid_token_id" },
@@ -244,6 +249,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
244
249
  { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
245
250
  },
246
251
  },
252
+ {
253
+ LLM_ARCH_ARCEE,
254
+ {
255
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
256
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
257
+ { LLM_TENSOR_OUTPUT, "output" },
258
+ { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
259
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
260
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
261
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
262
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
263
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
264
+ { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
265
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
266
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
267
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
268
+ },
269
+ },
247
270
  {
248
271
  LLM_ARCH_LLAMA4,
249
272
  {
@@ -495,6 +518,21 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
495
518
  { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
496
519
  },
497
520
  },
521
+ {
522
+ LLM_ARCH_NEO_BERT,
523
+ {
524
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
525
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
526
+ { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
527
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
528
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
529
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
530
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
531
+ { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
532
+ { LLM_TENSOR_CLS, "cls" },
533
+ { LLM_TENSOR_CLS_OUT, "cls.output" },
534
+ },
535
+ },
498
536
  {
499
537
  LLM_ARCH_JINA_BERT_V2,
500
538
  {
@@ -895,6 +933,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
895
933
  { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
896
934
  },
897
935
  },
936
+ {
937
+ LLM_ARCH_GEMMA3N,
938
+ {
939
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
940
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
941
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
942
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
943
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
944
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
945
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
946
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
947
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
948
+ { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
949
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
950
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
951
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
952
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
953
+ { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
954
+ { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
955
+ { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
956
+ { LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
957
+ { LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
958
+ { LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
959
+ { LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
960
+ { LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
961
+ { LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
962
+ { LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
963
+ { LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
964
+ { LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
965
+ { LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
966
+ { LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
967
+ { LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
968
+ { LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
969
+ { LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
970
+ },
971
+ },
898
972
  {
899
973
  LLM_ARCH_STARCODER2,
900
974
  {
@@ -1556,6 +1630,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1556
1630
  { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1557
1631
  },
1558
1632
  },
1633
+ {
1634
+ LLM_ARCH_DOTS1,
1635
+ {
1636
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1637
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1638
+ { LLM_TENSOR_OUTPUT, "output" },
1639
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1640
+ { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1641
+ { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1642
+ { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1643
+ { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1644
+ { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1645
+ { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1646
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1647
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1648
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1649
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1650
+ { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1651
+ { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1652
+ { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1653
+ { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1654
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1655
+ { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1656
+ { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1657
+ { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1658
+ { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1659
+ }
1660
+ },
1559
1661
  {
1560
1662
  LLM_ARCH_UNKNOWN,
1561
1663
  {
@@ -1684,6 +1786,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1684
1786
  {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
1685
1787
  {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
1686
1788
  {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
1789
+ // altup / laurel (gemma 3n)
1790
+ {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
1791
+ {LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
1792
+ {LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
1793
+ {LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
1794
+ {LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
1795
+ {LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1796
+ {LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1797
+ {LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1798
+ {LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1799
+ {LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1800
+ {LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1801
+ {LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1802
+ {LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1803
+ {LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1804
+ {LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1805
+ {LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1687
1806
  // this tensor is loaded for T5, but never used
1688
1807
  {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
1689
1808
  {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
@@ -1707,8 +1826,14 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
1707
1826
  LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
1708
1827
 
1709
1828
  std::string LLM_KV::operator()(llm_kv kv) const {
1710
- return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix)
1711
- : ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
1829
+ std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
1830
+
1831
+ if (suffix != nullptr) {
1832
+ name += ".";
1833
+ name += suffix;
1834
+ }
1835
+
1836
+ return name;
1712
1837
  }
1713
1838
 
1714
1839
  std::string LLM_TN_IMPL::str() const {
@@ -1747,3 +1872,25 @@ llm_arch llm_arch_from_string(const std::string & name) {
1747
1872
  const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
1748
1873
  return LLM_TENSOR_INFOS.at(tensor);
1749
1874
  }
1875
+
1876
+ bool llm_arch_is_recurrent(const llm_arch & arch) {
1877
+ switch (arch) {
1878
+ case LLM_ARCH_MAMBA:
1879
+ case LLM_ARCH_RWKV6:
1880
+ case LLM_ARCH_RWKV6QWEN2:
1881
+ case LLM_ARCH_RWKV7:
1882
+ case LLM_ARCH_ARWKV7:
1883
+ return true;
1884
+ default:
1885
+ return false;
1886
+ }
1887
+ }
1888
+
1889
+ bool llm_arch_is_hybrid(const llm_arch & arch) {
1890
+ // TODO: There are currently no hybrid models! Once there are, this will be
1891
+ // the place to identify them
1892
+ switch (arch) {
1893
+ default:
1894
+ return false;
1895
+ }
1896
+ }
@@ -24,6 +24,7 @@ enum llm_arch {
24
24
  LLM_ARCH_BERT,
25
25
  LLM_ARCH_NOMIC_BERT,
26
26
  LLM_ARCH_NOMIC_BERT_MOE,
27
+ LLM_ARCH_NEO_BERT,
27
28
  LLM_ARCH_JINA_BERT_V2,
28
29
  LLM_ARCH_BLOOM,
29
30
  LLM_ARCH_STABLELM,
@@ -45,6 +46,7 @@ enum llm_arch {
45
46
  LLM_ARCH_GEMMA,
46
47
  LLM_ARCH_GEMMA2,
47
48
  LLM_ARCH_GEMMA3,
49
+ LLM_ARCH_GEMMA3N,
48
50
  LLM_ARCH_STARCODER2,
49
51
  LLM_ARCH_MAMBA,
50
52
  LLM_ARCH_XVERSE,
@@ -76,6 +78,8 @@ enum llm_arch {
76
78
  LLM_ARCH_WAVTOKENIZER_DEC,
77
79
  LLM_ARCH_PLM,
78
80
  LLM_ARCH_BAILINGMOE,
81
+ LLM_ARCH_DOTS1,
82
+ LLM_ARCH_ARCEE,
79
83
  LLM_ARCH_UNKNOWN,
80
84
  };
81
85
 
@@ -148,6 +152,7 @@ enum llm_kv {
148
152
  LLM_KV_ATTENTION_SCALE,
149
153
  LLM_KV_ATTENTION_KEY_LENGTH_MLA,
150
154
  LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
155
+ LLM_KV_ATTENTION_LAYER_INDICES,
151
156
 
152
157
  LLM_KV_ROPE_DIMENSION_COUNT,
153
158
  LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -190,13 +195,13 @@ enum llm_kv {
190
195
  LLM_KV_TOKENIZER_MASK_ID,
191
196
  LLM_KV_TOKENIZER_ADD_BOS,
192
197
  LLM_KV_TOKENIZER_ADD_EOS,
198
+ LLM_KV_TOKENIZER_ADD_SEP,
193
199
  LLM_KV_TOKENIZER_ADD_PREFIX,
194
200
  LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
195
201
  LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
196
202
  LLM_KV_TOKENIZER_HF_JSON,
197
203
  LLM_KV_TOKENIZER_RWKV,
198
204
  LLM_KV_TOKENIZER_CHAT_TEMPLATE,
199
- LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
200
205
  LLM_KV_TOKENIZER_FIM_PRE_ID,
201
206
  LLM_KV_TOKENIZER_FIM_SUF_ID,
202
207
  LLM_KV_TOKENIZER_FIM_MID_ID,
@@ -265,6 +270,22 @@ enum llm_tensor {
265
270
  LLM_TENSOR_LAYER_OUT_NORM,
266
271
  LLM_TENSOR_POST_ATTN_NORM,
267
272
  LLM_TENSOR_POST_MLP_NORM,
273
+ LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
274
+ LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
275
+ LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n
276
+ LLM_TENSOR_PER_LAYER_PROJ, // gemma3n
277
+ LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n
278
+ LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n
279
+ LLM_TENSOR_ALTUP_PROJ, // gemma3n
280
+ LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n
281
+ LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n
282
+ LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n
283
+ LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n
284
+ LLM_TENSOR_ALTUP_ROUTER, // gemma3n
285
+ LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n
286
+ LLM_TENSOR_LAUREL_L, // gemma3n
287
+ LLM_TENSOR_LAUREL_R, // gemma3n
288
+ LLM_TENSOR_LAUREL_POST_NORM, // gemma3n
268
289
  LLM_TENSOR_SSM_IN,
269
290
  LLM_TENSOR_SSM_CONV1D,
270
291
  LLM_TENSOR_SSM_X,
@@ -437,3 +458,6 @@ const char * llm_arch_name(llm_arch arch);
437
458
  llm_arch llm_arch_from_string(const std::string & name);
438
459
 
439
460
  const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
461
+
462
+ bool llm_arch_is_recurrent(const llm_arch & arch);
463
+ bool llm_arch_is_hybrid (const llm_arch & arch);