@novastera-oss/llamarn 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/android/build.gradle +2 -1
  2. package/android/proguard-rules.pro +12 -0
  3. package/android/src/main/cpp/include/llama.h +15 -47
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  11. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  13. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  14. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  15. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  19. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  20. package/cpp/build-info.cpp +2 -2
  21. package/cpp/llama.cpp/CMakePresets.json +11 -0
  22. package/cpp/llama.cpp/CODEOWNERS +1 -0
  23. package/cpp/llama.cpp/README.md +4 -3
  24. package/cpp/llama.cpp/common/arg.cpp +45 -1
  25. package/cpp/llama.cpp/common/common.cpp +22 -6
  26. package/cpp/llama.cpp/common/common.h +18 -4
  27. package/cpp/llama.cpp/convert_hf_to_gguf.py +500 -32
  28. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +12 -13
  29. package/cpp/llama.cpp/ggml/CMakeLists.txt +6 -1
  30. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
  31. package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
  32. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -0
  33. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
  34. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -0
  35. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +8 -20
  36. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +58 -3
  38. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +122 -16
  40. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +5 -2
  41. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +3 -0
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +3 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
  50. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +14 -4
  51. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +64 -17
  52. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
  54. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -67
  55. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +45 -62
  56. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +28 -43
  57. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +41 -56
  58. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -47
  59. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +31 -43
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +22 -37
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +73 -23
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -689
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +7 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +13 -1
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
  75. package/cpp/llama.cpp/ggml/src/ggml-impl.h +16 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +13 -3
  77. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +407 -69
  78. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +380 -83
  79. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
  80. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +2 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +295 -2
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
  88. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
  89. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +131 -46
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +43 -43
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
  94. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +287 -22
  95. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
  96. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +1 -5
  97. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
  98. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
  99. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
  100. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
  101. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
  102. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
  103. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +2 -0
  104. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
  105. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +8 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
  107. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +71 -16
  109. package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
  112. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
  115. package/cpp/llama.cpp/ggml/src/ggml.c +4 -6
  116. package/cpp/llama.cpp/gguf-py/gguf/constants.py +98 -0
  117. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
  118. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
  119. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +75 -52
  120. package/cpp/llama.cpp/include/llama.h +15 -7
  121. package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
  122. package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
  123. package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
  124. package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
  125. package/cpp/llama.cpp/src/llama-arch.cpp +106 -0
  126. package/cpp/llama.cpp/src/llama-arch.h +5 -0
  127. package/cpp/llama.cpp/src/llama-batch.cpp +76 -70
  128. package/cpp/llama.cpp/src/llama-batch.h +24 -18
  129. package/cpp/llama.cpp/src/llama-chat.cpp +43 -1
  130. package/cpp/llama.cpp/src/llama-chat.h +2 -0
  131. package/cpp/llama.cpp/src/llama-context.cpp +180 -106
  132. package/cpp/llama.cpp/src/llama-context.h +26 -16
  133. package/cpp/llama.cpp/src/llama-cparams.h +3 -2
  134. package/cpp/llama.cpp/src/llama-graph.cpp +203 -39
  135. package/cpp/llama.cpp/src/llama-graph.h +147 -72
  136. package/cpp/llama.cpp/src/llama-hparams.cpp +40 -0
  137. package/cpp/llama.cpp/src/llama-hparams.h +10 -2
  138. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +11 -5
  139. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +3 -0
  140. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +698 -302
  141. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +89 -31
  142. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +1 -0
  143. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +16 -1
  144. package/cpp/llama.cpp/src/llama-model.cpp +1293 -312
  145. package/cpp/llama.cpp/src/llama-model.h +3 -4
  146. package/cpp/llama.cpp/src/llama-quant.cpp +1 -2
  147. package/cpp/llama.cpp/src/llama-vocab.cpp +363 -8
  148. package/cpp/llama.cpp/src/llama-vocab.h +2 -0
  149. package/cpp/llama.cpp/src/unicode.cpp +207 -0
  150. package/cpp/llama.cpp/src/unicode.h +2 -0
  151. package/ios/include/common.h +18 -4
  152. package/ios/include/llama.h +15 -7
  153. package/ios/libs/llama.xcframework/Info.plist +15 -15
  154. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  155. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -5059
  156. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -7
  157. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  158. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  159. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -5030
  160. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3889
  161. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -7
  162. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  163. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  164. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -5030
  165. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3891
  166. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -7
  167. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -7
  168. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  169. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -7
  170. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  171. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  172. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  173. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -5059
  174. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -7
  175. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  176. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  177. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -5030
  178. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3889
  179. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -7
  180. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  181. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  182. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -5095
  183. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -7
  184. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  185. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  186. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -5066
  187. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3919
  188. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -7
  189. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  190. package/package.json +4 -4
@@ -669,6 +669,36 @@ class TextModel(ModelBase):
         # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
+        if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
+            res = "chatglm-bpe"
+        if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
+            res = "chatglm-bpe"
+        if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-hf
+            res = "glm4"
+        if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
+            # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
+            res = "minerva-7b"
+        if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
+            # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
+            res = "hunyuan"
+        if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
+            res = "falcon-h1"
+        if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-1B-Base
+            res = "falcon-h1"
+        if chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-7B-Base
+            res = "falcon-h1"
+        if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base
+            res = "falcon-h1"
+        if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
+            # ref: https://huggingface.co/moonshotai/Kimi-K2-Base
+            res = "kimi-k2"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
@@ -804,42 +834,18 @@ class TextModel(ModelBase):
         if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
             # ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
             res = "seed-coder"
-        if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
-            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
-            res = "chatglm-bpe"
-        if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
-            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
-            res = "chatglm-bpe"
-        if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
-            # ref: https://huggingface.co/THUDM/glm-4-9b-hf
-            res = "glm4"
-        if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
-            # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
-            res = "minerva-7b"
-        if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
-            # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
-            res = "hunyuan"
         if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf":
             # ref: https://huggingface.co/skt/A.X-4.0
             res = "a.x-4.0"
-        if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
-            # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
-            res = "falcon-h1"
-        if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86":
-            # ref: https://huggingface.co/tiiuae/Falcon-H1-1B-Base
-            res = "falcon-h1"
-        if chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896":
-            # ref: https://huggingface.co/tiiuae/Falcon-H1-7B-Base
-            res = "falcon-h1"
-        if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b":
-            # ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base
-            res = "falcon-h1"
         if chkhsh == "f6791d196f87ce6b56a7d234be618e0d58f8cda3549416635b2bebcd22cd95c4":
             # ref: https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct
             res = "midm-2.0"
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
+            # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
+            res = "exaone4"
 
         if res is None:
             logger.warning("\n")
@@ -1082,7 +1088,14 @@ class TextModel(ModelBase):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
-        special_vocab.chat_template = "rwkv-world"
+        if special_vocab.chat_template is None:
+            template_path = Path(__file__).parent / "models" / "templates" / "llama-cpp-rwkv-world.jinja"
+            if template_path.is_file():
+                with open(template_path, "r", encoding="utf-8") as f:
+                    template = f.read()
+            else:
+                template = "rwkv-world"
+            special_vocab.chat_template = template
         # hack: Add '\n\n' as the EOT token to make it chat normally
         special_vocab._set_special_token("eot", 261)
         # hack: Override these as they have already been set (incorrectly)
@@ -2768,6 +2781,76 @@ class Qwen2Model(TextModel):
         yield from super().modify_tensors(data_torch, name, bid)
 
 
+@ModelBase.register("DreamModel")
+class DreamModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.DREAM
+
+    def get_vocab_base(self) -> tuple[list[str], list[int], str]:
+        tokens: list[str] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+        vocab_dict = tokenizer.get_vocab()
+        vocab_size = self.hparams.get("vocab_size", len(vocab_dict))
+        assert max(vocab_dict.values()) < vocab_size
+
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab_dict.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            elif reverse_vocab[i] in added_vocab:
+                tokens.append(reverse_vocab[i])
+                # Check if it's a special token - treat special tokens as CONTROL tokens
+                if hasattr(tokenizer, 'added_tokens_decoder') and i in tokenizer.added_tokens_decoder:
+                    if tokenizer.added_tokens_decoder[i].special:
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.USER_DEFINED)
+                else:
+                    # Fallback: treat all added vocab as control tokens for special tokens like <|im_start|>
+                    toktypes.append(gguf.TokenType.CONTROL)
+            else:
+                tokens.append(reverse_vocab[i])
+                toktypes.append(gguf.TokenType.NORMAL)
+
+        return tokens, toktypes, tokpre
+
+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self._try_set_pooling_type()
+
+        # Dream models use non-causal attention for diffusion
+        self.gguf_writer.add_causal_attention(False)
+        # Handle RoPE scaling similar to Qwen2
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+        # Add Dream-specific parameters
+        mask_token_id = self.hparams.get("mask_token_id")
+        if mask_token_id is not None:
+            self.gguf_writer.add_mask_token_id(mask_token_id)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # Dream model tensors should be mapped directly since it's the base model
+        yield from super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("Ernie4_5_ForCausalLM")
 class Ernie4_5Model(TextModel):
     model_arch = gguf.MODEL_ARCH.ERNIE4_5
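The YARN branch in DreamModel.set_gguf_parameters() above consumes the usual Hugging Face rope_scaling block. A hypothetical config.json fragment that would take that branch (values invented for illustration):

rope_scaling = {
    "rope_type": "yarn",                        # or the legacy key "type"
    "factor": 4.0,                              # context-extension factor
    "original_max_position_embeddings": 32768,  # pre-scaling context length
}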
@@ -2781,7 +2864,8 @@ class Ernie4_5Model(TextModel):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
         num_kv_heads = self.hparams["num_key_value_heads"]
-        head_dim = self.hparams["head_dim"]
+        if (head_dim := self.hparams.get("head_dim")) is None:
+            head_dim = self.hparams["hidden_size"] // num_heads
 
         if "ernie." in name:
             name = name.replace("ernie.", "model.")
@@ -2814,6 +2898,93 @@ class Ernie4_5Model(TextModel):
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@ModelBase.register("Ernie4_5_MoeForCausalLM")
+class Ernie4_5MoeModel(Ernie4_5Model):
+    model_arch = gguf.MODEL_ARCH.ERNIE4_5_MOE
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._experts = [{} for _ in range(self.block_count)]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_expert_count(self.hparams["moe_num_experts"])
+        self.gguf_writer.add_expert_used_count(self.hparams["moe_k"])
+        self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"])
+        self.gguf_writer.add_leading_dense_block_count(self.hparams["moe_layer_start_index"])
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+        if (shared_expert_count := self.hparams.get('moe_num_shared_experts')) is not None:
+            self.gguf_writer.add_expert_shared_count(shared_expert_count)
+            if shared_expert_count > 0 and (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
+                self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # Modify correction bias name as in DeepseekV2
+        if name.endswith("e_score_correction_bias"):
+            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
+
+        # skip Multi-Token Prediction (MTP) layers (again, same as DeepseekV2)
+        match = re.match(r"model.mtp_block.(\d+)", name)
+        if match:
+            return []
+
+        # skip all other MTP tensors for now
+        match = re.match(r"model.mtp_emb_norm.(\d+)", name)
+        if match:
+            return []
+
+        match = re.match(r"model.mtp_hidden_norm.(\d+)", name)
+        if match:
+            return []
+
+        match = re.match(r"model.mtp_linear_proj.(\d+)", name)
+        if match:
+            return []
+
+        # process the experts separately
+        if name.find("mlp.experts") != -1:
+            n_experts = self.hparams["moe_num_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["gate_proj", "up_proj", "down_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename_to_retrieve = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename_to_retrieve])
+                        del self._experts[bid][ename_to_retrieve]
+
+                    data_torch = torch.stack(datas, dim=0)
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+                    new_name = self.map_tensor_name(merged_name)
+                    tensors.append((new_name, data_torch))
+
+                return tensors
+            else:
+                return []
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register(
     "Qwen2VLModel",
     "Qwen2VLForConditionalGeneration",
@@ -3501,6 +3672,175 @@ class PlamoModel(TextModel):
         return [(new_name, data_torch)]
 
 
+@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM")
+class Plamo2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.PLAMO2
+
+    def set_vocab(self):
+        # PLaMo 2 uses a custom tokenizer with a .jsonl file
+        # We need to handle this specially
+        tokenizer_jsonl_path = self.dir_model / "tokenizer.jsonl"
+        tokenizer_config_path = self.dir_model / "tokenizer_config.json"
+
+        if not tokenizer_jsonl_path.is_file():
+            raise FileNotFoundError(f"PLaMo 2 tokenizer file not found: {tokenizer_jsonl_path}")
+
+        # Load tokenizer config
+        with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
+            tokenizer_config = json.load(f)
+
+        # Load tokens from JSONL file (actually a list format)
+        tokens = []
+        scores = []
+        toktypes = []
+
+        with open(tokenizer_jsonl_path, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f):
+                if line.strip():
+                    token_data = json.loads(line)
+                    # Format: [token, score, type, ?, ?, ?, ?]
+                    token = token_data[0].encode("utf-8")
+                    score = float(token_data[1])
+                    token_type_str = token_data[2] if len(token_data) > 2 else "NORMAL"
+
+                    tokens.append(token)
+                    scores.append(score)
+
+                    # Map token type strings to GGUF token types
+                    if token_type_str == "UNKNOWN":
+                        toktypes.append(gguf.TokenType.UNKNOWN)
+                    elif token_type_str == "CONTROL":
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    elif token_type_str == "BYTE":
+                        toktypes.append(gguf.TokenType.BYTE)
+                    else:
+                        # Check for PLaMo-2 special tokens
+                        token_str = token_data[0]
+                        if token_str.startswith("<|plamo:") and token_str.endswith("|>"):
+                            toktypes.append(gguf.TokenType.CONTROL)
+                        else:
+                            toktypes.append(gguf.TokenType.NORMAL)
+
+        vocab_size = self.hparams["vocab_size"]
+        if vocab_size > len(tokens):
+            pad_count = vocab_size - len(tokens)
+            logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
+            for i in range(1, pad_count + 1):
+                tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
+                scores.append(-1000.0)
+                toktypes.append(gguf.TokenType.UNUSED)
+
+        # Use "plamo2" tokenizer type for PLaMo-2's custom Aho-Corasick tokenizer
+        self.gguf_writer.add_tokenizer_model("plamo2")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        # Add special tokens from config
+        if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] is not None:
+            token_id = tokens.index(tokenizer_config["bos_token"].encode("utf-8"))
+            self.gguf_writer.add_bos_token_id(token_id)
+        if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] is not None:
+            token_id = tokens.index(tokenizer_config["eos_token"].encode("utf-8"))
+            self.gguf_writer.add_eos_token_id(token_id)
+        if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] is not None:
+            token_id = tokens.index(tokenizer_config["pad_token"].encode("utf-8"))
+            self.gguf_writer.add_pad_token_id(token_id)
+        if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] is not None:
+            token_id = tokens.index(tokenizer_config["sep_token"].encode("utf-8"))
+            self.gguf_writer.add_sep_token_id(token_id)
+        if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] is not None:
+            token_id = tokens.index(tokenizer_config["unk_token"].encode("utf-8"))
+            self.gguf_writer.add_unk_token_id(token_id)
+
+        # Add <|plamo:op|> as EOT to ensure appropriate end of generation
+        self.gguf_writer.add_eot_token_id(4)
+
+        self.gguf_writer.add_add_space_prefix(False)
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
+
+        # Which layers are Mamba layers
+        # PLaMo 2 uses mamba_step to indicate the pattern (e.g., 2 means every other layer)
+        # This logic matches modeling_plamo.py's is_mamba function
+        mamba_step = hparams.get("mamba_step", 2)
+        mamba_enabled = hparams.get("mamba_enabled", True)
+        mamba_layers = []
+
+        if mamba_enabled:
+            for i in range(block_count):
+                if block_count <= (mamba_step // 2):
+                    # use attention in last layer
+                    is_mamba = (i != block_count - 1)
+                else:
+                    is_mamba = (i % mamba_step) != (mamba_step // 2)
+                if is_mamba:
+                    mamba_layers.append(0)
+                else:
+                    mamba_layers.append(hparams.get("num_key_value_heads", 4))
+
+        if mamba_layers:
+            self.gguf_writer.add_head_count_kv(mamba_layers)
+
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
+        self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
+        self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 10000))
+
+        # Mamba parameters
+        self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
+        self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
+        self.gguf_writer.add_ssm_time_step_rank(hparams.get("mamba_num_heads", 64))
+        intermediate_size = hparams.get("mamba_num_heads", 64) * hparams.get("hidden_size_per_head", 128)
+        self.gguf_writer.add_ssm_inner_size(intermediate_size)
+        self.gguf_writer.add_ssm_group_count(0)
+
+        # MLP feed forward parameters (for attention layers)
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 13312))
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.endswith(".A_log"):
+            data_torch = -torch.exp(data_torch)
+        elif name.endswith(".dt_bias"):
+            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
+        elif name.endswith(".dt_norm_weight"):
+            name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight"
+        elif name.endswith(".B_norm_weight"):
+            name = name.rpartition(".B_norm_weight")[0] + ".B_norm.weight"
+        elif name.endswith(".C_norm_weight"):
+            name = name.rpartition(".C_norm_weight")[0] + ".C_norm.weight"
+        elif name.endswith(".k_weight"):
+            name = name.rpartition(".k_weight")[0] + ".k.weight"
+        elif name.endswith(".q_weight"):
+            name = name.rpartition(".q_weight")[0] + ".q.weight"
+        elif name.endswith(".conv1d.weight"):
+            data_torch = torch.squeeze(data_torch)  # remove (, 1, )
+            assert data_torch.ndim == 2
+        elif name.endswith(".pre_mixer_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mixer_norm.weight"):
+            data_torch += 1.0 / 5
+        elif name.endswith(".pre_mlp_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mlp_norm.weight"):
+            data_torch += 1.0 / (5**1.5)
+        elif name.endswith(".norm.weight"):
+            data_torch += 1.0
+
+        new_name = self.map_tensor_name(name)
+
+        return [(new_name, data_torch)]
+
+
 @ModelBase.register("CodeShellForCausalLM")
 class CodeShellModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CODESHELL
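The is_mamba pattern in Plamo2Model.set_gguf_parameters() above can be checked in isolation; with the default mamba_step of 2 it alternates Mamba and attention layers. A small standalone sketch with hypothetical values:

block_count, mamba_step = 8, 2  # hypothetical model shape

layers = []
for i in range(block_count):
    if block_count <= (mamba_step // 2):
        is_mamba = (i != block_count - 1)  # tiny models: attention only in the last layer
    else:
        is_mamba = (i % mamba_step) != (mamba_step // 2)
    layers.append("mamba" if is_mamba else "attn")

print(layers)  # ['mamba', 'attn', 'mamba', 'attn', 'mamba', 'attn', 'mamba', 'attn']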
@@ -5563,7 +5903,58 @@ class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
     def set_vocab(self):
-        self._set_vocab_gpt2()
+        try:
+            self._set_vocab_gpt2()
+            return
+        except Exception:
+            pass
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        if tokpre == "kimi-k2":
+            # Build merges list using the approach similar to HunYuanMoE
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.model._mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # Build token list
+            vocab_size = self.hparams["vocab_size"]
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+        else:
+            raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")
 
     def set_gguf_parameters(self):
 
@@ -6095,7 +6486,7 @@ class JaisModel(TextModel):
         self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
 
 
-@ModelBase.register("Glm4ForCausalLM")
+@ModelBase.register("Glm4ForCausalLM", "Glm4vForConditionalGeneration")
 class Glm4Model(TextModel):
     model_arch = gguf.MODEL_ARCH.GLM4
 
@@ -6117,7 +6508,8 @@ class Glm4Model(TextModel):
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        rope_dim = self.hparams["head_dim"]
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
         rope_scaling = self.hparams.get("rope_scaling") or {}
         if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
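With the head_dim fallback above, a checkpoint that omits head_dim still yields a rope dimension count. A worked example with hypothetical hparams:

hparams = {"hidden_size": 4096, "num_attention_heads": 32}  # no "head_dim" key

rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"]
print(int(rope_dim * hparams.get("partial_rotary_factor", 0.5)))  # 4096 // 32 = 128, * 0.5 -> 64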
@@ -6125,6 +6517,13 @@ class Glm4Model(TextModel):
             self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
             self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.startswith("model.visual."):  # ignore visual part of Glm4v
+            return []
+        elif name.startswith("model.language_model."):
+            name = name.replace("language_model.", "")  # for Glm4v
+        return super().modify_tensors(data_torch, name, bid)
+
 
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(TextModel):
@@ -6392,6 +6791,75 @@ class ExaoneModel(TextModel):
         yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
 
 
+@ModelBase.register("Exaone4ForCausalLM")
+class Exaone4Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.EXAONE4
+
+    def set_vocab(self):
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        if hparams.get("sliding_window") is not None:
+            self.gguf_writer.add_sliding_window(hparams["sliding_window"])
+            if "layer_types" in hparams:
+                self.gguf_writer.add_sliding_window_pattern([t == "sliding_attention" for t in hparams["layer_types"]])
+            elif "sliding_window_pattern" in hparams:
+                sliding_window_pattern = []
+                if isinstance(hparams["sliding_window_pattern"], str):  # e.g. LLLG
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append(hparams["sliding_window_pattern"][i % len(hparams["sliding_window_pattern"])] == "L")
+                if isinstance(hparams["sliding_window_pattern"], int):  # e.g. 4
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append((i + 1) % hparams["sliding_window_pattern"] != 0)
+                if len(sliding_window_pattern) == hparams["num_hidden_layers"]:
+                    self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern)
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 10_000.0)
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 16.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
+
+
 @ModelBase.register("GraniteForCausalLM")
 class GraniteModel(LlamaModel):
     """Conversion for IBM's GraniteForCausalLM"""