@novastera-oss/llamarn 0.2.9 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/build.gradle +2 -1
- package/android/proguard-rules.pro +12 -0
- package/android/src/main/cpp/include/llama.h +15 -47
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/CMakePresets.json +11 -0
- package/cpp/llama.cpp/CODEOWNERS +1 -0
- package/cpp/llama.cpp/README.md +8 -8
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +62 -1
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.cpp +22 -6
- package/cpp/llama.cpp/common/common.h +22 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +1250 -43
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +21 -13
- package/cpp/llama.cpp/ggml/CMakeLists.txt +13 -3
- package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +85 -47
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-webgpu.h +19 -0
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-alloc.c +0 -15
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +7 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +44 -38
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +3 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +126 -8
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +130 -22
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +138 -18
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +11 -3
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +343 -1094
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1206 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +36 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +31 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +86 -17
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy-utils.cuh +225 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +41 -301
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +85 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +47 -60
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +29 -42
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +46 -59
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +36 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +38 -45
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +23 -36
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +3 -13
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +255 -99
- package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +111 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +1152 -695
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +92 -5
- package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cu +275 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/set-rows.cuh +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +104 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +27 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +80 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +48 -12
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +572 -106
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +599 -105
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +18 -4
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +800 -42
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d.cl +185 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/conv2d_f16_f32.cl +176 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +79 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +4 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +14 -26
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +191 -55
- package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +8 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +2 -6
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +991 -307
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +265 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +59 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +3 -8
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +18 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +84 -9
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/CMakeLists.txt +54 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/ggml-webgpu.cpp +907 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl +60 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py +35 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl +40 -0
- package/cpp/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl +56 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +386 -67
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +307 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/metadata.py +4 -0
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +24 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +122 -47
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +15 -47
- package/cpp/llama.cpp/models/templates/llama-cpp-rwkv-world.jinja +34 -0
- package/cpp/llama.cpp/models/templates/moonshotai-Kimi-K2.jinja +43 -0
- package/cpp/llama.cpp/requirements/requirements-all.txt +1 -0
- package/cpp/llama.cpp/requirements/requirements-server-bench.txt +5 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +316 -3
- package/cpp/llama.cpp/src/llama-arch.h +23 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +103 -71
- package/cpp/llama.cpp/src/llama-batch.h +31 -18
- package/cpp/llama.cpp/src/llama-chat.cpp +58 -1
- package/cpp/llama.cpp/src/llama-chat.h +3 -0
- package/cpp/llama.cpp/src/llama-context.cpp +180 -106
- package/cpp/llama.cpp/src/llama-context.h +26 -16
- package/cpp/llama.cpp/src/llama-cparams.h +3 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +310 -211
- package/cpp/llama.cpp/src/llama-graph.h +184 -122
- package/cpp/llama.cpp/src/llama-hparams.cpp +47 -1
- package/cpp/llama.cpp/src/llama-hparams.h +13 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +38 -22
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +7 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +849 -304
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +143 -47
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +10 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +36 -11
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +3545 -719
- package/cpp/llama.cpp/src/llama-model.h +21 -4
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +376 -10
- package/cpp/llama.cpp/src/llama-vocab.h +43 -0
- package/cpp/llama.cpp/src/unicode.cpp +207 -0
- package/cpp/llama.cpp/src/unicode.h +2 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +22 -4
- package/ios/include/llama.h +15 -47
- package/ios/libs/llama.xcframework/Info.plist +13 -13
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4016 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5267 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5238 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4014 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5303 -4926
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5274 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +4044 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +15 -47
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +4 -4
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -34,6 +34,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
34
34
|
{ LLM_ARCH_PHI3, "phi3" },
|
|
35
35
|
{ LLM_ARCH_PHIMOE, "phimoe" },
|
|
36
36
|
{ LLM_ARCH_PLAMO, "plamo" },
|
|
37
|
+
{ LLM_ARCH_PLAMO2, "plamo2" },
|
|
37
38
|
{ LLM_ARCH_CODESHELL, "codeshell" },
|
|
38
39
|
{ LLM_ARCH_ORION, "orion" },
|
|
39
40
|
{ LLM_ARCH_INTERNLM2, "internlm2" },
|
|
@@ -45,6 +46,9 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
45
46
|
{ LLM_ARCH_GEMMA3N, "gemma3n" },
|
|
46
47
|
{ LLM_ARCH_STARCODER2, "starcoder2" },
|
|
47
48
|
{ LLM_ARCH_MAMBA, "mamba" },
|
|
49
|
+
{ LLM_ARCH_MAMBA2, "mamba2" },
|
|
50
|
+
{ LLM_ARCH_JAMBA, "jamba" },
|
|
51
|
+
{ LLM_ARCH_FALCON_H1, "falcon-h1" },
|
|
48
52
|
{ LLM_ARCH_XVERSE, "xverse" },
|
|
49
53
|
{ LLM_ARCH_COMMAND_R, "command-r" },
|
|
50
54
|
{ LLM_ARCH_COHERE2, "cohere2" },
|
|
@@ -64,18 +68,26 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|
|
64
68
|
{ LLM_ARCH_JAIS, "jais" },
|
|
65
69
|
{ LLM_ARCH_NEMOTRON, "nemotron" },
|
|
66
70
|
{ LLM_ARCH_EXAONE, "exaone" },
|
|
71
|
+
{ LLM_ARCH_EXAONE4, "exaone4" },
|
|
67
72
|
{ LLM_ARCH_RWKV6, "rwkv6" },
|
|
68
73
|
{ LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
|
|
69
74
|
{ LLM_ARCH_RWKV7, "rwkv7" },
|
|
70
75
|
{ LLM_ARCH_ARWKV7, "arwkv7" },
|
|
71
76
|
{ LLM_ARCH_GRANITE, "granite" },
|
|
72
77
|
{ LLM_ARCH_GRANITE_MOE, "granitemoe" },
|
|
78
|
+
{ LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
|
|
73
79
|
{ LLM_ARCH_CHAMELEON, "chameleon" },
|
|
74
80
|
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
|
|
75
81
|
{ LLM_ARCH_PLM, "plm" },
|
|
76
82
|
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
|
|
77
83
|
{ LLM_ARCH_DOTS1, "dots1" },
|
|
78
84
|
{ LLM_ARCH_ARCEE, "arcee" },
|
|
85
|
+
{ LLM_ARCH_ERNIE4_5, "ernie4_5" },
|
|
86
|
+
{ LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
|
|
87
|
+
{ LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
|
|
88
|
+
{ LLM_ARCH_SMOLLM3, "smollm3" },
|
|
89
|
+
{ LLM_ARCH_LFM2, "lfm2" },
|
|
90
|
+
{ LLM_ARCH_DREAM, "dream" },
|
|
79
91
|
{ LLM_ARCH_UNKNOWN, "(unknown)" },
|
|
80
92
|
};
|
|
81
93
|
|
|
@@ -148,7 +160,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
148
160
|
{ LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
|
|
149
161
|
{ LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
|
|
150
162
|
{ LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
|
|
151
|
-
{ LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },
|
|
152
163
|
|
|
153
164
|
{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
|
|
154
165
|
{ LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
|
|
@@ -169,6 +180,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
169
180
|
{ LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
|
|
170
181
|
{ LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
|
|
171
182
|
{ LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
|
|
183
|
+
{ LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
|
|
172
184
|
{ LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },
|
|
173
185
|
|
|
174
186
|
{ LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
|
|
@@ -181,6 +193,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
|
|
|
181
193
|
|
|
182
194
|
{ LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
|
|
183
195
|
|
|
196
|
+
{ LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
|
|
197
|
+
|
|
184
198
|
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
|
|
185
199
|
{ LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
|
|
186
200
|
{ LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
|
|
@@ -774,6 +788,36 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
774
788
|
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
775
789
|
},
|
|
776
790
|
},
|
|
791
|
+
{
|
|
792
|
+
LLM_ARCH_PLAMO2,
|
|
793
|
+
{
|
|
794
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
795
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
796
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
797
|
+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
|
|
798
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
799
|
+
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
|
|
800
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
801
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
802
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
803
|
+
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
|
|
804
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
805
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
806
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
807
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
808
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
809
|
+
{ LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
|
|
810
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
811
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
812
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
813
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
814
|
+
{ LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
|
|
815
|
+
{ LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
|
|
816
|
+
{ LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
|
|
817
|
+
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
|
|
818
|
+
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
|
819
|
+
},
|
|
820
|
+
},
|
|
777
821
|
{
|
|
778
822
|
LLM_ARCH_CODESHELL,
|
|
779
823
|
{
|
|
@@ -1003,6 +1047,77 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
1003
1047
|
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1004
1048
|
},
|
|
1005
1049
|
},
|
|
1050
|
+
{
|
|
1051
|
+
LLM_ARCH_MAMBA2,
|
|
1052
|
+
{
|
|
1053
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1054
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1055
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1056
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1057
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1058
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1059
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1060
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1061
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1062
|
+
{ LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
|
|
1063
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1064
|
+
},
|
|
1065
|
+
},
|
|
1066
|
+
{
|
|
1067
|
+
LLM_ARCH_JAMBA,
|
|
1068
|
+
{
|
|
1069
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1070
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1071
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1072
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1073
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1074
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1075
|
+
{ LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
|
|
1076
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1077
|
+
{ LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
|
|
1078
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1079
|
+
{ LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
|
|
1080
|
+
{ LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
|
|
1081
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1082
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1083
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1084
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1085
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1086
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1087
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1088
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1089
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1090
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1091
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1092
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1093
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1094
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1095
|
+
},
|
|
1096
|
+
},
|
|
1097
|
+
{
|
|
1098
|
+
LLM_ARCH_FALCON_H1,
|
|
1099
|
+
{
|
|
1100
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1101
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1102
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1103
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1104
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1105
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1106
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1107
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1108
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1109
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1110
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1111
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1112
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1113
|
+
{ LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
|
|
1114
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1115
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1116
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1117
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1118
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1119
|
+
},
|
|
1120
|
+
},
|
|
1006
1121
|
{
|
|
1007
1122
|
LLM_ARCH_XVERSE,
|
|
1008
1123
|
{
|
|
@@ -1396,6 +1511,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
1396
1511
|
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1397
1512
|
},
|
|
1398
1513
|
},
|
|
1514
|
+
{
|
|
1515
|
+
LLM_ARCH_EXAONE4,
|
|
1516
|
+
{
|
|
1517
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1518
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1519
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1520
|
+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
|
|
1521
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1522
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
1523
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1524
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
1525
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1526
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1527
|
+
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
|
|
1528
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1529
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1530
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1531
|
+
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
|
|
1532
|
+
}
|
|
1533
|
+
},
|
|
1399
1534
|
{
|
|
1400
1535
|
LLM_ARCH_RWKV6,
|
|
1401
1536
|
{
|
|
@@ -1563,6 +1698,43 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
1563
1698
|
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1564
1699
|
},
|
|
1565
1700
|
},
|
|
1701
|
+
{
|
|
1702
|
+
LLM_ARCH_GRANITE_HYBRID,
|
|
1703
|
+
{
|
|
1704
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1705
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1706
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1707
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1708
|
+
// mamba(2) ssm layers
|
|
1709
|
+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
|
|
1710
|
+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
|
1711
|
+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
|
1712
|
+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
|
1713
|
+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
|
|
1714
|
+
{ LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
|
|
1715
|
+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
|
|
1716
|
+
// attention layers
|
|
1717
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1718
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1719
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1720
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1721
|
+
// dense FFN
|
|
1722
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1723
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1724
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1725
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1726
|
+
// moe FFN
|
|
1727
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1728
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1729
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1730
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1731
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1732
|
+
// shared expert
|
|
1733
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
|
1734
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
|
1735
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1736
|
+
},
|
|
1737
|
+
},
|
|
1566
1738
|
{
|
|
1567
1739
|
LLM_ARCH_CHAMELEON,
|
|
1568
1740
|
{
|
|
@@ -1658,6 +1830,126 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|
|
1658
1830
|
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
|
|
1659
1831
|
}
|
|
1660
1832
|
},
|
|
1833
|
+
{
|
|
1834
|
+
LLM_ARCH_ERNIE4_5,
|
|
1835
|
+
{
|
|
1836
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1837
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1838
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1839
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1840
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1841
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1842
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1843
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1844
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1845
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1846
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1847
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1848
|
+
},
|
|
1849
|
+
},
|
|
1850
|
+
{
|
|
1851
|
+
LLM_ARCH_ERNIE4_5_MOE,
|
|
1852
|
+
{
|
|
1853
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1854
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1855
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1856
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1857
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1858
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1859
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1860
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1861
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1862
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1863
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1864
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1865
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1866
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
|
1867
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
|
1868
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1869
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1870
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1871
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1872
|
+
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
|
|
1873
|
+
},
|
|
1874
|
+
},
|
|
1875
|
+
{
|
|
1876
|
+
LLM_ARCH_HUNYUAN_MOE,
|
|
1877
|
+
{
|
|
1878
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1879
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1880
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1881
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1882
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1883
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
1884
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1885
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
1886
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1887
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1888
|
+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
|
|
1889
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1890
|
+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
|
1891
|
+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
|
1892
|
+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
|
1893
|
+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
|
|
1894
|
+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
|
|
1895
|
+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
|
|
1896
|
+
},
|
|
1897
|
+
},
|
|
1898
|
+
{
|
|
1899
|
+
LLM_ARCH_SMOLLM3,
|
|
1900
|
+
{
|
|
1901
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1902
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1903
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1904
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1905
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1906
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1907
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1908
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1909
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1910
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1911
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1912
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1913
|
+
},
|
|
1914
|
+
},
|
|
1915
|
+
{
|
|
1916
|
+
LLM_ARCH_LFM2,
|
|
1917
|
+
{
|
|
1918
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1919
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1920
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1921
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1922
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1923
|
+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
|
|
1924
|
+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
|
|
1925
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1926
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1927
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1928
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1929
|
+
{ LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
|
|
1930
|
+
{ LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
|
|
1931
|
+
{ LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
|
|
1932
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1933
|
+
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
|
1934
|
+
}
|
|
1935
|
+
},
|
|
1936
|
+
{
|
|
1937
|
+
LLM_ARCH_DREAM,
|
|
1938
|
+
{
|
|
1939
|
+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
|
1940
|
+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
|
1941
|
+
{ LLM_TENSOR_OUTPUT, "output" },
|
|
1942
|
+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
|
1943
|
+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
|
|
1944
|
+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
|
|
1945
|
+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
|
|
1946
|
+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
|
1947
|
+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
|
1948
|
+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
|
|
1949
|
+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
|
1950
|
+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
|
1951
|
+
},
|
|
1952
|
+
},
|
|
1661
1953
|
{
|
|
1662
1954
|
LLM_ARCH_UNKNOWN,
|
|
1663
1955
|
{
|
|
@@ -1742,7 +2034,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
|
1742
2034
|
{LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
|
|
1743
2035
|
{LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
|
|
1744
2036
|
{LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
|
|
2037
|
+
{LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
2038
|
+
{LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
2039
|
+
{LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1745
2040
|
{LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
2041
|
+
{LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1746
2042
|
{LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1747
2043
|
{LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
1748
2044
|
{LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
@@ -1821,6 +2117,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|
|
1821
2117
|
{LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1822
2118
|
{LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1823
2119
|
{LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
|
2120
|
+
{LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
|
|
2121
|
+
{LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
2122
|
+
{LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
|
|
1824
2123
|
};
|
|
1825
2124
|
|
|
1826
2125
|
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
|
|
@@ -1876,6 +2175,7 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
|
|
|
1876
2175
|
bool llm_arch_is_recurrent(const llm_arch & arch) {
|
|
1877
2176
|
switch (arch) {
|
|
1878
2177
|
case LLM_ARCH_MAMBA:
|
|
2178
|
+
case LLM_ARCH_MAMBA2:
|
|
1879
2179
|
case LLM_ARCH_RWKV6:
|
|
1880
2180
|
case LLM_ARCH_RWKV6QWEN2:
|
|
1881
2181
|
case LLM_ARCH_RWKV7:
|
|
@@ -1887,9 +2187,22 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
|
|
|
1887
2187
|
}
|
|
1888
2188
|
|
|
1889
2189
|
bool llm_arch_is_hybrid(const llm_arch & arch) {
|
|
1890
|
-
// TODO: There are currently no hybrid models! Once there are, this will be
|
|
1891
|
-
// the place to identify them
|
|
1892
2190
|
switch (arch) {
|
|
2191
|
+
case LLM_ARCH_JAMBA:
|
|
2192
|
+
case LLM_ARCH_FALCON_H1:
|
|
2193
|
+
case LLM_ARCH_PLAMO2:
|
|
2194
|
+
case LLM_ARCH_GRANITE_HYBRID:
|
|
2195
|
+
case LLM_ARCH_LFM2:
|
|
2196
|
+
return true;
|
|
2197
|
+
default:
|
|
2198
|
+
return false;
|
|
2199
|
+
}
|
|
2200
|
+
}
|
|
2201
|
+
|
|
2202
|
+
bool llm_arch_is_diffusion(const llm_arch & arch) {
|
|
2203
|
+
switch (arch) {
|
|
2204
|
+
case LLM_ARCH_DREAM:
|
|
2205
|
+
return true;
|
|
1893
2206
|
default:
|
|
1894
2207
|
return false;
|
|
1895
2208
|
}
|
|
@@ -38,6 +38,7 @@ enum llm_arch {
|
|
|
38
38
|
LLM_ARCH_PHI3,
|
|
39
39
|
LLM_ARCH_PHIMOE,
|
|
40
40
|
LLM_ARCH_PLAMO,
|
|
41
|
+
LLM_ARCH_PLAMO2,
|
|
41
42
|
LLM_ARCH_CODESHELL,
|
|
42
43
|
LLM_ARCH_ORION,
|
|
43
44
|
LLM_ARCH_INTERNLM2,
|
|
@@ -49,6 +50,9 @@ enum llm_arch {
|
|
|
49
50
|
LLM_ARCH_GEMMA3N,
|
|
50
51
|
LLM_ARCH_STARCODER2,
|
|
51
52
|
LLM_ARCH_MAMBA,
|
|
53
|
+
LLM_ARCH_MAMBA2,
|
|
54
|
+
LLM_ARCH_JAMBA,
|
|
55
|
+
LLM_ARCH_FALCON_H1,
|
|
52
56
|
LLM_ARCH_XVERSE,
|
|
53
57
|
LLM_ARCH_COMMAND_R,
|
|
54
58
|
LLM_ARCH_COHERE2,
|
|
@@ -68,18 +72,26 @@ enum llm_arch {
|
|
|
68
72
|
LLM_ARCH_JAIS,
|
|
69
73
|
LLM_ARCH_NEMOTRON,
|
|
70
74
|
LLM_ARCH_EXAONE,
|
|
75
|
+
LLM_ARCH_EXAONE4,
|
|
71
76
|
LLM_ARCH_RWKV6,
|
|
72
77
|
LLM_ARCH_RWKV6QWEN2,
|
|
73
78
|
LLM_ARCH_RWKV7,
|
|
74
79
|
LLM_ARCH_ARWKV7,
|
|
75
80
|
LLM_ARCH_GRANITE,
|
|
76
81
|
LLM_ARCH_GRANITE_MOE,
|
|
82
|
+
LLM_ARCH_GRANITE_HYBRID,
|
|
77
83
|
LLM_ARCH_CHAMELEON,
|
|
78
84
|
LLM_ARCH_WAVTOKENIZER_DEC,
|
|
79
85
|
LLM_ARCH_PLM,
|
|
80
86
|
LLM_ARCH_BAILINGMOE,
|
|
81
87
|
LLM_ARCH_DOTS1,
|
|
82
88
|
LLM_ARCH_ARCEE,
|
|
89
|
+
LLM_ARCH_ERNIE4_5,
|
|
90
|
+
LLM_ARCH_ERNIE4_5_MOE,
|
|
91
|
+
LLM_ARCH_HUNYUAN_MOE,
|
|
92
|
+
LLM_ARCH_SMOLLM3,
|
|
93
|
+
LLM_ARCH_LFM2,
|
|
94
|
+
LLM_ARCH_DREAM,
|
|
83
95
|
LLM_ARCH_UNKNOWN,
|
|
84
96
|
};
|
|
85
97
|
|
|
@@ -152,7 +164,6 @@ enum llm_kv {
|
|
|
152
164
|
LLM_KV_ATTENTION_SCALE,
|
|
153
165
|
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
|
154
166
|
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
|
|
155
|
-
LLM_KV_ATTENTION_LAYER_INDICES,
|
|
156
167
|
|
|
157
168
|
LLM_KV_ROPE_DIMENSION_COUNT,
|
|
158
169
|
LLM_KV_ROPE_DIMENSION_SECTIONS,
|
|
@@ -173,6 +184,7 @@ enum llm_kv {
|
|
|
173
184
|
LLM_KV_SSM_CONV_KERNEL,
|
|
174
185
|
LLM_KV_SSM_STATE_SIZE,
|
|
175
186
|
LLM_KV_SSM_TIME_STEP_RANK,
|
|
187
|
+
LLM_KV_SSM_GROUP_COUNT,
|
|
176
188
|
LLM_KV_SSM_DT_B_C_RMS,
|
|
177
189
|
|
|
178
190
|
LLM_KV_WKV_HEAD_SIZE,
|
|
@@ -220,6 +232,8 @@ enum llm_kv {
|
|
|
220
232
|
|
|
221
233
|
LLM_KV_CLASSIFIER_OUTPUT_LABELS,
|
|
222
234
|
|
|
235
|
+
LLM_KV_SHORTCONV_L_CACHE,
|
|
236
|
+
|
|
223
237
|
// deprecated:
|
|
224
238
|
LLM_KV_TOKENIZER_PREFIX_ID,
|
|
225
239
|
LLM_KV_TOKENIZER_SUFFIX_ID,
|
|
@@ -290,8 +304,12 @@ enum llm_tensor {
|
|
|
290
304
|
LLM_TENSOR_SSM_CONV1D,
|
|
291
305
|
LLM_TENSOR_SSM_X,
|
|
292
306
|
LLM_TENSOR_SSM_DT,
|
|
307
|
+
LLM_TENSOR_SSM_DT_NORM,
|
|
293
308
|
LLM_TENSOR_SSM_A,
|
|
309
|
+
LLM_TENSOR_SSM_B_NORM,
|
|
310
|
+
LLM_TENSOR_SSM_C_NORM,
|
|
294
311
|
LLM_TENSOR_SSM_D,
|
|
312
|
+
LLM_TENSOR_SSM_NORM,
|
|
295
313
|
LLM_TENSOR_SSM_OUT,
|
|
296
314
|
LLM_TENSOR_TIME_MIX_W0,
|
|
297
315
|
LLM_TENSOR_TIME_MIX_W1,
|
|
@@ -385,6 +403,9 @@ enum llm_tensor {
|
|
|
385
403
|
LLM_TENSOR_POS_NET_ATTN_K,
|
|
386
404
|
LLM_TENSOR_POS_NET_ATTN_V,
|
|
387
405
|
LLM_TENSOR_POS_NET_ATTN_OUT,
|
|
406
|
+
LLM_TENSOR_SHORTCONV_CONV,
|
|
407
|
+
LLM_TENSOR_SHORTCONV_INPROJ,
|
|
408
|
+
LLM_TENSOR_SHORTCONV_OUTPROJ,
|
|
388
409
|
};
|
|
389
410
|
|
|
390
411
|
enum llm_tensor_layer {
|
|
@@ -461,3 +482,4 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
|
|
|
461
482
|
|
|
462
483
|
bool llm_arch_is_recurrent(const llm_arch & arch);
|
|
463
484
|
bool llm_arch_is_hybrid (const llm_arch & arch);
|
|
485
|
+
bool llm_arch_is_diffusion(const llm_arch & arch);
|