@novastera-oss/llamarn 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RNLlamaCpp.podspec +3 -2
- package/android/CMakeLists.txt +6 -3
- package/android/src/main/cpp/include/llama.h +140 -38
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +48 -67
- package/cpp/LlamaCppModel.h +8 -3
- package/cpp/PureCppImpl.cpp +1 -1
- package/cpp/PureCppImpl.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +15 -4
- package/cpp/llama.cpp/Makefile +2 -2
- package/cpp/llama.cpp/README.md +33 -13
- package/cpp/llama.cpp/common/CMakeLists.txt +15 -28
- package/cpp/llama.cpp/common/arg.cpp +38 -12
- package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
- package/cpp/llama.cpp/common/chat-parser.cpp +9 -3
- package/cpp/llama.cpp/common/chat-parser.h +4 -1
- package/cpp/llama.cpp/common/chat.cpp +16 -13
- package/cpp/llama.cpp/common/chat.h +1 -1
- package/cpp/llama.cpp/common/common.cpp +52 -40
- package/cpp/llama.cpp/common/common.h +5 -2
- package/cpp/llama.cpp/common/json-partial.cpp +5 -4
- package/cpp/llama.cpp/common/json-partial.h +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/cpp/llama.cpp/common/speculative.cpp +6 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +128 -84
- package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -2
- package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
- package/cpp/llama.cpp/ggml/include/ggml.h +1 -3
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +49 -13
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +10 -5
- package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +93 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2174 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +7 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +33 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1555 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +2 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +6 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +25 -16
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +33 -8
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +135 -100
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +908 -3
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +19 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +21 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +121 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +2 -96
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +164 -46
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +118 -11
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +26 -29
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -248
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +9 -8
- package/cpp/llama.cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/llama.cpp/ggml/src/gguf.cpp +19 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +57 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +4 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +14 -3
- package/cpp/llama.cpp/include/llama.h +140 -38
- package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +4 -1
- package/cpp/llama.cpp/src/llama-arch.cpp +95 -3
- package/cpp/llama.cpp/src/llama-arch.h +7 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +289 -31
- package/cpp/llama.cpp/src/llama-batch.h +47 -17
- package/cpp/llama.cpp/src/llama-chat.cpp +19 -2
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +488 -313
- package/cpp/llama.cpp/src/llama-context.h +38 -17
- package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
- package/cpp/llama.cpp/src/llama-cparams.h +1 -1
- package/cpp/llama.cpp/src/llama-graph.cpp +275 -152
- package/cpp/llama.cpp/src/llama-graph.h +109 -52
- package/cpp/llama.cpp/src/llama-hparams.cpp +6 -2
- package/cpp/llama.cpp/src/llama-hparams.h +8 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +281 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +133 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +1835 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +308 -0
- package/cpp/llama.cpp/src/llama-kv-cells.h +53 -17
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +247 -0
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +143 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +1116 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +188 -0
- package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
- package/cpp/llama.cpp/src/llama-memory.h +89 -4
- package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
- package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/cpp/llama.cpp/src/llama-model.cpp +735 -143
- package/cpp/llama.cpp/src/llama-model.h +4 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -1
- package/cpp/llama.cpp/src/llama-vocab.cpp +39 -25
- package/cpp/llama.cpp/src/llama.cpp +11 -7
- package/cpp/llama.cpp/src/unicode.cpp +5 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +10518 -0
- package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +93468 -0
- package/cpp/llama.cpp/{common → vendor}/minja/chat-template.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor}/minja/minja.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor/nlohmann}/json.hpp +3027 -2267
- package/cpp/llama.cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/llama.cpp/vendor/stb/stb_image.h +7988 -0
- package/cpp/rn-completion.cpp +65 -10
- package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
- package/cpp/{rn-utils.hpp → rn-utils.h} +8 -1
- package/ios/include/chat.h +1 -1
- package/ios/include/common/minja/chat-template.hpp +1 -1
- package/ios/include/common/minja/minja.hpp +1 -1
- package/ios/include/common.h +5 -2
- package/ios/include/json-schema-to-grammar.h +4 -4
- package/ios/include/llama.h +140 -38
- package/ios/include/{common → nlohmann}/json.hpp +3027 -2267
- package/ios/libs/llama.xcframework/Info.plist +20 -20
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4617
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3557
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3744 -3559
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4616
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4637
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3556
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4900 -4653
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4871 -4674
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3773 -3587
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -2
- package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -2747
- package/cpp/llama.cpp/src/llama-kv-cache.h +0 -502
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
|
@@ -25,15 +25,3 @@ add_executable(${TARGET} vulkan-shaders-gen.cpp)
|
|
|
25
25
|
install(TARGETS ${TARGET} RUNTIME)
|
|
26
26
|
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
|
27
27
|
target_link_libraries(vulkan-shaders-gen PUBLIC Threads::Threads)
|
|
28
|
-
|
|
29
|
-
# Configure output directories for MSVC builds
|
|
30
|
-
if(MSVC)
|
|
31
|
-
# Get the main project's runtime output directory if possible
|
|
32
|
-
if(DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY)
|
|
33
|
-
foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES})
|
|
34
|
-
string(TOUPPER ${CONFIG} CONFIG)
|
|
35
|
-
set_target_properties(${TARGET} PROPERTIES
|
|
36
|
-
RUNTIME_OUTPUT_DIRECTORY_${CONFIG} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
|
|
37
|
-
endforeach()
|
|
38
|
-
endif()
|
|
39
|
-
endif()
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#version 450
|
|
2
|
+
|
|
3
|
+
#include "types.comp"
|
|
4
|
+
|
|
5
|
+
layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // src0 - kernel: [K, Cout, Cin]
|
|
6
|
+
layout (binding = 1) readonly buffer B {B_TYPE data_b[];}; // src1 - input: [L, Cin]
|
|
7
|
+
layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; // dst - result [KL, Cout]
|
|
8
|
+
|
|
9
|
+
layout(local_size_x = 128 , local_size_y = 1, local_size_z = 1) in;
|
|
10
|
+
|
|
11
|
+
layout (push_constant) uniform parameter {
|
|
12
|
+
uint32_t Cout;
|
|
13
|
+
uint32_t Cin;
|
|
14
|
+
uint32_t K;
|
|
15
|
+
uint32_t L;
|
|
16
|
+
uint32_t KL;
|
|
17
|
+
|
|
18
|
+
uint32_t nb01;
|
|
19
|
+
uint32_t nb02;
|
|
20
|
+
uint32_t nb11;
|
|
21
|
+
uint32_t nb1;
|
|
22
|
+
|
|
23
|
+
int32_t s0;
|
|
24
|
+
} p;
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
uint32_t Cout_idx = gl_WorkGroupID.x;
|
|
28
|
+
const uint32_t bs = gl_WorkGroupSize.x;
|
|
29
|
+
uint32_t tid = gl_LocalInvocationID.x;
|
|
30
|
+
// Code is more straightforward if we assume it is bs*s0+K instead of (bs-1)*s0+K.
|
|
31
|
+
uint32_t tmp_len = bs*p.s0+p.K;
|
|
32
|
+
shared D_TYPE tmp[4096];
|
|
33
|
+
|
|
34
|
+
uint splitWork(uint workSize){
|
|
35
|
+
return (bs + workSize -1) / bs;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
void main(){
|
|
39
|
+
for(uint32_t i = 0; i < splitWork(tmp_len); i++){
|
|
40
|
+
uint32_t idx = i*bs+tid;
|
|
41
|
+
if(idx < tmp_len){
|
|
42
|
+
tmp[idx] = 0.0;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
uint32_t L_blocks = splitWork(p.L);
|
|
47
|
+
for(uint32_t L_block_id = 0; L_block_id < L_blocks; L_block_id++){
|
|
48
|
+
if(L_block_id > 0){
|
|
49
|
+
barrier();
|
|
50
|
+
// Shift values in tmp to the current processing window
|
|
51
|
+
for(int i = 0; i < splitWork(tmp_len); i++){
|
|
52
|
+
uint32_t idx = i*bs+tid;
|
|
53
|
+
if(idx >= bs*p.s0 && idx < tmp_len){
|
|
54
|
+
tmp[idx-bs*p.s0] = tmp[idx];
|
|
55
|
+
tmp[idx] = 0.0;
|
|
56
|
+
}else if(idx >= p.K && idx < bs*p.s0){
|
|
57
|
+
tmp[idx] = 0.0;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
barrier();
|
|
62
|
+
|
|
63
|
+
// Save contributions of the block to tmp
|
|
64
|
+
uint32_t L_idx = L_block_id*bs + tid;
|
|
65
|
+
for(uint32_t K_idx = 0; K_idx < p.K; K_idx++){
|
|
66
|
+
D_TYPE dp = 0.0;
|
|
67
|
+
for(uint32_t Cin_idx = 0; Cin_idx < p.Cin; Cin_idx++){
|
|
68
|
+
A_TYPE elemKrn = data_a[K_idx + Cout_idx * p.nb01 + Cin_idx * p.nb02];
|
|
69
|
+
if(L_idx < p.L){
|
|
70
|
+
B_TYPE elemInp = data_b[L_idx + Cin_idx*p.nb11];
|
|
71
|
+
dp = fma(elemKrn, elemInp, dp);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
tmp[tid*p.s0 + K_idx] += dp;
|
|
75
|
+
barrier();
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Save the computed values except the last block that can have different size
|
|
79
|
+
uint32_t KLb_idx = L_block_id*bs*p.s0;
|
|
80
|
+
if(L_block_id < L_blocks-1){
|
|
81
|
+
for(uint32_t s0_idx = 0; s0_idx < p.s0; s0_idx++){
|
|
82
|
+
uint32_t sh_idx = p.s0*tid+s0_idx;
|
|
83
|
+
uint32_t KL_idx = KLb_idx+sh_idx;
|
|
84
|
+
if(KL_idx < p.KL){
|
|
85
|
+
data_d[KL_idx + Cout_idx*p.nb1] = tmp[sh_idx];
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
for(uint32_t i = 0; i < splitWork(tmp_len); i++){
|
|
92
|
+
uint32_t idx = i*bs+tid;
|
|
93
|
+
uint32_t KL_idx = (L_blocks-1)*bs*p.s0+idx;
|
|
94
|
+
if(KL_idx < p.KL){
|
|
95
|
+
data_d[KL_idx + Cout_idx*p.nb1] = tmp[idx];
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
@@ -622,6 +622,8 @@ void process_shaders() {
|
|
|
622
622
|
|
|
623
623
|
string_to_spv("timestep_embedding_f32", "timestep_embedding.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
|
|
624
624
|
|
|
625
|
+
string_to_spv("conv_transpose_1d_f32", "conv_transpose_1d.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
|
|
626
|
+
|
|
625
627
|
string_to_spv("pool2d_f32", "pool2d.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
|
|
626
628
|
|
|
627
629
|
string_to_spv("rwkv_wkv6_f32", "wkv6.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
|
|
@@ -133,7 +133,7 @@ static void ggml_print_backtrace_symbols(void) {
|
|
|
133
133
|
}
|
|
134
134
|
#endif
|
|
135
135
|
|
|
136
|
-
|
|
136
|
+
void ggml_print_backtrace(void) {
|
|
137
137
|
const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
|
|
138
138
|
if (GGML_NO_BACKTRACE) {
|
|
139
139
|
return;
|
|
@@ -160,6 +160,10 @@ static void ggml_print_backtrace(void) {
|
|
|
160
160
|
const int parent_pid = getpid();
|
|
161
161
|
const int child_pid = fork();
|
|
162
162
|
if (child_pid < 0) { // error
|
|
163
|
+
#if defined(__linux__)
|
|
164
|
+
close(lock[1]);
|
|
165
|
+
close(lock[0]);
|
|
166
|
+
#endif
|
|
163
167
|
return;
|
|
164
168
|
} else if (child_pid == 0) { // child
|
|
165
169
|
char attach[32];
|
|
@@ -167,6 +171,7 @@ static void ggml_print_backtrace(void) {
|
|
|
167
171
|
#if defined(__linux__)
|
|
168
172
|
close(lock[1]);
|
|
169
173
|
(void) !read(lock[0], lock, 1);
|
|
174
|
+
close(lock[0]);
|
|
170
175
|
#endif
|
|
171
176
|
// try gdb
|
|
172
177
|
execlp("gdb", "gdb", "--batch",
|
|
@@ -195,7 +200,7 @@ static void ggml_print_backtrace(void) {
|
|
|
195
200
|
}
|
|
196
201
|
}
|
|
197
202
|
#else
|
|
198
|
-
|
|
203
|
+
void ggml_print_backtrace(void) {
|
|
199
204
|
// platform not supported
|
|
200
205
|
}
|
|
201
206
|
#endif
|
|
@@ -216,6 +221,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
|
|
|
216
221
|
abort();
|
|
217
222
|
}
|
|
218
223
|
|
|
224
|
+
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
|
|
225
|
+
|
|
219
226
|
//
|
|
220
227
|
// logging
|
|
221
228
|
//
|
|
@@ -881,12 +888,6 @@ struct ggml_context {
|
|
|
881
888
|
struct ggml_object * objects_end;
|
|
882
889
|
};
|
|
883
890
|
|
|
884
|
-
struct ggml_context_container {
|
|
885
|
-
bool used;
|
|
886
|
-
|
|
887
|
-
struct ggml_context context;
|
|
888
|
-
};
|
|
889
|
-
|
|
890
891
|
//
|
|
891
892
|
// data types
|
|
892
893
|
//
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#include "ggml-impl.h"
|
|
2
|
+
|
|
3
|
+
#include <cstdlib>
|
|
4
|
+
#include <exception>
|
|
5
|
+
|
|
6
|
+
static std::terminate_handler previous_terminate_handler;
|
|
7
|
+
|
|
8
|
+
GGML_NORETURN static void ggml_uncaught_exception() {
|
|
9
|
+
ggml_print_backtrace();
|
|
10
|
+
if (previous_terminate_handler) {
|
|
11
|
+
previous_terminate_handler();
|
|
12
|
+
}
|
|
13
|
+
abort(); // unreachable unless previous_terminate_handler was nullptr
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
static bool ggml_uncaught_exception_init = []{
|
|
17
|
+
const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
|
|
18
|
+
if (GGML_NO_BACKTRACE) {
|
|
19
|
+
return false;
|
|
20
|
+
}
|
|
21
|
+
const auto prev{std::get_terminate()};
|
|
22
|
+
GGML_ASSERT(prev != ggml_uncaught_exception);
|
|
23
|
+
previous_terminate_handler = prev;
|
|
24
|
+
std::set_terminate(ggml_uncaught_exception);
|
|
25
|
+
return true;
|
|
26
|
+
}();
|
|
@@ -347,11 +347,28 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
347
347
|
int64_t n_tensors = 0;
|
|
348
348
|
|
|
349
349
|
if (ok && gr.read(ctx->version)) {
|
|
350
|
-
if (ctx->version ==
|
|
350
|
+
if (ok && ctx->version == 0) {
|
|
351
|
+
GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
|
|
352
|
+
ok = false;
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
/*
|
|
356
|
+
* bit layout is different when reading non-native endian models.
|
|
357
|
+
* assuming that the GGUF version is 3, the non-native endian model
|
|
358
|
+
* would read it as 0x30000000. we can use the AND operation against
|
|
359
|
+
* the last 4 hexadecimal digits to check if the model is the same
|
|
360
|
+
* endianness as the host system.
|
|
361
|
+
*/
|
|
362
|
+
if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
|
|
363
|
+
GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
|
|
364
|
+
ok = false;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
if (ok && ctx->version == 1) {
|
|
351
368
|
GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
|
|
352
369
|
ok = false;
|
|
353
370
|
}
|
|
354
|
-
if (ctx->version > GGUF_VERSION) {
|
|
371
|
+
if (ok && ctx->version > GGUF_VERSION) {
|
|
355
372
|
GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
|
|
356
373
|
__func__, ctx->version, GGUF_VERSION);
|
|
357
374
|
ok = false;
|
|
@@ -291,6 +291,7 @@ class MODEL_ARCH(IntEnum):
|
|
|
291
291
|
BERT = auto()
|
|
292
292
|
NOMIC_BERT = auto()
|
|
293
293
|
NOMIC_BERT_MOE = auto()
|
|
294
|
+
NEO_BERT = auto()
|
|
294
295
|
JINA_BERT_V2 = auto()
|
|
295
296
|
BLOOM = auto()
|
|
296
297
|
STABLELM = auto()
|
|
@@ -343,6 +344,8 @@ class MODEL_ARCH(IntEnum):
|
|
|
343
344
|
WAVTOKENIZER_DEC = auto()
|
|
344
345
|
PLM = auto()
|
|
345
346
|
BAILINGMOE = auto()
|
|
347
|
+
DOTS1 = auto()
|
|
348
|
+
ARCEE = auto()
|
|
346
349
|
|
|
347
350
|
|
|
348
351
|
class VISION_PROJECTOR_TYPE(IntEnum):
|
|
@@ -571,6 +574,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|
|
571
574
|
MODEL_ARCH.BERT: "bert",
|
|
572
575
|
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
|
|
573
576
|
MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe",
|
|
577
|
+
MODEL_ARCH.NEO_BERT: "neo-bert",
|
|
574
578
|
MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
|
|
575
579
|
MODEL_ARCH.BLOOM: "bloom",
|
|
576
580
|
MODEL_ARCH.STABLELM: "stablelm",
|
|
@@ -623,6 +627,8 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|
|
623
627
|
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
|
|
624
628
|
MODEL_ARCH.PLM: "plm",
|
|
625
629
|
MODEL_ARCH.BAILINGMOE: "bailingmoe",
|
|
630
|
+
MODEL_ARCH.DOTS1: "dots1",
|
|
631
|
+
MODEL_ARCH.ARCEE: "arcee",
|
|
626
632
|
}
|
|
627
633
|
|
|
628
634
|
VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
|
|
@@ -1077,6 +1083,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
|
1077
1083
|
MODEL_TENSOR.FFN_UP_EXP,
|
|
1078
1084
|
MODEL_TENSOR.LAYER_OUT_NORM,
|
|
1079
1085
|
],
|
|
1086
|
+
MODEL_ARCH.NEO_BERT: [
|
|
1087
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
|
1088
|
+
MODEL_TENSOR.ATTN_NORM,
|
|
1089
|
+
MODEL_TENSOR.ATTN_QKV,
|
|
1090
|
+
MODEL_TENSOR.ATTN_OUT,
|
|
1091
|
+
MODEL_TENSOR.FFN_NORM,
|
|
1092
|
+
MODEL_TENSOR.FFN_DOWN,
|
|
1093
|
+
MODEL_TENSOR.FFN_UP,
|
|
1094
|
+
MODEL_TENSOR.ENC_OUTPUT_NORM,
|
|
1095
|
+
MODEL_TENSOR.CLS,
|
|
1096
|
+
MODEL_TENSOR.CLS_OUT,
|
|
1097
|
+
],
|
|
1080
1098
|
MODEL_ARCH.JINA_BERT_V2: [
|
|
1081
1099
|
MODEL_TENSOR.TOKEN_EMBD,
|
|
1082
1100
|
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
|
@@ -2044,6 +2062,45 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
|
2044
2062
|
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
|
2045
2063
|
MODEL_TENSOR.FFN_UP_SHEXP,
|
|
2046
2064
|
],
|
|
2065
|
+
MODEL_ARCH.DOTS1: [
|
|
2066
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
|
2067
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
|
2068
|
+
MODEL_TENSOR.OUTPUT,
|
|
2069
|
+
MODEL_TENSOR.ATTN_NORM,
|
|
2070
|
+
MODEL_TENSOR.ATTN_Q,
|
|
2071
|
+
MODEL_TENSOR.ATTN_Q_NORM,
|
|
2072
|
+
MODEL_TENSOR.ATTN_K,
|
|
2073
|
+
MODEL_TENSOR.ATTN_K_NORM,
|
|
2074
|
+
MODEL_TENSOR.ATTN_V,
|
|
2075
|
+
MODEL_TENSOR.ATTN_OUT,
|
|
2076
|
+
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
|
2077
|
+
MODEL_TENSOR.FFN_NORM,
|
|
2078
|
+
MODEL_TENSOR.FFN_GATE,
|
|
2079
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
|
2080
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
|
2081
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
|
2082
|
+
MODEL_TENSOR.FFN_DOWN,
|
|
2083
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
|
2084
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
|
2085
|
+
MODEL_TENSOR.FFN_UP,
|
|
2086
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
|
2087
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
|
2088
|
+
],
|
|
2089
|
+
MODEL_ARCH.ARCEE: [
|
|
2090
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
|
2091
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
|
2092
|
+
MODEL_TENSOR.OUTPUT,
|
|
2093
|
+
MODEL_TENSOR.ROPE_FREQS,
|
|
2094
|
+
MODEL_TENSOR.ATTN_NORM,
|
|
2095
|
+
MODEL_TENSOR.ATTN_Q,
|
|
2096
|
+
MODEL_TENSOR.ATTN_K,
|
|
2097
|
+
MODEL_TENSOR.ATTN_V,
|
|
2098
|
+
MODEL_TENSOR.ATTN_OUT,
|
|
2099
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
|
2100
|
+
MODEL_TENSOR.FFN_NORM,
|
|
2101
|
+
MODEL_TENSOR.FFN_DOWN,
|
|
2102
|
+
MODEL_TENSOR.FFN_UP,
|
|
2103
|
+
],
|
|
2047
2104
|
# TODO
|
|
2048
2105
|
}
|
|
2049
2106
|
|
|
@@ -271,7 +271,7 @@ class GGUFWriter:
|
|
|
271
271
|
|
|
272
272
|
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
|
|
273
273
|
if any(key in kv_data for kv_data in self.kv_data):
|
|
274
|
-
|
|
274
|
+
logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
|
|
275
275
|
|
|
276
276
|
self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
|
|
277
277
|
|
|
@@ -935,6 +935,9 @@ class GGUFWriter:
|
|
|
935
935
|
def add_eom_token_id(self, id: int) -> None:
|
|
936
936
|
self.add_uint32(Keys.Tokenizer.EOM_ID, id)
|
|
937
937
|
|
|
938
|
+
def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
|
|
939
|
+
self.add_array(Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch), labels)
|
|
940
|
+
|
|
938
941
|
# for vision models
|
|
939
942
|
|
|
940
943
|
def add_clip_has_vision_encoder(self, value: bool) -> None:
|
|
@@ -31,6 +31,7 @@ class TensorNameMap:
|
|
|
31
31
|
"model.embeddings", # rwkv7
|
|
32
32
|
"model.word_embeddings", # bailingmoe
|
|
33
33
|
"language_model.model.embed_tokens", # llama4
|
|
34
|
+
"encoder", # neobert
|
|
34
35
|
),
|
|
35
36
|
|
|
36
37
|
# Token type embeddings
|
|
@@ -134,6 +135,7 @@ class TensorNameMap:
|
|
|
134
135
|
"rwkv.blocks.{bid}.ln1", # rwkv6
|
|
135
136
|
"model.layers.{bid}.ln1", # rwkv7
|
|
136
137
|
"model.layers.{bid}.input_layernorm", # llama4
|
|
138
|
+
"transformer_encoder.{bid}.attention_norm", # neobert
|
|
137
139
|
),
|
|
138
140
|
|
|
139
141
|
# Attention norm 2
|
|
@@ -161,6 +163,7 @@ class TensorNameMap:
|
|
|
161
163
|
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
|
162
164
|
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
|
163
165
|
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
|
166
|
+
"transformer_encoder.{bid}.qkv", # neobert
|
|
164
167
|
),
|
|
165
168
|
|
|
166
169
|
# Attention query
|
|
@@ -236,6 +239,7 @@ class TensorNameMap:
|
|
|
236
239
|
"transformer.layers.{bid}.attn.out_proj", # openelm
|
|
237
240
|
"transformer.h.{bid}.attn.attention.out_proj", # exaone
|
|
238
241
|
"model.layers.{bid}.self_attn.o_proj", # llama4
|
|
242
|
+
"transformer_encoder.{bid}.wo", # neobert
|
|
239
243
|
),
|
|
240
244
|
|
|
241
245
|
# Attention output norm
|
|
@@ -276,6 +280,7 @@ class TensorNameMap:
|
|
|
276
280
|
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
|
277
281
|
"transformer.layers.{bid}.ffn_norm", # openelm
|
|
278
282
|
"model.layers.{bid}.post_attention_layernorm", # llama4
|
|
283
|
+
"transformer_encoder.{bid}.ffn_norm", # neobert
|
|
279
284
|
),
|
|
280
285
|
|
|
281
286
|
# Post feed-forward norm
|
|
@@ -305,7 +310,7 @@ class TensorNameMap:
|
|
|
305
310
|
),
|
|
306
311
|
|
|
307
312
|
MODEL_TENSOR.FFN_EXP_PROBS_B: (
|
|
308
|
-
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
|
|
313
|
+
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
|
|
309
314
|
),
|
|
310
315
|
|
|
311
316
|
# Feed-forward up
|
|
@@ -333,11 +338,14 @@ class TensorNameMap:
|
|
|
333
338
|
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
|
334
339
|
"encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe
|
|
335
340
|
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
|
336
|
-
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
|
341
|
+
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2 (split up/gate, no longer used)
|
|
342
|
+
"encoder.layer.{bid}.mlp.gated_layers", # jina-bert-v2 (GEGLU)
|
|
343
|
+
"encoder.layer.{bid}.mlp.up_gated_layer", # jina-v2-code (GEGLU)
|
|
337
344
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
|
338
345
|
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
|
339
346
|
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
|
340
347
|
"model.layers.{bid}.feed_forward.up_proj", # llama4
|
|
348
|
+
"transformer_encoder.{bid}.ffn.w12", # neobert
|
|
341
349
|
),
|
|
342
350
|
|
|
343
351
|
MODEL_TENSOR.FFN_UP_EXP: (
|
|
@@ -370,7 +378,7 @@ class TensorNameMap:
|
|
|
370
378
|
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
|
371
379
|
"model.layers.{bid}.feed_forward.w1", # internlm2
|
|
372
380
|
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
|
373
|
-
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
|
|
381
|
+
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2 (split up/gate, no longer used)
|
|
374
382
|
"transformer.h.{bid}.mlp.linear_1", # refact
|
|
375
383
|
"model.layers.{bid}.residual_mlp.w1", # arctic
|
|
376
384
|
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
|
@@ -420,6 +428,7 @@ class TensorNameMap:
|
|
|
420
428
|
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
|
421
429
|
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
|
422
430
|
"model.layers.{bid}.feed_forward.down_proj", # llama4
|
|
431
|
+
"transformer_encoder.{bid}.ffn.w3", # neobert
|
|
423
432
|
),
|
|
424
433
|
|
|
425
434
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
|
@@ -830,12 +839,14 @@ class TensorNameMap:
|
|
|
830
839
|
# TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg
|
|
831
840
|
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
|
832
841
|
"encoder.final_layer_norm", # t5
|
|
842
|
+
"layer_norm", # neobert
|
|
833
843
|
),
|
|
834
844
|
|
|
835
845
|
MODEL_TENSOR.CLS: (
|
|
836
846
|
"classifier", # jina
|
|
837
847
|
"classifier.dense", # roberta
|
|
838
848
|
"pre_classifier", # distillbert
|
|
849
|
+
"dense", # neobert
|
|
839
850
|
),
|
|
840
851
|
|
|
841
852
|
MODEL_TENSOR.CLS_OUT: (
|