@fugood/llama.node 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +6 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +44 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +374 -19
- package/src/LlamaCompletionWorker.h +31 -10
- package/src/LlamaContext.cpp +216 -7
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
- package/src/llama.cpp/.github/workflows/build.yml +89 -767
- package/src/llama.cpp/.github/workflows/docker.yml +9 -6
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +19 -23
- package/src/llama.cpp/CMakeLists.txt +11 -1
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +35 -4
- package/src/llama.cpp/common/arg.cpp +844 -121
- package/src/llama.cpp/common/arg.h +9 -0
- package/src/llama.cpp/common/chat.cpp +129 -107
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +64 -518
- package/src/llama.cpp/common/common.h +35 -45
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
- package/src/llama.cpp/common/llguidance.cpp +31 -47
- package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
- package/src/llama.cpp/common/minja/minja.hpp +186 -127
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +60 -50
- package/src/llama.cpp/docs/build.md +122 -7
- package/src/llama.cpp/examples/CMakeLists.txt +2 -32
- package/src/llama.cpp/examples/batched/batched.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
- package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/sycl/build.sh +2 -2
- package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
- package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
- package/src/llama.cpp/ggml/include/ggml.h +76 -106
- package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
- package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
- package/src/llama.cpp/ggml/src/ggml.c +170 -265
- package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
- package/src/llama.cpp/include/llama.h +82 -22
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +5 -3
- package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +4 -2
- package/src/llama.cpp/src/llama-adapter.cpp +43 -1
- package/src/llama.cpp/src/llama-arch.cpp +163 -17
- package/src/llama.cpp/src/llama-arch.h +16 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +91 -16
- package/src/llama.cpp/src/llama-chat.h +7 -2
- package/src/llama.cpp/src/llama-context.cpp +479 -575
- package/src/llama.cpp/src/llama-context.h +44 -33
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +209 -157
- package/src/llama.cpp/src/llama-graph.h +38 -14
- package/src/llama.cpp/src/llama-hparams.h +13 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
- package/src/llama.cpp/src/llama-kv-cache.h +283 -171
- package/src/llama.cpp/src/llama-memory.h +12 -2
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
- package/src/llama.cpp/src/llama-model-loader.h +5 -3
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +1803 -330
- package/src/llama.cpp/src/llama-model.h +21 -2
- package/src/llama.cpp/src/llama-quant.cpp +33 -10
- package/src/llama.cpp/src/llama-sampling.cpp +25 -7
- package/src/llama.cpp/src/llama-vocab.cpp +86 -10
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +15 -1
- package/src/llama.cpp/tests/CMakeLists.txt +52 -31
- package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
- package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
- package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
- package/src/llama.cpp/tests/test-chat.cpp +15 -3
- package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
- package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
- package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
- package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
- package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
- package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
- package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
- package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
- package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
- package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
- package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
- package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
- package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
- package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
- package/src/llama.cpp/examples/llava/clip.h +0 -118
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
- package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
- package/src/llama.cpp/examples/llava/llava.cpp +0 -574
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
- package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp (new file; the token IDs in the matching .out below are tekken-style byte tokens, which identifies this pair as pixtral)
@@ -0,0 +1,112 @@
+ied 4 ½ months
+__ggml_vocab_test__
+Führer
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+ 
+__ggml_vocab_test__
+  
+__ggml_vocab_test__
+   
+__ggml_vocab_test__
+	
+__ggml_vocab_test__
+
+
+__ggml_vocab_test__
+
+
+
+__ggml_vocab_test__
+
+
+
+
+__ggml_vocab_test__
+	
+
+__ggml_vocab_test__
+Hello world
+__ggml_vocab_test__
+ Hello world
+__ggml_vocab_test__
+Hello World
+__ggml_vocab_test__
+ Hello World
+__ggml_vocab_test__
+ Hello World!
+__ggml_vocab_test__
+Hello, world!
+__ggml_vocab_test__
+ Hello, world!
+__ggml_vocab_test__
+ this is 🦙.cpp
+__ggml_vocab_test__
+w048 7tuijk dsdfhu
+__ggml_vocab_test__
+нещо на Български
+__ggml_vocab_test__
+កាន់តែពិសេសអាចខលចេញ
+__ggml_vocab_test__
+🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+ Hello
+__ggml_vocab_test__
+  Hello
+__ggml_vocab_test__
+   Hello
+__ggml_vocab_test__
+    Hello
+__ggml_vocab_test__
+    Hello
+    Hello
+__ggml_vocab_test__
+ (
+__ggml_vocab_test__
+
+ =
+__ggml_vocab_test__
+' era
+__ggml_vocab_test__
+Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
+__ggml_vocab_test__
+!!!!!!
+__ggml_vocab_test__
+3
+__ggml_vocab_test__
+33
+__ggml_vocab_test__
+333
+__ggml_vocab_test__
+3333
+__ggml_vocab_test__
+33333
+__ggml_vocab_test__
+333333
+__ggml_vocab_test__
+3333333
+__ggml_vocab_test__
+33333333
+__ggml_vocab_test__
+333333333
+__ggml_vocab_test__
+Cửa Việt
+__ggml_vocab_test__
+discards
+__ggml_vocab_test__
+
+
+
+
+
+
+
+
+
+
+
+🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
+__ggml_vocab_test__
package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out (new file)
@@ -0,0 +1,46 @@
+2014 1032 1052 1032 28504 6972
+1070 7088 1258
+
+1032
+1256
+1293
+1009
+1010
+1267
+4688
+1009 1010
+22177 4304
+45383 4304
+22177 5325
+45383 5325
+45383 5325 1033
+22177 1044 4304 1033
+45383 1044 4304 1033
+1593 1395 119685 1166 1153 1046 51228
+1119 1048 1052 1056 1032 1055 17391 23216 30203 7785 17279
+3337 30757 1902 4200 63073 3671
+1225 1158 1128 1225 1158 1182 1225 1158 1147 1225 1159 1139 1225 1158 1143 1225 1159 1130 1225 1158 1150 1225 1158 1183 1225 1158 1159 1225 21359 1225 1158 1159 1225 1158 1162 1225 1158 1182 1225 1158 1133 1225 1158 1129 1225 1158 1155 1225 1158 1133 1225 21359 1225 1158 1137
+1240 1159 1154 1128 1319 13052 1041 119685 1152 1182 29568 1240 1159 1140 1171 1239 1184 1143 1319 88181 1873 3659 1275 56421 1621 1041 126241 1133 1319 11234 1873 26303 1455 1934 2246 3754 10835 1041
+22177
+45383
+1032 45383
+1256 45383
+1293 45383
+1293 45383 1010 1293 45383
+1319
+1010 1376
+1039 4033
+22177 1044 1404 48054 1033 3075 1584 1636 119685 1152 1129 3082 26060 2998 63614 82278 1049 1051 1049 1052 1049 1053 1049 6434 6749
+7290 7290 7290
+1051
+1051 1051
+1051 1051 1051
+1051 1051 1051 1051
+1051 1051 1051 1051 1051
+1051 1051 1051 1051 1051 1051
+1051 1051 1051 1051 1051 1051 1051
+1051 1051 1051 1051 1051 1051 1051 1051
+1051 1051 1051 1051 1051 1051 1051 1051 1051
+1067 59503 28783
+3724 4058
+1010 1032 1267 1032 4688 1032 17152 1458 29356 1010 1256 1010 1293 1010 1260 1010 1652 1010 1240 1159 1154 1128 1319 13052 1041 119685 1152 1182 29568 1240 1159 1140 1171 1239 1184 1143 1319 88181 1873 3659 1275 56421 1621 1041 126241 1133 119685 1166 1153 1240 1159 1166 1153 1032 1051 1032 1051 1051 1032 1051 1051 1051 1032 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1051 1051 1051 1032 1051 1046 1051 1032 1051 1791 1051 1032 1051 2880 1051 71881 1158 1128 1225 1158 1182 1225 1158 1147 1225 1159 1139 1225 1158 1143 1225 1159 1130 1225 1158 1150 1225 1158 1183 1225 1158 1159 1225 21359 1225 1158 1159 1225 1158 1162 1225 1158 1182 1225 1158 1133 1240 1159 1152 1129 3082 26060 2998 63614 82278 1049 1051 1049 1052 1049 1053 1049 6434 6749 45577 1045 6626 43555 2843 30757 1902 4200 63073 3671 14931 20040 20040 1657 1657 1975 14135 14135 83923 7290 7290 7290 45509 45509 45509 1362 6483 2151 1576 1116 2189 1514 1681 2156 1044 1576 3609 1636 5257 1063 1576 1077 1605 5257 1362 7534 3180 1494 1044 1576 1068 1636 2479 2269 26883 1063 2837 1039 45654 1261 54297 1076
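These fixture pairs drive llama.cpp's tokenizer tests: the .inp file holds prompts separated by `__ggml_vocab_test__` lines, and the .out file holds one line of expected token IDs per prompt, in the same order. Below is a minimal, self-contained sketch of a reader for this format; the helper names and hard-coded paths are illustrative, not part of the package.

// Hypothetical fixture reader: pairs each prompt in a .gguf.inp file with the
// expected token IDs on the corresponding line of the .gguf.out file.
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> read_prompts(const std::string & path) {
    std::ifstream f(path);
    std::vector<std::string> prompts;
    std::string line, cur;
    bool first = true;
    while (std::getline(f, line)) {
        if (line == "__ggml_vocab_test__") {  // separator between test strings
            prompts.push_back(cur);
            cur.clear();
            first = true;
        } else {
            if (!first) cur += '\n';          // re-join multi-line prompts
            cur += line;
            first = false;
        }
    }
    return prompts;
}

static std::vector<std::vector<int>> read_expected(const std::string & path) {
    std::ifstream f(path);
    std::vector<std::vector<int>> out;
    std::string line;
    while (std::getline(f, line)) {           // one line of token IDs per prompt
        std::istringstream iss(line);
        std::vector<int> ids;
        int id;
        while (iss >> id) ids.push_back(id);
        out.push_back(ids);
    }
    return out;
}

int main() {
    auto prompts  = read_prompts("models/ggml-vocab-pixtral.gguf.inp");
    auto expected = read_expected("models/ggml-vocab-pixtral.gguf.out");
    std::cout << prompts.size() << " prompts, " << expected.size() << " expected token lists\n";
    // A real test would tokenize each prompt and compare against expected[i].
    return 0;
}

Multi-line prompts (the newline and indentation cases above) are why the reader re-joins lines between separators rather than treating every file line as its own prompt.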
package/src/llama.cpp/requirements/requirements-all.txt (old paths follow from the examples → tools renames listed above)
@@ -1,6 +1,6 @@
--r ../examples/llava/requirements.txt
--r ../examples/server/bench/requirements.txt
--r ../examples/server/tests/requirements.txt
+-r ../tools/mtmd/requirements.txt
+-r ../tools/server/bench/requirements.txt
+-r ../tools/server/tests/requirements.txt
 
 -r ./requirements-compare-llama-bench.txt
 -r ./requirements-pydantic.txt
@@ -11,3 +11,5 @@
 -r ./requirements-convert_legacy_llama.txt
 -r ./requirements-convert_llama_ggml_to_gguf.txt
 -r ./requirements-tool_bench.txt
+
+-r ./requirements-gguf_editor_gui.txt
package/src/llama.cpp/scripts/xxd.cmake
@@ -1,5 +1,5 @@
 # CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}`
-# Usage: cmake -DINPUT=examples/server/public/index.html -DOUTPUT=examples/server/index.html.hpp -P scripts/xxd.cmake
+# Usage: cmake -DINPUT=tools/server/public/index.html -DOUTPUT=tools/server/index.html.hpp -P scripts/xxd.cmake
 
 SET(INPUT "" CACHE STRING "Input File")
 SET(OUTPUT "" CACHE STRING "Output File")
package/src/llama.cpp/src/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(llama
             llama-memory.cpp
            llama-mmap.cpp
            llama-model-loader.cpp
+           llama-model-saver.cpp
            llama-model.cpp
            llama-quant.cpp
            llama-sampling.cpp
@@ -32,8 +33,9 @@ add_library(llama
            unicode.h
            )
 
-target_include_directories(llama PUBLIC . ../include)
-target_compile_features   (llama PRIVATE cxx_std_17) # don't bump
+target_include_directories(llama PRIVATE .)
+target_include_directories(llama PUBLIC ../include)
+target_compile_features   (llama PRIVATE cxx_std_17) # don't bump
 
 target_link_libraries(llama PUBLIC ggml)
 
package/src/llama.cpp/src/llama-adapter.cpp
@@ -247,6 +247,29 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
         }
     }
 
+    // get extra buffer types of the CPU
+    // TODO: a more general solution for non-CPU extra buft should be imlpemented in the future
+    // ref: https://github.com/ggml-org/llama.cpp/pull/12593#pullrequestreview-2718659948
+    std::vector<ggml_backend_buffer_type_t> buft_extra;
+    {
+        auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        if (!cpu_dev) {
+            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        }
+        auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
+
+        auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
+            ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
+
+        if (ggml_backend_dev_get_extra_bufts_fn) {
+            ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn(cpu_dev);
+            while (extra_bufts && *extra_bufts) {
+                buft_extra.emplace_back(*extra_bufts);
+                ++extra_bufts;
+            }
+        }
+    }
+
     // add tensors
     for (auto & it : ab_map) {
         const std::string & name = it.first;
@@ -263,7 +286,26 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
             throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model (hint: maybe wrong base model?)");
         }
 
-        ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
+        auto * buft = ggml_backend_buffer_get_type(model_tensor->buffer);
+
+        // do not load loras to extra buffer types (i.e. bufts for repacking) -> use the CPU in that case
+        for (auto & ex : buft_extra) {
+            if (ex == buft) {
+                LLAMA_LOG_WARN("%s: lora for '%s' cannot use buft '%s', fallback to CPU\n", __func__, model_tensor->name, ggml_backend_buft_name(buft));
+
+                auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+                if (!cpu_dev) {
+                    throw std::runtime_error(format("%s: no CPU backend found", __func__));
+                }
+                buft = ggml_backend_dev_buffer_type(cpu_dev);
+
+                break;
+            }
+        }
+
+        LLAMA_LOG_DEBUG("%s: lora for '%s' -> '%s'\n", __func__, model_tensor->name, ggml_backend_buft_name(buft));
+
+        ggml_context * dev_ctx = ctx_for_buft(buft);
         // validate tensor shape
         if (is_token_embd) {
            // expect B to be non-transposed, A and B are flipped; see llm_build_inp_embd()
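The hunks above make llama_adapter_lora_init_impl collect the CPU backend's "extra" buffer types (used for repacked weights) and fall back to a plain CPU buffer type, with a warning, when a LoRA tensor's base tensor lives in one; presumably the repacked layouts cannot serve the adapter's extra matmuls. Nothing changes at the API surface. A minimal sketch of the public adapter flow from llama.h, with placeholder paths and error handling trimmed:

#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("base-model.gguf", mparams); // placeholder path
    if (!model) return 1;

    // The buffer-type fallback shown in the diff runs inside this call, while
    // the adapter tensors are being placed next to their base tensors.
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf"); // placeholder path
    if (!adapter) { llama_model_free(model); return 1; }

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_init_from_model(model, cparams);
    llama_set_adapter_lora(ctx, adapter, 1.0f); // apply the adapter at full scale

    // ... run inference ...

    llama_free(ctx);
    llama_adapter_lora_free(adapter);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}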
package/src/llama.cpp/src/llama-arch.cpp
@@ -6,6 +6,7 @@
 
 static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_LLAMA, "llama" },
+    { LLM_ARCH_LLAMA4, "llama4" },
     { LLM_ARCH_DECI, "deci" },
     { LLM_ARCH_FALCON, "falcon" },
     { LLM_ARCH_GROK, "grok" },
@@ -18,6 +19,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_REFACT, "refact" },
     { LLM_ARCH_BERT, "bert" },
     { LLM_ARCH_NOMIC_BERT, "nomic-bert" },
+    { LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
     { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
     { LLM_ARCH_BLOOM, "bloom" },
     { LLM_ARCH_STABLELM, "stablelm" },
@@ -25,6 +27,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_QWEN2, "qwen2" },
     { LLM_ARCH_QWEN2MOE, "qwen2moe" },
     { LLM_ARCH_QWEN2VL, "qwen2vl" },
+    { LLM_ARCH_QWEN3, "qwen3" },
+    { LLM_ARCH_QWEN3MOE, "qwen3moe" },
     { LLM_ARCH_PHI2, "phi2" },
     { LLM_ARCH_PHI3, "phi3" },
     { LLM_ARCH_PHIMOE, "phimoe" },
@@ -51,6 +55,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_DEEPSEEK, "deepseek" },
     { LLM_ARCH_DEEPSEEK2, "deepseek2" },
     { LLM_ARCH_CHATGLM, "chatglm" },
+    { LLM_ARCH_GLM4, "glm4" },
     { LLM_ARCH_BITNET, "bitnet" },
     { LLM_ARCH_T5, "t5" },
     { LLM_ARCH_T5ENCODER, "t5encoder" },
@@ -65,6 +70,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
     { LLM_ARCH_CHAMELEON, "chameleon" },
     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
+    { LLM_ARCH_PLM, "plm" },
+    { LLM_ARCH_BAILINGMOE, "bailingmoe" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };
 
@@ -73,6 +80,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_GENERAL_ARCHITECTURE, "general.architecture" },
     { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
     { LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
+    { LLM_KV_GENERAL_FILE_TYPE, "general.file_type" },
     { LLM_KV_GENERAL_NAME, "general.name" },
     { LLM_KV_GENERAL_AUTHOR, "general.author" },
     { LLM_KV_GENERAL_VERSION, "general.version" },
@@ -99,6 +107,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_WEIGHTS_SCALE, "%s.expert_weights_scale" },
     { LLM_KV_EXPERT_WEIGHTS_NORM, "%s.expert_weights_norm" },
     { LLM_KV_EXPERT_GATING_FUNC, "%s.expert_gating_func" },
+    { LLM_KV_MOE_EVERY_N_LAYERS, "%s.moe_every_n_layers" },
     { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
     { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
     { LLM_KV_DECODER_START_TOKEN_ID, "%s.decoder_start_token_id" },
@@ -111,6 +120,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
     { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
     { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
+    { LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },
 
     { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
@@ -132,6 +142,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
     { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
+    { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
+    { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
 
     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -230,6 +242,35 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
         },
     },
+    {
+        LLM_ARCH_LLAMA4,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+        },
+    },
     {
         LLM_ARCH_DECI,
         {
@@ -433,6 +474,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_NOMIC_BERT_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+            { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
+            { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_LAYER_OUT_NORM, "blk.%d.layer_output_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_JINA_BERT_V2,
         {
@@ -561,6 +620,45 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
         },
     },
+    {
+        LLM_ARCH_QWEN3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_QWEN3MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_PHI2,
         {
@@ -1027,6 +1125,8 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_ATTN_Q_B, "blk.%d.attn_q_b" },
             { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
             { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
+            { LLM_TENSOR_ATTN_K_B, "blk.%d.attn_k_b" },
+            { LLM_TENSOR_ATTN_V_B, "blk.%d.attn_v_b" },
             { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
             { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
@@ -1043,6 +1143,22 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
         },
     },
+    {
+        LLM_ARCH_PLM,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_KV_A_MQA, "blk.%d.attn_kv_a_mqa" },
+            { LLM_TENSOR_ATTN_KV_A_NORM, "blk.%d.attn_kv_a_norm" },
+            { LLM_TENSOR_ATTN_KV_B, "blk.%d.attn_kv_b" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_CHATGLM,
         {
@@ -1061,6 +1177,25 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
         },
     },
+    {
+        LLM_ARCH_GLM4,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+        },
+    },
     {
         LLM_ARCH_BITNET,
         {
@@ -1346,6 +1481,9 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
             { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
             { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
         },
     },
     {
@@ -1392,6 +1530,29 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
         },
     },
+    {
+        LLM_ARCH_BAILINGMOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -1429,23 +1590,8 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {
-    {
-    {LLM_TENSOR_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_FFN_DOWN_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_FFN_GATE_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_FFN_UP_SHEXP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_Q_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_Q_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_KV_A_MQA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
-    {LLM_TENSOR_ATTN_KV_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ATTN_K_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ATTN_V_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_DEC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_DEC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_DEC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
package/src/llama.cpp/src/llama-arch.h
@@ -10,6 +10,7 @@
 
 enum llm_arch {
     LLM_ARCH_LLAMA,
+    LLM_ARCH_LLAMA4,
     LLM_ARCH_DECI,
     LLM_ARCH_FALCON,
     LLM_ARCH_BAICHUAN,
@@ -22,6 +23,7 @@ enum llm_arch {
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
     LLM_ARCH_NOMIC_BERT,
+    LLM_ARCH_NOMIC_BERT_MOE,
     LLM_ARCH_JINA_BERT_V2,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
@@ -29,6 +31,8 @@ enum llm_arch {
     LLM_ARCH_QWEN2,
     LLM_ARCH_QWEN2MOE,
     LLM_ARCH_QWEN2VL,
+    LLM_ARCH_QWEN3,
+    LLM_ARCH_QWEN3MOE,
     LLM_ARCH_PHI2,
     LLM_ARCH_PHI3,
     LLM_ARCH_PHIMOE,
@@ -55,6 +59,7 @@ enum llm_arch {
     LLM_ARCH_DEEPSEEK,
     LLM_ARCH_DEEPSEEK2,
     LLM_ARCH_CHATGLM,
+    LLM_ARCH_GLM4,
     LLM_ARCH_BITNET,
     LLM_ARCH_T5,
     LLM_ARCH_T5ENCODER,
@@ -69,6 +74,8 @@ enum llm_arch {
     LLM_ARCH_GRANITE_MOE,
     LLM_ARCH_CHAMELEON,
     LLM_ARCH_WAVTOKENIZER_DEC,
+    LLM_ARCH_PLM,
+    LLM_ARCH_BAILINGMOE,
     LLM_ARCH_UNKNOWN,
 };
 
@@ -77,6 +84,7 @@ enum llm_kv {
     LLM_KV_GENERAL_ARCHITECTURE,
     LLM_KV_GENERAL_QUANTIZATION_VERSION,
     LLM_KV_GENERAL_ALIGNMENT,
+    LLM_KV_GENERAL_FILE_TYPE,
     LLM_KV_GENERAL_NAME,
     LLM_KV_GENERAL_AUTHOR,
     LLM_KV_GENERAL_VERSION,
@@ -103,6 +111,7 @@ enum llm_kv {
     LLM_KV_EXPERT_WEIGHTS_SCALE,
     LLM_KV_EXPERT_WEIGHTS_NORM,
     LLM_KV_EXPERT_GATING_FUNC,
+    LLM_KV_MOE_EVERY_N_LAYERS,
     LLM_KV_POOLING_TYPE,
     LLM_KV_LOGIT_SCALE,
     LLM_KV_DECODER_START_TOKEN_ID,
@@ -115,6 +124,7 @@ enum llm_kv {
     LLM_KV_RESIDUAL_SCALE,
     LLM_KV_EMBEDDING_SCALE,
     LLM_KV_TOKEN_SHIFT_COUNT,
+    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
 
     LLM_KV_ATTENTION_HEAD_COUNT,
     LLM_KV_ATTENTION_HEAD_COUNT_KV,
@@ -136,6 +146,8 @@ enum llm_kv {
     LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
     LLM_KV_ATTENTION_SLIDING_WINDOW,
     LLM_KV_ATTENTION_SCALE,
+    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
+    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
     LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -249,6 +261,8 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,
+    LLM_TENSOR_POST_ATTN_NORM,
+    LLM_TENSOR_POST_MLP_NORM,
     LLM_TENSOR_SSM_IN,
     LLM_TENSOR_SSM_CONV1D,
     LLM_TENSOR_SSM_X,
@@ -296,6 +310,8 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_Q_B,
     LLM_TENSOR_ATTN_KV_A_MQA,
     LLM_TENSOR_ATTN_KV_B,
+    LLM_TENSOR_ATTN_K_B,
+    LLM_TENSOR_ATTN_V_B,
     LLM_TENSOR_ATTN_Q_A_NORM,
     LLM_TENSOR_ATTN_KV_A_NORM,
     LLM_TENSOR_ATTN_SUB_NORM,