@fugood/llama.node 0.3.13 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -1
- package/package.json +1 -1
- package/src/LlamaContext.cpp +98 -76
- package/src/LlamaContext.h +1 -1
- package/src/common.hpp +1 -2
- package/src/llama.cpp/.github/workflows/build.yml +60 -10
- package/src/llama.cpp/.github/workflows/server.yml +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +3 -3
- package/src/llama.cpp/common/arg.cpp +112 -11
- package/src/llama.cpp/common/chat.cpp +960 -266
- package/src/llama.cpp/common/chat.h +135 -0
- package/src/llama.cpp/common/common.cpp +27 -171
- package/src/llama.cpp/common/common.h +27 -67
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +4 -5
- package/src/llama.cpp/common/json-schema-to-grammar.h +0 -1
- package/src/llama.cpp/common/{minja.hpp → minja/minja.hpp} +37 -5
- package/src/llama.cpp/common/ngram-cache.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +45 -7
- package/src/llama.cpp/common/speculative.cpp +6 -5
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +45 -7
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +3 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +4 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -3
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +373 -107
- package/src/llama.cpp/examples/llava/clip.h +19 -3
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +341 -0
- package/src/llama.cpp/examples/llava/llava.cpp +4 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +30 -11
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -0
- package/src/llama.cpp/examples/main/main.cpp +73 -28
- package/src/llama.cpp/examples/parallel/parallel.cpp +1 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +1 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +1 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +882 -237
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +35 -26
- package/src/llama.cpp/examples/run/run.cpp +110 -67
- package/src/llama.cpp/examples/server/server.cpp +82 -87
- package/src/llama.cpp/examples/server/utils.hpp +94 -107
- package/src/llama.cpp/examples/sycl/run-llama2.sh +2 -2
- package/src/llama.cpp/examples/tts/tts.cpp +251 -142
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-alloc.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-cpu.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +5 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -7
- package/src/llama.cpp/ggml/src/ggml-alloc.c +24 -15
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +58 -54
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +10 -8
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +3 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +132 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +151 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1396 -386
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1432 -151
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +22 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +259 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +61 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +288 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +15 -2
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +14 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -5
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +235 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +6 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +220 -116
- package/src/llama.cpp/ggml/src/ggml-quants.c +114 -114
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +51 -10
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +33 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +55 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +136 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +308 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +168 -721
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -77
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +146 -42
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +8 -3
- package/src/llama.cpp/include/llama.h +19 -5
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
- package/src/llama.cpp/requirements/requirements-all.txt +1 -0
- package/src/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
- package/src/llama.cpp/requirements.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +21 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +182 -182
- package/src/llama.cpp/src/llama-grammar.h +12 -3
- package/src/llama.cpp/src/llama-kv-cache.h +1 -0
- package/src/llama.cpp/src/llama-mmap.cpp +11 -1
- package/src/llama.cpp/src/llama-model.cpp +69 -5
- package/src/llama.cpp/src/llama-sampling.cpp +43 -10
- package/src/llama.cpp/src/llama-vocab.cpp +12 -0
- package/src/llama.cpp/src/llama.cpp +147 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +166 -110
- package/src/llama.cpp/tests/test-chat-template.cpp +32 -22
- package/src/llama.cpp/tests/test-chat.cpp +593 -395
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +63 -63
- package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -9
- package/src/llama.cpp/Sources/llama/llama.h +0 -4
- package/src/llama.cpp/common/chat.hpp +0 -55
- package/src/llama.cpp/common/{chat-template.hpp → minja/chat-template.hpp} +0 -0

package/src/llama.cpp/include/llama.h

@@ -105,6 +105,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_CHAMELEON      = 26,
         LLAMA_VOCAB_PRE_TYPE_MINERVA        = 27,
         LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM  = 28,
+        LLAMA_VOCAB_PRE_TYPE_GPT4O          = 29,
     };
 
     enum llama_rope_type {

@@ -477,6 +478,7 @@ extern "C" {
     LLAMA_API int32_t llama_model_n_embd    (const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_layer   (const struct llama_model * model);
     LLAMA_API int32_t llama_model_n_head    (const struct llama_model * model);
+    LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
 
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
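
The new llama_model_n_head_kv getter slots in next to the existing model metadata accessors above. A minimal sketch of how a caller might query it, assuming a model file is available ("model.gguf" is a placeholder path, not part of this package):

```cpp
// Sketch only: querying the new llama_model_n_head_kv() accessor alongside the
// existing metadata getters. "model.gguf" is a placeholder path.
#include "llama.h"
#include <cstdio>

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    if (model == nullptr) {
        return 1;
    }

    std::printf("n_embd    = %d\n", llama_model_n_embd(model));
    std::printf("n_layer   = %d\n", llama_model_n_layer(model));
    std::printf("n_head    = %d\n", llama_model_n_head(model));
    std::printf("n_head_kv = %d\n", llama_model_n_head_kv(model)); // new in this release

    llama_model_free(model);
    llama_backend_free();
    return 0;
}
```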

package/src/llama.cpp/include/llama.h (continued)

@@ -1203,17 +1205,29 @@ extern "C" {
             const char * grammar_str,
             const char * grammar_root);
 
-    /// @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
-    /// @param trigger_words A list of words that will trigger the grammar sampler. This may be updated to a loose regex syntax (w/ ^) in a near future.
-    /// @param trigger_tokens A list of tokens that will trigger the grammar sampler.
-    LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy(
+    DEPRECATED(LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy(
             const struct llama_vocab * vocab,
             const char * grammar_str,
             const char * grammar_root,
             const char ** trigger_words,
             size_t num_trigger_words,
             const llama_token * trigger_tokens,
-            size_t num_trigger_tokens);
+            size_t num_trigger_tokens),
+        "use llama_sampler_init_grammar_lazy_patterns instead");
+
+
+    /// @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
+    /// @param trigger_patterns A list of patterns that will trigger the grammar sampler. Pattern will be matched from the start of the generation output, and grammar sampler will be fed content starting from its first match group.
+    /// @param trigger_tokens A list of tokens that will trigger the grammar sampler. Grammar sampler will be fed content starting from the trigger token included.
+    LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy_patterns(
+            const struct llama_vocab * vocab,
+            const char * grammar_str,
+            const char * grammar_root,
+            const char ** trigger_patterns,
+            size_t num_trigger_patterns,
+            const llama_token * trigger_tokens,
+            size_t num_trigger_tokens);
+
 
     /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
     LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
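
Since llama_sampler_init_grammar_lazy is now deprecated in favour of llama_sampler_init_grammar_lazy_patterns, here is a hedged sketch of what migrating a caller could look like, based only on the declarations above; the grammar string, the "root" rule name, and the "<tool_call>" marker are illustrative placeholders:

```cpp
// Sketch only: moving from the deprecated trigger-word form to the new
// trigger-pattern form declared above. Grammar, root rule, and the
// "<tool_call>" marker are illustrative placeholders.
#include "llama.h"

static llama_sampler * make_lazy_grammar_sampler(const llama_vocab * vocab,
                                                 const char * grammar_str) {
    // Before (now DEPRECATED): the grammar was triggered by a literal word.
    // const char * trigger_words[] = { "<tool_call>" };
    // return llama_sampler_init_grammar_lazy(vocab, grammar_str, "root",
    //                                        trigger_words, 1, nullptr, 0);

    // After: a pattern matched from the start of the generated output; the
    // grammar is fed content starting from the first match group.
    const char * trigger_patterns[] = { "[\\s\\S]*?(<tool_call>[\\s\\S]*)" };
    return llama_sampler_init_grammar_lazy_patterns(
            vocab, grammar_str, "root",
            trigger_patterns, 1,
            /*trigger_tokens=*/ nullptr, /*num_trigger_tokens=*/ 0);
}
```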

package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp (new file)

@@ -0,0 +1,112 @@
+ied 4 ½ months
+__ggml_vocab_test__
+Führer
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+
+__ggml_vocab_test__
+
+
+
+__ggml_vocab_test__
+
+
+
+
+__ggml_vocab_test__
+
+
+__ggml_vocab_test__
+Hello world
+__ggml_vocab_test__
+ Hello world
+__ggml_vocab_test__
+Hello World
+__ggml_vocab_test__
+ Hello World
+__ggml_vocab_test__
+ Hello World!
+__ggml_vocab_test__
+Hello, world!
+__ggml_vocab_test__
+ Hello, world!
+__ggml_vocab_test__
+ this is 🦙.cpp
+__ggml_vocab_test__
+w048 7tuijk dsdfhu
+__ggml_vocab_test__
+нещо на Български
+__ggml_vocab_test__
+កាន់តែពិសេសអាចខលចេញ
+__ggml_vocab_test__
+🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+ Hello
+__ggml_vocab_test__
+  Hello
+__ggml_vocab_test__
+   Hello
+__ggml_vocab_test__
+    Hello
+__ggml_vocab_test__
+    Hello
+    Hello
+__ggml_vocab_test__
+ (
+__ggml_vocab_test__
+
+ =
+__ggml_vocab_test__
+' era
+__ggml_vocab_test__
+Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
+__ggml_vocab_test__
+!!!!!!
+__ggml_vocab_test__
+3
+__ggml_vocab_test__
+33
+__ggml_vocab_test__
+333
+__ggml_vocab_test__
+3333
+__ggml_vocab_test__
+33333
+__ggml_vocab_test__
+333333
+__ggml_vocab_test__
+3333333
+__ggml_vocab_test__
+33333333
+__ggml_vocab_test__
+333333333
+__ggml_vocab_test__
+Cửa Việt
+__ggml_vocab_test__
+ discards
+__ggml_vocab_test__
+
+
+
+
+
+
+
+
+
+
+
+🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
+__ggml_vocab_test__

package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out (new file)

@@ -0,0 +1,46 @@
+1165 220 19 220 27124 5503
+37 19194 259
+
+220
+256
+271
+197
+198
+279
+2499
+2775
+13225 2375
+32949 2375
+13225 5922
+32949 5922
+32949 5922 0
+13225 11 2375 0
+32949 11 2375 0
+495 382 9552 99 247 13 17159
+86 45404 220 22 10191 2852 22924 4750 6916
+3907 53641 1235 185386 8118
+11400 107516 15867 20804 22851 134178 77431 32010 104312 37984 16329 27751 89335
+112927 222 350 14559 8 22861 114 2524 64364 104 15148 350 76466 166700 121942 780 8 91349 350 7393 74471 484 853 1617 2316 6602 8
+13225
+32949
+220 32949
+256 32949
+271 32949
+271 32949 198 271 32949
+350
+198 314
+6 6837
+13225 11 342 70653 0 3253 553 481 22861 223 1423 7522 18165 2178 34058 22369 16412 32999 16 867 8208
+147475
+18
+2546
+15517
+15517 18
+15517 2546
+15517 15517
+15517 15517 18
+15517 15517 2546
+15517 15517 15517
+34 60213 53904
+2960 3098
+126470 25980 160432 16609 2775 4066 172261 19432 112927 222 350 14559 8 22861 114 2524 64364 104 15148 350 76466 166700 121942 780 8 91349 9552 99 247 4103 99 247 220 18 220 2546 220 15517 220 15517 18 220 15517 2546 220 15517 15517 220 15517 15517 18 220 15517 15517 2546 220 18 13 18 220 18 485 18 220 18 1008 18 44735 107516 15867 20804 22851 134178 77431 32010 104312 156437 1423 7522 18165 2178 34058 22369 16412 32999 16 867 8208 105024 106657 1967 53641 1235 185386 8118 22434 39336 26178 26178 168394 194663 27271 147475 25883 6961 9790 1339 461 83 1280 19016 1354 11 461 1099 481 3239 30 461 44 625 3239 17291 1520 480 11 461 35 481 1299 1236 17966 30 1416 6 27493 261 54602 43
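
The .inp/.out pair above follows the format shared by the other ggml-vocab-*.gguf fixtures: prompts separated by __ggml_vocab_test__ lines, and one line of space-separated token ids per prompt in the .out file. A rough sketch of reading such a pair, as a format illustration only (this is not the llama.cpp tokenizer test harness itself):

```cpp
// Sketch only: pairing a ggml-vocab .inp file with its .out file.
// Prompts are separated by "__ggml_vocab_test__" lines; each prompt has one
// line of space-separated token ids in the .out file.
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    std::ifstream inp("ggml-vocab-gpt-4o.gguf.inp");
    std::ifstream out("ggml-vocab-gpt-4o.gguf.out");

    std::vector<std::string> prompts;
    std::string line, current;
    bool first = true;
    while (std::getline(inp, line)) {
        if (line == "__ggml_vocab_test__") {
            prompts.push_back(current);  // one test prompt collected
            current.clear();
            first = true;
        } else {
            if (!first) current += "\n";
            current += line;
            first = false;
        }
    }

    for (const std::string & prompt : prompts) {
        std::getline(out, line);         // expected token ids for this prompt
        std::istringstream iss(line);
        std::vector<int> expected;
        int id;
        while (iss >> id) expected.push_back(id);
        std::cout << prompt.size() << " bytes of input -> "
                  << expected.size() << " expected tokens\n";
    }
    return 0;
}
```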

package/src/llama.cpp/src/llama-arch.cpp

@@ -36,6 +36,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_MINICPM3,   "minicpm3"   },
     { LLM_ARCH_GEMMA,      "gemma"      },
     { LLM_ARCH_GEMMA2,     "gemma2"     },
+    { LLM_ARCH_GEMMA3,     "gemma3"     },
     { LLM_ARCH_STARCODER2, "starcoder2" },
     { LLM_ARCH_MAMBA,      "mamba"      },
     { LLM_ARCH_XVERSE,     "xverse"     },

@@ -766,6 +767,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
         },
     },
+    {
+        LLM_ARCH_GEMMA3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,    "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,    "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_POST_NORM,  "blk.%d.post_ffw_norm" },
+        },
+    },
     {
         LLM_ARCH_STARCODER2,
         {