@fugood/llama.node 1.4.11 → 1.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +31 -31
- package/src/llama.cpp/common/arg.cpp +128 -59
- package/src/llama.cpp/common/arg.h +1 -0
- package/src/llama.cpp/common/chat-parser.cpp +11 -0
- package/src/llama.cpp/common/chat.cpp +36 -7
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +42 -23
- package/src/llama.cpp/common/common.h +11 -1
- package/src/llama.cpp/common/llguidance.cpp +10 -6
- package/src/llama.cpp/common/regex-partial.cpp +13 -13
- package/src/llama.cpp/common/sampling.cpp +58 -14
- package/src/llama.cpp/common/sampling.h +3 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +23 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +86 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +15 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +768 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +0 -4
- package/src/llama.cpp/include/llama.h +100 -12
- package/src/llama.cpp/src/CMakeLists.txt +4 -0
- package/src/llama.cpp/src/llama-adapter.cpp +12 -3
- package/src/llama.cpp/src/llama-adapter.h +7 -1
- package/src/llama.cpp/src/llama-arch.cpp +78 -0
- package/src/llama.cpp/src/llama-arch.h +8 -0
- package/src/llama.cpp/src/llama-chat.cpp +11 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +637 -49
- package/src/llama.cpp/src/llama-context.h +43 -1
- package/src/llama.cpp/src/llama-grammar.cpp +40 -13
- package/src/llama.cpp/src/llama-grammar.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +173 -5
- package/src/llama.cpp/src/llama-graph.h +71 -6
- package/src/llama.cpp/src/llama-hparams.cpp +4 -0
- package/src/llama.cpp/src/llama-hparams.h +12 -5
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +11 -4
- package/src/llama.cpp/src/llama-model-loader.cpp +23 -0
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model-saver.cpp +3 -0
- package/src/llama.cpp/src/llama-model.cpp +337 -26
- package/src/llama.cpp/src/llama-model.h +13 -2
- package/src/llama.cpp/src/llama-sampling.cpp +1259 -186
- package/src/llama.cpp/src/llama-sampling.h +19 -7
- package/src/llama.cpp/src/llama-vocab.cpp +101 -33
- package/src/llama.cpp/src/llama-vocab.h +2 -0
- package/src/llama.cpp/src/llama.cpp +87 -64
- package/src/llama.cpp/src/models/afmoe.cpp +9 -5
- package/src/llama.cpp/src/models/bert.cpp +4 -2
- package/src/llama.cpp/src/models/cogvlm.cpp +5 -3
- package/src/llama.cpp/src/models/cohere2-iswa.cpp +3 -0
- package/src/llama.cpp/src/models/deepseek2.cpp +1 -1
- package/src/llama.cpp/src/models/gemma-embedding.cpp +2 -6
- package/src/llama.cpp/src/models/gemma2-iswa.cpp +5 -2
- package/src/llama.cpp/src/models/gemma3.cpp +3 -4
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +4 -7
- package/src/llama.cpp/src/models/llama-iswa.cpp +6 -2
- package/src/llama.cpp/src/models/llama.cpp +19 -6
- package/src/llama.cpp/src/models/maincoder.cpp +117 -0
- package/src/llama.cpp/src/models/mimo2-iswa.cpp +123 -0
- package/src/llama.cpp/src/models/models.h +18 -0
- package/src/llama.cpp/src/models/modern-bert.cpp +116 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +5 -2
- package/src/llama.cpp/src/models/plamo3.cpp +128 -0
- package/src/llama.cpp/src/models/smallthinker.cpp +11 -5
- package/src/llama.cpp/src/unicode.cpp +23 -14
|
@@ -131,6 +131,8 @@ struct llama_model_loader {
|
|
|
131
131
|
template<typename T>
|
|
132
132
|
bool get_key_or_arr(enum llm_kv kid, T & result, uint32_t n, bool required = true);
|
|
133
133
|
|
|
134
|
+
bool get_key_or_arr(enum llm_kv kid, uint32_t & result, bool required = true);
|
|
135
|
+
|
|
134
136
|
std::string get_arch_name() const;
|
|
135
137
|
|
|
136
138
|
enum llm_arch get_arch() const;
|
|
@@ -146,6 +146,9 @@ void llama_model_saver::add_kv_from_model() {
|
|
|
146
146
|
add_kv(LLM_KV_VOCAB_SIZE, vocab.n_tokens());
|
|
147
147
|
add_kv(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train);
|
|
148
148
|
add_kv(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd);
|
|
149
|
+
if (hparams.n_embd_out > 0) {
|
|
150
|
+
add_kv(LLM_KV_EMBEDDING_LENGTH_OUT, hparams.n_embd_out);
|
|
151
|
+
}
|
|
149
152
|
add_kv(LLM_KV_BLOCK_COUNT, hparams.n_layer);
|
|
150
153
|
add_kv(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
|
|
151
154
|
add_kv(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff_arr, true);
|