@fugood/llama.node 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +8 -8
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +44 -999
- package/src/llama.cpp/common/arg.h +2 -2
- package/src/llama.cpp/common/chat.cpp +17 -2
- package/src/llama.cpp/common/common.cpp +33 -0
- package/src/llama.cpp/common/common.h +15 -1
- package/src/llama.cpp/common/download.cpp +1054 -0
- package/src/llama.cpp/common/download.h +55 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/include/ggml.h +2 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +21 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -75
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +0 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
- package/src/llama.cpp/include/llama.h +7 -3
- package/src/llama.cpp/src/CMakeLists.txt +95 -0
- package/src/llama.cpp/src/llama-arch.cpp +108 -0
- package/src/llama.cpp/src/llama-arch.h +11 -0
- package/src/llama.cpp/src/llama-batch.cpp +63 -31
- package/src/llama.cpp/src/llama-batch.h +12 -1
- package/src/llama.cpp/src/llama-chat.cpp +32 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +36 -13
- package/src/llama.cpp/src/llama-context.h +5 -5
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +3 -3
- package/src/llama.cpp/src/llama-hparams.cpp +11 -1
- package/src/llama.cpp/src/llama-hparams.h +6 -0
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +33 -1
- package/src/llama.cpp/src/llama-kv-cells.h +44 -2
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +4 -3
- package/src/llama.cpp/src/llama-model.cpp +320 -13171
- package/src/llama.cpp/src/llama-model.h +8 -0
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/apertus.cpp +125 -0
- package/src/llama.cpp/src/models/arcee.cpp +135 -0
- package/src/llama.cpp/src/models/arctic.cpp +138 -0
- package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
- package/src/llama.cpp/src/models/baichuan.cpp +122 -0
- package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
- package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
- package/src/llama.cpp/src/models/bert.cpp +176 -0
- package/src/llama.cpp/src/models/bitnet.cpp +160 -0
- package/src/llama.cpp/src/models/bloom.cpp +101 -0
- package/src/llama.cpp/src/models/chameleon.cpp +178 -0
- package/src/llama.cpp/src/models/chatglm.cpp +132 -0
- package/src/llama.cpp/src/models/codeshell.cpp +111 -0
- package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
- package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/command-r.cpp +122 -0
- package/src/llama.cpp/src/models/dbrx.cpp +123 -0
- package/src/llama.cpp/src/models/deci.cpp +135 -0
- package/src/llama.cpp/src/models/deepseek.cpp +144 -0
- package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
- package/src/llama.cpp/src/models/dots1.cpp +134 -0
- package/src/llama.cpp/src/models/dream.cpp +105 -0
- package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/src/llama.cpp/src/models/ernie4-5.cpp +110 -0
- package/src/llama.cpp/src/models/exaone.cpp +114 -0
- package/src/llama.cpp/src/models/exaone4.cpp +123 -0
- package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
- package/src/llama.cpp/src/models/falcon.cpp +120 -0
- package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
- package/src/llama.cpp/src/models/gemma.cpp +112 -0
- package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
- package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
- package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
- package/src/llama.cpp/src/models/glm4.cpp +127 -0
- package/src/llama.cpp/src/models/gpt2.cpp +105 -0
- package/src/llama.cpp/src/models/gptneox.cpp +144 -0
- package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
- package/src/llama.cpp/src/models/granite.cpp +211 -0
- package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
- package/src/llama.cpp/src/models/grok.cpp +159 -0
- package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
- package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
- package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
- package/src/llama.cpp/src/models/internlm2.cpp +120 -0
- package/src/llama.cpp/src/models/jais.cpp +86 -0
- package/src/llama.cpp/src/models/jamba.cpp +106 -0
- package/src/llama.cpp/src/models/lfm2.cpp +173 -0
- package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
- package/src/llama.cpp/src/models/llada.cpp +99 -0
- package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
- package/src/llama.cpp/src/models/llama.cpp +155 -0
- package/src/llama.cpp/src/models/mamba.cpp +55 -0
- package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
- package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
- package/src/llama.cpp/src/models/models.h +481 -0
- package/src/llama.cpp/src/models/mpt.cpp +126 -0
- package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
- package/src/llama.cpp/src/models/nemotron.cpp +122 -0
- package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
- package/src/llama.cpp/src/models/olmo.cpp +121 -0
- package/src/llama.cpp/src/models/olmo2.cpp +150 -0
- package/src/llama.cpp/src/models/olmoe.cpp +124 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
- package/src/llama.cpp/src/models/openelm.cpp +124 -0
- package/src/llama.cpp/src/models/orion.cpp +123 -0
- package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
- package/src/llama.cpp/src/models/phi2.cpp +121 -0
- package/src/llama.cpp/src/models/phi3.cpp +152 -0
- package/src/llama.cpp/src/models/plamo.cpp +110 -0
- package/src/llama.cpp/src/models/plamo2.cpp +316 -0
- package/src/llama.cpp/src/models/plm.cpp +168 -0
- package/src/llama.cpp/src/models/qwen.cpp +108 -0
- package/src/llama.cpp/src/models/qwen2.cpp +117 -0
- package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
- package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
- package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
- package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
- package/src/llama.cpp/src/models/refact.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
- package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
- package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
- package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
- package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
- package/src/llama.cpp/src/models/smollm3.cpp +128 -0
- package/src/llama.cpp/src/models/stablelm.cpp +146 -0
- package/src/llama.cpp/src/models/starcoder.cpp +100 -0
- package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
- package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
- package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
- package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/src/llama.cpp/src/models/xverse.cpp +108 -0
package/src/llama.cpp/common/arg.h

@@ -59,8 +59,8 @@ struct common_arg {
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
     bool is_exclude(enum llama_example ex);
-    bool get_value_from_env(std::string & output);
-    bool has_value_from_env();
+    bool get_value_from_env(std::string & output) const;
+    bool has_value_from_env() const;
     std::string to_string();
 };
 
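The arg.h change only const-qualifies the two environment-variable helpers, so they can be called through a read-only view of an argument definition. A minimal sketch of what that enables; the helper function and include path below are assumptions, not part of the package:

```cpp
#include <cstdio>
#include <string>

#include "arg.h" // llama.cpp common/arg.h (include path assumed)

// Hypothetical helper: with the const qualifiers above, environment lookups
// work through a const reference to the argument definition.
static void print_env_override(const common_arg & arg) {
    std::string value;
    if (arg.has_value_from_env() && arg.get_value_from_env(value)) {
        printf("value overridden from environment: %s\n", value.c_str());
    }
}
```
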
package/src/llama.cpp/common/chat.cpp

@@ -300,7 +300,6 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
         }
         if (!msg.reasoning_content.empty()) {
            jmsg["reasoning_content"] = msg.reasoning_content;
-            jmsg["thinking"] = msg.reasoning_content; // gpt-oss
         }
         if (!msg.tool_name.empty()) {
             jmsg["name"] = msg.tool_name;

package/src/llama.cpp/common/chat.cpp

@@ -1797,7 +1796,23 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
 
 static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
+
+    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
+    auto adjusted_messages = json::array();
+    for (const auto & msg : inputs.messages) {
+        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
+        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+
+        if (has_reasoning_content && has_tool_calls) {
+            auto adjusted_message = msg;
+            adjusted_message["thinking"] = msg.at("reasoning_content");
+            adjusted_messages.push_back(adjusted_message);
+        } else {
+            adjusted_messages.push_back(msg);
+        }
+    }
+
+    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
 
     // Check if we need to replace the return token with end token during
     // inference and without generation prompt. For more details see:

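Together with the removal in common_chat_msgs_to_json_oaicompat above, this moves the gpt-oss "thinking" field out of the generic OAI-compatible serializer and into the gpt-oss template path, applied only to messages that carry both reasoning_content and tool_calls. A hedged, standalone sketch of that per-message adjustment; the message contents are illustrative, not from the package:

```cpp
#include <nlohmann/json.hpp>

// llama.cpp's chat code builds messages with nlohmann json (ordered_json).
using json = nlohmann::ordered_json;

int main() {
    // Illustrative assistant message carrying both reasoning and a tool call.
    json msg = {
        {"role", "assistant"},
        {"reasoning_content", "Need the forecast, so call the weather tool."},
        {"tool_calls", json::array({ json{{"type", "function"}} })}
    };

    // Same condition as in the hunk above: only adjust when both fields exist.
    if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string() &&
        msg.contains("tool_calls") && msg.at("tool_calls").is_array()) {
        msg["thinking"] = msg.at("reasoning_content");
    }
    // msg now also carries "thinking", which the gpt-oss template consumes.
    return 0;
}
```
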
package/src/llama.cpp/common/common.cpp

@@ -908,6 +908,39 @@ std::string fs_get_cache_file(const std::string & filename) {
     return cache_directory + filename;
 }
 
+std::vector<common_file_info> fs_list_files(const std::string & path) {
+    std::vector<common_file_info> files;
+    if (path.empty()) return files;
+
+    std::filesystem::path dir(path);
+    if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
+        return files;
+    }
+
+    for (const auto & entry : std::filesystem::directory_iterator(dir)) {
+        try {
+            // Only include regular files (skip directories)
+            const auto & p = entry.path();
+            if (std::filesystem::is_regular_file(p)) {
+                common_file_info info;
+                info.path = p.string();
+                info.name = p.filename().string();
+                try {
+                    info.size = static_cast<size_t>(std::filesystem::file_size(p));
+                } catch (const std::filesystem::filesystem_error &) {
+                    info.size = 0;
+                }
+                files.push_back(std::move(info));
+            }
+        } catch (const std::filesystem::filesystem_error &) {
+            // skip entries we cannot inspect
+            continue;
+        }
+    }
+
+    return files;
+}
+
 
 //
 // Model utils

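A minimal usage sketch for the new helper, assuming the llama.cpp common headers are on the include path; it lists the regular files in the cache directory returned by fs_get_cache_directory():

```cpp
#include <cstdio>

#include "common.h" // llama.cpp common (include path assumed)

int main() {
    // fs_list_files() returns only regular files; directories are skipped and
    // entries that cannot be inspected are silently ignored, as in the hunk above.
    for (const auto & f : fs_list_files(fs_get_cache_directory())) {
        printf("%s  (%zu bytes)\n", f.name.c_str(), f.size);
    }
    return 0;
}
```
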
package/src/llama.cpp/common/common.h

@@ -407,6 +407,8 @@ struct common_params {
     bool mmproj_use_gpu = true; // use GPU for multimodal model
     bool no_mmproj = false; // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
+    int image_min_tokens = -1;
+    int image_max_tokens = -1;
 
     // finetune
     struct lr_opt lr;

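The two new fields extend the multimodal options, but this diff does not show where they are consumed. A hedged sketch of how a caller might populate them; the values and surrounding function are purely illustrative:

```cpp
#include "common.h" // llama.cpp common (include path assumed)

// Hedged sketch: set the new image token bounds on common_params.
// How downstream code applies these bounds is not part of this diff.
static common_params make_mtmd_params() {
    common_params params;
    params.image.push_back("photo.png"); // illustrative image path
    params.image_min_tokens = 64;        // assumed lower bound on image tokens
    params.image_max_tokens = 1024;      // assumed upper bound on image tokens
    return params;
}
```
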
package/src/llama.cpp/common/common.h

@@ -459,7 +461,8 @@ struct common_params {
     float slot_prompt_similarity = 0.1f;
 
     // batched-bench params
-    bool is_pp_shared = false;
+    bool is_pp_shared   = false;
+    bool is_tg_separate = false;
 
     std::vector<int32_t> n_pp;
     std::vector<int32_t> n_tg;

package/src/llama.cpp/common/common.h

@@ -506,6 +509,10 @@ struct common_params {
     // return false from callback to abort model loading or true to continue
     llama_progress_callback load_progress_callback = NULL;
     void * load_progress_callback_user_data = NULL;
+
+    bool has_speculative() const {
+        return !speculative.model.path.empty() || !speculative.model.hf_repo.empty();
+    }
 };
 
 // call once at the start of a program if it uses libcommon

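A short sketch of how the new helper can gate speculative-decoding setup; the caller below is hypothetical and only uses fields referenced in the hunk itself:

```cpp
#include <cstdio>

#include "common.h" // llama.cpp common (include path assumed)

// Hypothetical caller: skip speculative-decoding setup unless a draft model
// was configured, using the new helper instead of checking both fields.
static void maybe_report_speculative(const common_params & params) {
    if (!params.has_speculative()) {
        printf("no draft model configured, speculative decoding disabled\n");
        return;
    }
    printf("draft model: %s%s\n",
           params.speculative.model.path.c_str(),
           params.speculative.model.hf_repo.c_str());
}
```
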
package/src/llama.cpp/common/common.h

@@ -606,6 +613,13 @@ bool fs_create_directory_with_parents(const std::string & path);
 std::string fs_get_cache_directory();
 std::string fs_get_cache_file(const std::string & filename);
 
+struct common_file_info {
+    std::string path;
+    std::string name;
+    size_t size = 0; // in bytes
+};
+std::vector<common_file_info> fs_list_files(const std::string & path);
+
 //
 // Model utils
 //