@fugood/llama.node 1.4.11 → 1.4.13
This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +31 -31
- package/src/llama.cpp/common/arg.cpp +128 -59
- package/src/llama.cpp/common/arg.h +1 -0
- package/src/llama.cpp/common/chat-parser.cpp +11 -0
- package/src/llama.cpp/common/chat.cpp +36 -7
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +42 -23
- package/src/llama.cpp/common/common.h +11 -1
- package/src/llama.cpp/common/llguidance.cpp +10 -6
- package/src/llama.cpp/common/regex-partial.cpp +13 -13
- package/src/llama.cpp/common/sampling.cpp +58 -14
- package/src/llama.cpp/common/sampling.h +3 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +23 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +86 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +15 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +768 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +0 -4
- package/src/llama.cpp/include/llama.h +100 -12
- package/src/llama.cpp/src/CMakeLists.txt +4 -0
- package/src/llama.cpp/src/llama-adapter.cpp +12 -3
- package/src/llama.cpp/src/llama-adapter.h +7 -1
- package/src/llama.cpp/src/llama-arch.cpp +78 -0
- package/src/llama.cpp/src/llama-arch.h +8 -0
- package/src/llama.cpp/src/llama-chat.cpp +11 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +637 -49
- package/src/llama.cpp/src/llama-context.h +43 -1
- package/src/llama.cpp/src/llama-grammar.cpp +40 -13
- package/src/llama.cpp/src/llama-grammar.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +173 -5
- package/src/llama.cpp/src/llama-graph.h +71 -6
- package/src/llama.cpp/src/llama-hparams.cpp +4 -0
- package/src/llama.cpp/src/llama-hparams.h +12 -5
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +11 -4
- package/src/llama.cpp/src/llama-model-loader.cpp +23 -0
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model-saver.cpp +3 -0
- package/src/llama.cpp/src/llama-model.cpp +337 -26
- package/src/llama.cpp/src/llama-model.h +13 -2
- package/src/llama.cpp/src/llama-sampling.cpp +1259 -186
- package/src/llama.cpp/src/llama-sampling.h +19 -7
- package/src/llama.cpp/src/llama-vocab.cpp +101 -33
- package/src/llama.cpp/src/llama-vocab.h +2 -0
- package/src/llama.cpp/src/llama.cpp +87 -64
- package/src/llama.cpp/src/models/afmoe.cpp +9 -5
- package/src/llama.cpp/src/models/bert.cpp +4 -2
- package/src/llama.cpp/src/models/cogvlm.cpp +5 -3
- package/src/llama.cpp/src/models/cohere2-iswa.cpp +3 -0
- package/src/llama.cpp/src/models/deepseek2.cpp +1 -1
- package/src/llama.cpp/src/models/gemma-embedding.cpp +2 -6
- package/src/llama.cpp/src/models/gemma2-iswa.cpp +5 -2
- package/src/llama.cpp/src/models/gemma3.cpp +3 -4
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +4 -7
- package/src/llama.cpp/src/models/llama-iswa.cpp +6 -2
- package/src/llama.cpp/src/models/llama.cpp +19 -6
- package/src/llama.cpp/src/models/maincoder.cpp +117 -0
- package/src/llama.cpp/src/models/mimo2-iswa.cpp +123 -0
- package/src/llama.cpp/src/models/models.h +18 -0
- package/src/llama.cpp/src/models/modern-bert.cpp +116 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +5 -2
- package/src/llama.cpp/src/models/plamo3.cpp +128 -0
- package/src/llama.cpp/src/models/smallthinker.cpp +11 -5
- package/src/llama.cpp/src/unicode.cpp +23 -14
package/src/llama.cpp/src/llama-model.h +13 -2

@@ -24,12 +24,14 @@ enum llm_type {
     LLM_TYPE_17M,
     LLM_TYPE_22M,
     LLM_TYPE_33M,
+    LLM_TYPE_47M,
     LLM_TYPE_60M,
     LLM_TYPE_70M,
     LLM_TYPE_80M,
     LLM_TYPE_109M,
     LLM_TYPE_137M,
     LLM_TYPE_140M,
+    LLM_TYPE_149M,
     LLM_TYPE_160M,
     LLM_TYPE_190M,
     LLM_TYPE_220M,
@@ -39,6 +41,7 @@ enum llm_type {
     LLM_TYPE_335M,
     LLM_TYPE_350M,
     LLM_TYPE_360M,
+    LLM_TYPE_395M,
     LLM_TYPE_410M,
     LLM_TYPE_450M,
     LLM_TYPE_475M,
@@ -116,10 +119,12 @@ enum llm_type {
     LLM_TYPE_31B_A3_5B,
     LLM_TYPE_80B_A3B,   // Qwen3 Next
     LLM_TYPE_100B_A6B,
+    LLM_TYPE_102B_A12B, // Solar-Open
     LLM_TYPE_106B_A12B, // GLM-4.5-Air
     LLM_TYPE_230B_A10B, // Minimax M2
     LLM_TYPE_235B_A22B,
     LLM_TYPE_300B_A47B, // Ernie MoE big
+    LLM_TYPE_310B_A15B, // MiMo-V2-Flash
     LLM_TYPE_355B_A32B, // GLM-4.5
     LLM_TYPE_E2B,
     LLM_TYPE_E4B,
@@ -462,8 +467,6 @@ struct llama_model {
     struct ggml_tensor * dense_2_out_layers = nullptr;
     struct ggml_tensor * dense_3_out_layers = nullptr;
 
-    llama_model_params params;
-
     // gguf metadata
     std::unordered_map<std::string, std::string> gguf_kv;
 
@@ -473,6 +476,9 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
+    // for keeping track of extra nodes used by lora adapters
+    uint32_t n_lora_nodes = 0;
+
     int64_t t_load_us = 0;
     int64_t t_start_us = 0;
 
@@ -494,6 +500,9 @@ struct llama_model {
     size_t n_tensors() const;
     size_t n_devices() const;
 
+    uint32_t n_gpu_layers() const;
+    llama_split_mode split_mode() const;
+
     std::map<ggml_backend_buffer_type_t, size_t> memory_breakdown() const;
 
     // total number of parameters in the model
@@ -522,6 +531,8 @@ struct llama_model {
     ggml_cgraph * build_graph(const llm_graph_params & params) const;
 
 private:
+    llama_model_params params;
+
     struct impl;
     std::unique_ptr<impl> pimpl;
 };
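Beyond the new `llm_type` entries, these hunks tighten encapsulation: `llama_model_params params` moves from the public section of `struct llama_model` into `private:`, with `n_gpu_layers()` and `split_mode()` added as public accessors, and an `n_lora_nodes` counter is added to track (per its comment) the extra graph nodes used by LoRA adapters. The diff carries only the declarations; as a hedged sketch, the accessor definitions in llama-model.cpp presumably just forward to the now-private field, along these lines:

```cpp
// Sketch only: plausible definitions for the accessors declared above.
// The real bodies live in llama-model.cpp and are not part of this diff;
// this assumes a plain forward to llama_model's now-private `params`.
uint32_t llama_model::n_gpu_layers() const {
    return params.n_gpu_layers; // llama_model_params::n_gpu_layers (int32_t in llama.h)
}

llama_split_mode llama_model::split_mode() const {
    return params.split_mode;   // llama_model_params::split_mode
}
```

Whatever the exact bodies, the visibility change itself is definitive: code that previously read `model.params` directly now has to go through accessors like these.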