@fugood/llama.node 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +8 -8
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/arg.cpp +44 -999
- package/src/llama.cpp/common/arg.h +2 -2
- package/src/llama.cpp/common/chat.cpp +17 -2
- package/src/llama.cpp/common/common.cpp +33 -0
- package/src/llama.cpp/common/common.h +15 -1
- package/src/llama.cpp/common/download.cpp +1054 -0
- package/src/llama.cpp/common/download.h +55 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/include/ggml.h +2 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +4 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +108 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/cpu-feats.cpp +50 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +21 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +172 -75
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +0 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +82 -21
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +25 -25
- package/src/llama.cpp/include/llama.h +7 -3
- package/src/llama.cpp/src/CMakeLists.txt +95 -0
- package/src/llama.cpp/src/llama-arch.cpp +108 -0
- package/src/llama.cpp/src/llama-arch.h +11 -0
- package/src/llama.cpp/src/llama-batch.cpp +63 -31
- package/src/llama.cpp/src/llama-batch.h +12 -1
- package/src/llama.cpp/src/llama-chat.cpp +32 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +36 -13
- package/src/llama.cpp/src/llama-context.h +5 -5
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +3 -3
- package/src/llama.cpp/src/llama-hparams.cpp +11 -1
- package/src/llama.cpp/src/llama-hparams.h +6 -0
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +3 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +33 -1
- package/src/llama.cpp/src/llama-kv-cells.h +44 -2
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +4 -3
- package/src/llama.cpp/src/llama-model.cpp +320 -13171
- package/src/llama.cpp/src/llama-model.h +8 -0
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/apertus.cpp +125 -0
- package/src/llama.cpp/src/models/arcee.cpp +135 -0
- package/src/llama.cpp/src/models/arctic.cpp +138 -0
- package/src/llama.cpp/src/models/arwkv7.cpp +86 -0
- package/src/llama.cpp/src/models/baichuan.cpp +122 -0
- package/src/llama.cpp/src/models/bailingmoe.cpp +144 -0
- package/src/llama.cpp/src/models/bailingmoe2.cpp +135 -0
- package/src/llama.cpp/src/models/bert.cpp +176 -0
- package/src/llama.cpp/src/models/bitnet.cpp +160 -0
- package/src/llama.cpp/src/models/bloom.cpp +101 -0
- package/src/llama.cpp/src/models/chameleon.cpp +178 -0
- package/src/llama.cpp/src/models/chatglm.cpp +132 -0
- package/src/llama.cpp/src/models/codeshell.cpp +111 -0
- package/src/llama.cpp/src/models/cogvlm.cpp +100 -0
- package/src/llama.cpp/src/models/cohere2-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/command-r.cpp +122 -0
- package/src/llama.cpp/src/models/dbrx.cpp +123 -0
- package/src/llama.cpp/src/models/deci.cpp +135 -0
- package/src/llama.cpp/src/models/deepseek.cpp +144 -0
- package/src/llama.cpp/src/models/deepseek2.cpp +236 -0
- package/src/llama.cpp/src/models/dots1.cpp +134 -0
- package/src/llama.cpp/src/models/dream.cpp +105 -0
- package/src/llama.cpp/src/models/ernie4-5-moe.cpp +150 -0
- package/src/llama.cpp/src/models/ernie4-5.cpp +110 -0
- package/src/llama.cpp/src/models/exaone.cpp +114 -0
- package/src/llama.cpp/src/models/exaone4.cpp +123 -0
- package/src/llama.cpp/src/models/falcon-h1.cpp +113 -0
- package/src/llama.cpp/src/models/falcon.cpp +120 -0
- package/src/llama.cpp/src/models/gemma-embedding.cpp +120 -0
- package/src/llama.cpp/src/models/gemma.cpp +112 -0
- package/src/llama.cpp/src/models/gemma2-iswa.cpp +125 -0
- package/src/llama.cpp/src/models/gemma3-iswa.cpp +131 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +377 -0
- package/src/llama.cpp/src/models/glm4-moe.cpp +153 -0
- package/src/llama.cpp/src/models/glm4.cpp +127 -0
- package/src/llama.cpp/src/models/gpt2.cpp +105 -0
- package/src/llama.cpp/src/models/gptneox.cpp +144 -0
- package/src/llama.cpp/src/models/granite-hybrid.cpp +196 -0
- package/src/llama.cpp/src/models/granite.cpp +211 -0
- package/src/llama.cpp/src/models/graph-context-mamba.cpp +283 -0
- package/src/llama.cpp/src/models/grok.cpp +159 -0
- package/src/llama.cpp/src/models/grovemoe.cpp +141 -0
- package/src/llama.cpp/src/models/hunyuan-dense.cpp +132 -0
- package/src/llama.cpp/src/models/hunyuan-moe.cpp +154 -0
- package/src/llama.cpp/src/models/internlm2.cpp +120 -0
- package/src/llama.cpp/src/models/jais.cpp +86 -0
- package/src/llama.cpp/src/models/jamba.cpp +106 -0
- package/src/llama.cpp/src/models/lfm2.cpp +173 -0
- package/src/llama.cpp/src/models/llada-moe.cpp +122 -0
- package/src/llama.cpp/src/models/llada.cpp +99 -0
- package/src/llama.cpp/src/models/llama-iswa.cpp +174 -0
- package/src/llama.cpp/src/models/llama.cpp +155 -0
- package/src/llama.cpp/src/models/mamba.cpp +55 -0
- package/src/llama.cpp/src/models/minicpm3.cpp +199 -0
- package/src/llama.cpp/src/models/minimax-m2.cpp +124 -0
- package/src/llama.cpp/src/models/models.h +481 -0
- package/src/llama.cpp/src/models/mpt.cpp +126 -0
- package/src/llama.cpp/src/models/nemotron-h.cpp +121 -0
- package/src/llama.cpp/src/models/nemotron.cpp +122 -0
- package/src/llama.cpp/src/models/neo-bert.cpp +104 -0
- package/src/llama.cpp/src/models/olmo.cpp +121 -0
- package/src/llama.cpp/src/models/olmo2.cpp +150 -0
- package/src/llama.cpp/src/models/olmoe.cpp +124 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +124 -0
- package/src/llama.cpp/src/models/openelm.cpp +124 -0
- package/src/llama.cpp/src/models/orion.cpp +123 -0
- package/src/llama.cpp/src/models/pangu-embedded.cpp +121 -0
- package/src/llama.cpp/src/models/phi2.cpp +121 -0
- package/src/llama.cpp/src/models/phi3.cpp +152 -0
- package/src/llama.cpp/src/models/plamo.cpp +110 -0
- package/src/llama.cpp/src/models/plamo2.cpp +316 -0
- package/src/llama.cpp/src/models/plm.cpp +168 -0
- package/src/llama.cpp/src/models/qwen.cpp +108 -0
- package/src/llama.cpp/src/models/qwen2.cpp +117 -0
- package/src/llama.cpp/src/models/qwen2moe.cpp +151 -0
- package/src/llama.cpp/src/models/qwen2vl.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3.cpp +117 -0
- package/src/llama.cpp/src/models/qwen3moe.cpp +124 -0
- package/src/llama.cpp/src/models/qwen3vl-moe.cpp +149 -0
- package/src/llama.cpp/src/models/qwen3vl.cpp +141 -0
- package/src/llama.cpp/src/models/refact.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6-base.cpp +162 -0
- package/src/llama.cpp/src/models/rwkv6.cpp +94 -0
- package/src/llama.cpp/src/models/rwkv6qwen2.cpp +86 -0
- package/src/llama.cpp/src/models/rwkv7-base.cpp +135 -0
- package/src/llama.cpp/src/models/rwkv7.cpp +90 -0
- package/src/llama.cpp/src/models/seed-oss.cpp +124 -0
- package/src/llama.cpp/src/models/smallthinker.cpp +120 -0
- package/src/llama.cpp/src/models/smollm3.cpp +128 -0
- package/src/llama.cpp/src/models/stablelm.cpp +146 -0
- package/src/llama.cpp/src/models/starcoder.cpp +100 -0
- package/src/llama.cpp/src/models/starcoder2.cpp +121 -0
- package/src/llama.cpp/src/models/t5-dec.cpp +166 -0
- package/src/llama.cpp/src/models/t5-enc.cpp +96 -0
- package/src/llama.cpp/src/models/wavtokenizer-dec.cpp +149 -0
- package/src/llama.cpp/src/models/xverse.cpp +108 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <string>
|
|
4
|
+
|
|
5
|
+
struct common_params_model;
|
|
6
|
+
|
|
7
|
+
//
|
|
8
|
+
// download functionalities
|
|
9
|
+
//
|
|
10
|
+
|
|
11
|
+
struct common_cached_model_info {
|
|
12
|
+
std::string manifest_path;
|
|
13
|
+
std::string user;
|
|
14
|
+
std::string model;
|
|
15
|
+
std::string tag;
|
|
16
|
+
size_t size = 0; // GGUF size in bytes
|
|
17
|
+
std::string to_string() const {
|
|
18
|
+
return user + "/" + model + ":" + tag;
|
|
19
|
+
}
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
struct common_hf_file_res {
|
|
23
|
+
std::string repo; // repo name with ":tag" removed
|
|
24
|
+
std::string ggufFile;
|
|
25
|
+
std::string mmprojFile;
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
|
|
30
|
+
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
|
|
31
|
+
* - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
|
|
32
|
+
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
|
|
33
|
+
* Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
|
|
34
|
+
*
|
|
35
|
+
* Return pair of <repo, file> (with "repo" already having tag removed)
|
|
36
|
+
*
|
|
37
|
+
* Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
|
|
38
|
+
*/
|
|
39
|
+
common_hf_file_res common_get_hf_file(
|
|
40
|
+
const std::string & hf_repo_with_tag,
|
|
41
|
+
const std::string & bearer_token,
|
|
42
|
+
bool offline);
|
|
43
|
+
|
|
44
|
+
// returns true if download succeeded
|
|
45
|
+
bool common_download_model(
|
|
46
|
+
const common_params_model & model,
|
|
47
|
+
const std::string & bearer_token,
|
|
48
|
+
bool offline);
|
|
49
|
+
|
|
50
|
+
// returns list of cached models
|
|
51
|
+
std::vector<common_cached_model_info> common_list_cached_models();
|
|
52
|
+
|
|
53
|
+
// resolve and download model from Docker registry
|
|
54
|
+
// return local path to downloaded model file
|
|
55
|
+
std::string common_docker_resolve_model(const std::string & docker);
|
|
@@ -168,7 +168,7 @@ option(GGML_RV_ZFH "ggml: enable riscv zfh" ON)
|
|
|
168
168
|
option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON)
|
|
169
169
|
option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON)
|
|
170
170
|
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
|
|
171
|
-
option(GGML_VXE "ggml: enable vxe"
|
|
171
|
+
option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})
|
|
172
172
|
|
|
173
173
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
|
174
174
|
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
|
|
@@ -242,6 +242,7 @@
|
|
|
242
242
|
#define GGML_ROPE_TYPE_NEOX 2
|
|
243
243
|
#define GGML_ROPE_TYPE_MROPE 8
|
|
244
244
|
#define GGML_ROPE_TYPE_VISION 24
|
|
245
|
+
#define GGML_ROPE_TYPE_IMROPE 40 // binary: 101000
|
|
245
246
|
|
|
246
247
|
#define GGML_MROPE_SECTIONS 4
|
|
247
248
|
|
|
@@ -2107,6 +2108,7 @@ extern "C" {
|
|
|
2107
2108
|
enum ggml_scale_mode {
|
|
2108
2109
|
GGML_SCALE_MODE_NEAREST = 0,
|
|
2109
2110
|
GGML_SCALE_MODE_BILINEAR = 1,
|
|
2111
|
+
GGML_SCALE_MODE_BICUBIC = 2,
|
|
2110
2112
|
|
|
2111
2113
|
GGML_SCALE_MODE_COUNT
|
|
2112
2114
|
};
|
|
@@ -308,6 +308,10 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
308
308
|
set(GGML_INTERNAL_${feat} ON)
|
|
309
309
|
endforeach()
|
|
310
310
|
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
311
|
+
foreach (feat VXE2 NNPA)
|
|
312
|
+
set(GGML_INTERNAL_${feat} OFF)
|
|
313
|
+
endforeach()
|
|
314
|
+
|
|
311
315
|
foreach (feat ${ARGN})
|
|
312
316
|
set(GGML_INTERNAL_${feat} ON)
|
|
313
317
|
endforeach()
|
|
@@ -377,9 +381,8 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
377
381
|
endif()
|
|
378
382
|
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
379
383
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
380
|
-
ggml_add_cpu_backend_variant(
|
|
381
|
-
|
|
382
|
-
# ggml_add_cpu_backend_variant(s390x_z17 Z17 VXE)
|
|
384
|
+
ggml_add_cpu_backend_variant(z15 Z15 VXE2)
|
|
385
|
+
ggml_add_cpu_backend_variant(z16 Z16 VXE2 NNPA)
|
|
383
386
|
else()
|
|
384
387
|
message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
|
|
385
388
|
endif()
|
|
@@ -126,25 +126,36 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
126
126
|
)
|
|
127
127
|
if (NOT ARM_MCPU_RESULT)
|
|
128
128
|
string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
|
|
129
|
+
string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")
|
|
130
|
+
|
|
131
|
+
# on some old GCC we need to read -march=
|
|
132
|
+
if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
|
|
133
|
+
set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
|
|
134
|
+
elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
|
|
135
|
+
set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
|
|
136
|
+
endif()
|
|
129
137
|
endif()
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
138
|
+
|
|
139
|
+
if ("${ARM_NATIVE_FLAG}" STREQUAL "")
|
|
140
|
+
set(ARM_NATIVE_FLAG -mcpu=native)
|
|
141
|
+
message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
|
|
142
|
+
else()
|
|
143
|
+
message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
|
|
133
144
|
endif()
|
|
134
145
|
|
|
135
146
|
include(CheckCXXSourceRuns)
|
|
136
147
|
|
|
137
148
|
function(check_arm_feature tag code)
|
|
138
149
|
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
|
139
|
-
set(CMAKE_REQUIRED_FLAGS "${
|
|
150
|
+
set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
|
|
140
151
|
check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
|
|
141
152
|
if (GGML_MACHINE_SUPPORTS_${tag})
|
|
142
|
-
set(
|
|
153
|
+
set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}" PARENT_SCOPE)
|
|
143
154
|
else()
|
|
144
|
-
set(CMAKE_REQUIRED_FLAGS "${
|
|
155
|
+
set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
|
|
145
156
|
check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
|
|
146
157
|
if (GGML_MACHINE_SUPPORTS_no${tag})
|
|
147
|
-
set(
|
|
158
|
+
set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}" PARENT_SCOPE)
|
|
148
159
|
endif()
|
|
149
160
|
endif()
|
|
150
161
|
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
|
@@ -155,7 +166,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
155
166
|
check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
|
|
156
167
|
check_arm_feature(sme "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")
|
|
157
168
|
|
|
158
|
-
list(APPEND ARCH_FLAGS "${
|
|
169
|
+
list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
|
|
159
170
|
else()
|
|
160
171
|
if (GGML_CPU_ARM_ARCH)
|
|
161
172
|
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
|
|
@@ -504,11 +515,18 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
504
515
|
endforeach()
|
|
505
516
|
endif()
|
|
506
517
|
|
|
507
|
-
if (GGML_VXE OR
|
|
508
|
-
message(STATUS "
|
|
518
|
+
if (GGML_VXE OR GGML_INTERNAL_VXE2)
|
|
519
|
+
message(STATUS "VXE2 enabled")
|
|
509
520
|
list(APPEND ARCH_FLAGS -mvx -mzvector)
|
|
510
|
-
list(APPEND ARCH_DEFINITIONS
|
|
521
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_VXE2)
|
|
511
522
|
endif()
|
|
523
|
+
|
|
524
|
+
if (GGML_INTERNAL_NNPA)
|
|
525
|
+
message(STATUS "NNPA enabled")
|
|
526
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_NNPA)
|
|
527
|
+
endif()
|
|
528
|
+
|
|
529
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} s390 ${ARCH_DEFINITIONS})
|
|
512
530
|
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
|
|
513
531
|
message(STATUS "Wasm detected")
|
|
514
532
|
list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
|