@fugood/llama.node 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -1
- package/lib/binding.js +3 -0
- package/lib/binding.ts +2 -0
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +25 -11
- package/src/llama.cpp/CMakeLists.txt +21 -6
- package/src/llama.cpp/common/CMakeLists.txt +6 -0
- package/src/llama.cpp/common/arg.cpp +65 -16
- package/src/llama.cpp/common/chat-parser.cpp +40 -0
- package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
- package/src/llama.cpp/common/chat-peg-parser.h +105 -0
- package/src/llama.cpp/common/chat.cpp +40 -29
- package/src/llama.cpp/common/chat.h +10 -1
- package/src/llama.cpp/common/common.cpp +24 -5
- package/src/llama.cpp/common/common.h +16 -5
- package/src/llama.cpp/common/download.cpp +18 -8
- package/src/llama.cpp/common/download.h +3 -1
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +15 -1
- package/src/llama.cpp/common/log.h +19 -12
- package/src/llama.cpp/common/peg-parser.cpp +1712 -0
- package/src/llama.cpp/common/peg-parser.h +459 -0
- package/src/llama.cpp/common/unicode.cpp +64 -0
- package/src/llama.cpp/common/unicode.h +22 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -48
- package/src/llama.cpp/ggml/include/ggml.h +7 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +60 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +30 -1
- package/src/llama.cpp/src/llama-arch.h +3 -0
- package/src/llama.cpp/src/llama-graph.cpp +3 -6
- package/src/llama.cpp/src/llama-hparams.h +2 -2
- package/src/llama.cpp/src/llama-impl.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +50 -6
- package/src/llama.cpp/src/llama-vocab.cpp +1 -2
- package/src/llama.cpp/src/models/mistral3.cpp +160 -0
- package/src/llama.cpp/src/models/models.h +4 -0
package/CMakeLists.txt
CHANGED
package/lib/binding.js
CHANGED
@@ -64,6 +64,9 @@ const loadModule = (variant) => __awaiter(void 0, void 0, void 0, function* () {
 /* no-op */
 }
 }
+const nDev = process.env.GGML_HEXAGON_NDEV;
+if (!nDev)
+process.env.GGML_HEXAGON_NDEV = '16';
 }
 let module = yield loadPlatformPackage(packageName);
 if (module) {
package/lib/binding.ts
CHANGED
@@ -616,6 +616,8 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
 /* no-op */
 }
 }
+const nDev = process.env.GGML_HEXAGON_NDEV
+if (!nDev) process.env.GGML_HEXAGON_NDEV = '16'
 }

 let module = await loadPlatformPackage(packageName)
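
The change above only fills in GGML_HEXAGON_NDEV = '16' when the variable is unset, so callers can still provide their own value before loading. A minimal TypeScript sketch, assuming loadModule is imported from this package's lib/binding entry (the exact import path may differ in your setup):

import { loadModule } from '@fugood/llama.node/lib/binding'

// Set the variable before calling loadModule; 1.4.3 only defaults it when it is unset.
process.env.GGML_HEXAGON_NDEV = '8'
const mod = await loadModule() // the optional variant argument is omitted here
console.log(process.env.GGML_HEXAGON_NDEV) // '8' is kept; '16' would appear only if it had been unset
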
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.2",
+  "version": "1.4.3",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.4.2",
-    "@fugood/node-llama-darwin-x64": "1.4.2",
-    "@fugood/node-llama-linux-arm64": "1.4.2",
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.2",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.2",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.2",
-    "@fugood/node-llama-linux-x64": "1.4.2",
-    "@fugood/node-llama-linux-x64-cuda": "1.4.2",
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.2",
-    "@fugood/node-llama-win32-arm64": "1.4.2",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.2",
-    "@fugood/node-llama-win32-x64": "1.4.2",
-    "@fugood/node-llama-win32-x64-cuda": "1.4.2",
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.2"
+    "@fugood/node-llama-darwin-arm64": "1.4.3",
+    "@fugood/node-llama-darwin-x64": "1.4.3",
+    "@fugood/node-llama-linux-arm64": "1.4.3",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.3",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.3",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.3",
+    "@fugood/node-llama-linux-x64": "1.4.3",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.3",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.3",
+    "@fugood/node-llama-win32-arm64": "1.4.3",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.3",
+    "@fugood/node-llama-win32-x64": "1.4.3",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.3",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.3"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED
@@ -1,8 +1,8 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index
+index 377b26846..1873b5206 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -
+@@ -149,9 +149,16 @@ if (LLAMA_LLGUIDANCE)
 set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()

@@ -20,8 +20,22 @@ index bb168e835..cfc0e2c2e 100644


 #
+diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
+index 74a7b6a46..7b7a1bd50 100644
+--- a/src/llama.cpp/common/chat-peg-parser.cpp
++++ b/src/llama.cpp/common/chat-peg-parser.cpp
+@@ -1,9 +1,5 @@
+ #include "chat-peg-parser.h"
+ 
+-#include <nlohmann/json.hpp>
+-
+-using json = nlohmann::json;
+-
+ static std::string_view trim_trailing_space(std::string_view sv) {
+ while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
+ sv.remove_suffix(1);
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index
+index 41a5bb42d..da5cf4b94 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -6,9 +6,6 @@
@@ -34,7 +48,7 @@ index b4a0f985e..2383d2ea9 100644
 #include <algorithm>
 #include <cstdio>
 #include <cctype>
-@@ -
+@@ -134,16 +131,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 return diffs;
 }

@@ -51,7 +65,7 @@ index b4a0f985e..2383d2ea9 100644
 struct templates_params {
 json messages;
 json tools;
-@@ -
+@@ -720,7 +707,7 @@ static std::string apply(
 tmpl_inputs.extra_context.merge_patch(*additional_context);
 }
 // TODO: add flag to control date/time, if only for testing purposes.
@@ -61,10 +75,10 @@ index b4a0f985e..2383d2ea9 100644
 minja::chat_template_options tmpl_opts;
 // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index
+index 6085510a4..263076ce2 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
-@@ -
+@@ -10,7 +10,18 @@
 #include <vector>
 #include <map>

@@ -85,10 +99,10 @@ index 754c411e2..71241a6cc 100644
 struct common_chat_tool_call {
 std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index
+index f07af1d86..1b10c7b13 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -
+@@ -1236,6 +1236,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
 mparams.n_gpu_layers = params.n_gpu_layers;
 }

@@ -97,10 +111,10 @@ index 0d7fd9a93..6bf3cc7ab 100644
 mparams.split_mode = params.split_mode;
 mparams.tensor_split = params.tensor_split;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index
+index 179113a4d..78aa24bc3 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -
+@@ -302,6 +302,7 @@ struct lr_opt {
 struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);

 struct common_params {
package/src/llama.cpp/CMakeLists.txt
CHANGED

@@ -33,10 +33,24 @@ endif()

 option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)

+option(LLAMA_WASM_MEM64 "llama: use 64-bit memory in WASM builds" ON)
+
 if (EMSCRIPTEN)
 set(BUILD_SHARED_LIBS_DEFAULT OFF)

-
+# Use 64-bit memory to support backend_get_memory queries
+# TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
+if (LLAMA_WASM_MEM64)
+add_compile_options("-sMEMORY64=1")
+add_link_options("-sMEMORY64=1")
+endif()
+add_link_options("-sALLOW_MEMORY_GROWTH=1")
+
+option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
+option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
+if (LLAMA_BUILD_HTML)
+set(CMAKE_EXECUTABLE_SUFFIX ".html")
+endif()
 else()
 if (MINGW)
 set(BUILD_SHARED_LIBS_DEFAULT OFF)
@@ -58,6 +72,12 @@ if (MSVC)
 add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
 endif()

+if (LLAMA_STANDALONE)
+# enable parallel builds for msbuild
+list(APPEND CMAKE_VS_GLOBALS UseMultiToolTask=true)
+list(APPEND CMAKE_VS_GLOBALS EnforceProcessCountAcrossBuilds=true)
+endif()
+
 if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
 set(LLAMA_TOOLS_INSTALL_DEFAULT OFF)
 else()
@@ -179,11 +199,6 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
 # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()

-if (MINGW)
-# Target Windows 8 for PrefetchVirtualMemory
-add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
-endif()
-
 #
 # build the library
 #
package/src/llama.cpp/common/CMakeLists.txt
CHANGED

@@ -52,6 +52,8 @@ add_library(${TARGET} STATIC
 chat-parser.h
 chat-parser-xml-toolcall.h
 chat-parser-xml-toolcall.cpp
+chat-peg-parser.cpp
+chat-peg-parser.h
 chat.cpp
 chat.h
 common.cpp
@@ -69,12 +71,16 @@ add_library(${TARGET} STATIC
 log.h
 ngram-cache.cpp
 ngram-cache.h
+peg-parser.cpp
+peg-parser.h
 regex-partial.cpp
 regex-partial.h
 sampling.cpp
 sampling.h
 speculative.cpp
 speculative.h
+unicode.cpp
+unicode.h
 )

 if (BUILD_SHARED_LIBS)
package/src/llama.cpp/common/arg.cpp
CHANGED

@@ -30,6 +30,7 @@
 #include <thread> // for hardware_concurrency
 #include <vector>

+#ifndef __EMSCRIPTEN__
 #ifdef __linux__
 #include <linux/limits.h>
 #elif defined(_WIN32)
@@ -41,6 +42,8 @@
 #else
 #include <sys/syslimits.h>
 #endif
+#endif
+
 #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083

 using json = nlohmann::ordered_json;
@@ -212,13 +215,13 @@ struct handle_model_result {
 static handle_model_result common_params_handle_model(
 struct common_params_model & model,
 const std::string & bearer_token,
-const std::string & model_path_default,
 bool offline) {
 handle_model_result result;
 // handle pre-fill default model path and url based on hf_repo and hf_file
 {
 if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
 model.path = common_docker_resolve_model(model.docker_repo);
+model.name = model.docker_repo; // set name for consistency
 } else if (!model.hf_repo.empty()) {
 // short-hand to avoid specifying --hf-file -> default it to --model
 if (model.hf_file.empty()) {
@@ -227,7 +230,8 @@ static handle_model_result common_params_handle_model(
 if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
 exit(1); // built without CURL, error message already printed
 }
-model.
+model.name = model.hf_repo; // repo name with tag
+model.hf_repo = auto_detected.repo; // repo name without tag
 model.hf_file = auto_detected.ggufFile;
 if (!auto_detected.mmprojFile.empty()) {
 result.found_mmproj = true;
@@ -257,8 +261,6 @@ static handle_model_result common_params_handle_model(
 model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
 }

-} else if (model.path.empty()) {
-model.path = model_path_default;
 }
 }

@@ -405,7 +407,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context

 // handle model and download
 {
-auto res = common_params_handle_model(params.model, params.hf_token,
+auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
 if (params.no_mmproj) {
 params.mmproj = {};
 } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
@@ -415,12 +417,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
 // only download mmproj if the current example is using it
 for (auto & ex : mmproj_examples) {
 if (ctx_arg.ex == ex) {
-common_params_handle_model(params.mmproj, params.hf_token,
+common_params_handle_model(params.mmproj, params.hf_token, params.offline);
 break;
 }
 }
-common_params_handle_model(params.speculative.model, params.hf_token,
-common_params_handle_model(params.vocoder.model, params.hf_token,
+common_params_handle_model(params.speculative.model, params.hf_token, params.offline);
+common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
+}
+
+// model is required (except for server)
+// TODO @ngxson : maybe show a list of available models in CLI in this case
+if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+throw std::invalid_argument("error: --model is required\n");
 }

 if (params.escape) {
@@ -1221,7 +1229,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 [](common_params & params) {
 params.warmup = false;
 }
-).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
+).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_PERPLEXITY}));
 add_opt(common_arg(
 {"--spm-infill"},
 string_format(
@@ -2090,11 +2098,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 add_opt(common_arg(
 {"-m", "--model"}, "FNAME",
 ex == LLAMA_EXAMPLE_EXPORT_LORA
-?
-:
-"model path (default: `models/$filename` with filename from `--hf-file` "
-"or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
-),
+? "model path from which to load base model"
+: "model path to load",
 [](common_params & params, const std::string & value) {
 params.model.path = value;
 }
@@ -2486,12 +2491,50 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 "path to save slot kv cache (default: disabled)",
 [](common_params & params, const std::string & value) {
 params.slot_save_path = value;
+if (!fs_is_directory(params.slot_save_path)) {
+throw std::invalid_argument("not a directory: " + value);
+}
 // if doesn't end with DIRECTORY_SEPARATOR, add it
 if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) {
 params.slot_save_path += DIRECTORY_SEPARATOR;
 }
 }
 ).set_examples({LLAMA_EXAMPLE_SERVER}));
+add_opt(common_arg(
+{"--media-path"}, "PATH",
+"directory for loading local media files; files can be accessed via file:// URLs using relative paths (default: disabled)",
+[](common_params & params, const std::string & value) {
+params.media_path = value;
+if (!fs_is_directory(params.media_path)) {
+throw std::invalid_argument("not a directory: " + value);
+}
+// if doesn't end with DIRECTORY_SEPARATOR, add it
+if (!params.media_path.empty() && params.media_path[params.media_path.size() - 1] != DIRECTORY_SEPARATOR) {
+params.media_path += DIRECTORY_SEPARATOR;
+}
+}
+).set_examples({LLAMA_EXAMPLE_SERVER}));
+add_opt(common_arg(
+{"--models-dir"}, "PATH",
+"directory containing models for the router server (default: disabled)",
+[](common_params & params, const std::string & value) {
+params.models_dir = value;
+}
+).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
+add_opt(common_arg(
+{"--models-max"}, "N",
+string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
+[](common_params & params, int value) {
+params.models_max = value;
+}
+).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
+add_opt(common_arg(
+{"--no-models-autoload"},
+"disables automatic loading of models (default: enabled)",
+[](common_params & params) {
+params.models_autoload = false;
+}
+).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
 add_opt(common_arg(
 {"--jinja"},
 string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
@@ -2639,7 +2682,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 [](common_params &, const std::string & value) {
 common_log_set_file(common_log_main(), value.c_str());
 }
-));
+).set_env("LLAMA_LOG_FILE"));
 add_opt(common_arg(
 {"--log-colors"}, "[on|off|auto]",
 "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
@@ -2674,7 +2717,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 ).set_env("LLAMA_OFFLINE"));
 add_opt(common_arg(
 {"-lv", "--verbosity", "--log-verbosity"}, "N",
-"Set the verbosity threshold. Messages with a higher verbosity will be ignored."
+string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"
+" - 0: generic output\n"
+" - 1: error\n"
+" - 2: warning\n"
+" - 3: info\n"
+" - 4: debug\n"
+"(default: %d)\n", params.verbosity),
 [](common_params & params, int value) {
 params.verbosity = value;
 common_log_set_verbosity_thold(value);
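
The hunk above adds several server-only options (--media-path, --models-dir, --models-max, --no-models-autoload); the last three can also be set through LLAMA_ARG_MODELS_DIR, LLAMA_ARG_MODELS_MAX and LLAMA_ARG_NO_MODELS_AUTOLOAD. A rough sketch of driving them from Node when spawning a llama-server binary; only the flag and variable names come from the diff, and a llama-server executable on PATH is assumed:

import { spawn } from 'node:child_process'

// Pass the new router options either as CLI flags or via their environment variables.
const server = spawn('llama-server', ['--no-models-autoload'], {
  env: {
    ...process.env,
    LLAMA_ARG_MODELS_DIR: '/srv/models', // same as --models-dir
    LLAMA_ARG_MODELS_MAX: '2',           // same as --models-max
  },
  stdio: 'inherit',
})

server.on('exit', (code) => console.log('llama-server exited with code', code))
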
package/src/llama.cpp/common/chat-parser.cpp
CHANGED

@@ -1,6 +1,8 @@
 #include "chat-parser.h"
+#include "chat-peg-parser.h"
 #include "common.h"
 #include "log.h"
+#include "peg-parser.h"
 #include "regex-partial.h"

 #include <algorithm>
@@ -1483,6 +1485,11 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
 }

 common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
+syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
+syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
+}
 common_chat_msg_parser builder(input, is_partial, syntax);
 try {
 common_chat_parse(builder);
@@ -1500,3 +1507,36 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
 }
 return msg;
 }
+
+common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
+if (parser.empty()) {
+throw std::runtime_error("Failed to parse due to missing parser definition.");
+}
+
+LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
+
+common_peg_parse_context ctx(input, is_partial);
+auto result = parser.parse(ctx);
+if (result.fail()) {
+throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
+}
+
+common_chat_msg msg;
+msg.role = "assistant";
+
+if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
+auto mapper = common_chat_peg_native_mapper(msg);
+mapper.from_ast(ctx.ast, result);
+} else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
+auto mapper = common_chat_peg_constructed_mapper(msg);
+mapper.from_ast(ctx.ast, result);
+} else {
+// Generic mapper
+auto mapper = common_chat_peg_mapper(msg);
+mapper.from_ast(ctx.ast, result);
+}
+if (!is_partial) {
+LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
+}
+return msg;
+}
package/src/llama.cpp/common/chat-peg-parser.cpp
ADDED

@@ -0,0 +1,110 @@
+#include "chat-peg-parser.h"
+
+static std::string_view trim_trailing_space(std::string_view sv) {
+while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
+sv.remove_suffix(1);
+}
+return sv;
+}
+
+void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+arena.visit(result, [this](const common_peg_ast_node & node) {
+map(node);
+});
+}
+
+void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
+bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
+bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+
+if (is_reasoning) {
+result.reasoning_content = std::string(trim_trailing_space(node.text));
+}
+
+if (is_content) {
+result.content = std::string(trim_trailing_space(node.text));
+}
+}
+
+void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
+common_chat_peg_mapper::map(node);
+
+bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
+bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
+bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
+bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+
+if (is_tool_open) {
+result.tool_calls.emplace_back();
+current_tool = &result.tool_calls.back();
+}
+
+if (is_tool_id && current_tool) {
+current_tool->id = std::string(trim_trailing_space(node.text));
+}
+
+if (is_tool_name && current_tool) {
+current_tool->name = std::string(trim_trailing_space(node.text));
+}
+
+if (is_tool_args && current_tool) {
+current_tool->arguments = std::string(trim_trailing_space(node.text));
+}
+}
+
+void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
+common_chat_peg_mapper::map(node);
+
+bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
+bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
+bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
+bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
+bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
+bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
+bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
+bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+
+if (is_tool_open) {
+result.tool_calls.emplace_back();
+current_tool = &result.tool_calls.back();
+arg_count = 0;
+}
+
+if (is_tool_name) {
+current_tool->name = std::string(node.text);
+current_tool->arguments = "{";
+}
+
+if (is_arg_open) {
+needs_closing_quote = false;
+}
+
+if (is_arg_name && current_tool) {
+if (arg_count > 0) {
+current_tool->arguments += ",";
+}
+current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
+++arg_count;
+}
+
+if (is_arg_string && current_tool) {
+// Serialize to JSON, but exclude the end quote
+std::string dumped = json(node.text).dump();
+current_tool->arguments += dumped.substr(0, dumped.size() - 1);
+needs_closing_quote = true;
+}
+
+if (is_arg_close && current_tool) {
+if (needs_closing_quote) {
+current_tool->arguments += "\"";
+}
+}
+
+if (is_arg_json && current_tool) {
+current_tool->arguments += std::string(trim_trailing_space(node.text));
+}
+
+if (is_tool_close && current_tool) {
+current_tool->arguments += "}";
+}
+}
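
For orientation, the constructed mapper in the new chat-peg-parser.cpp builds each tool call's arguments string incrementally: "{" when the tool name is seen, JSON-quoted keys for argument names, values appended with a deferred closing quote so partial strings stay appendable, and "}" when the tool closes. A rough TypeScript sketch of that assembly logic, with the node and tag types invented purely for illustration (the authoritative version is the C++ above):

// Hypothetical stand-ins for the C++ common_peg_ast_node tags used by the constructed mapper.
type Tag =
  | 'TOOL_OPEN' | 'TOOL_NAME' | 'TOOL_CLOSE'
  | 'TOOL_ARG_OPEN' | 'TOOL_ARG_NAME' | 'TOOL_ARG_STRING_VALUE'
  | 'TOOL_ARG_JSON_VALUE' | 'TOOL_ARG_CLOSE'

interface AstNode { tag: Tag; text: string }
interface ToolCall { name: string; arguments: string }

// Walk tagged nodes in document order and assemble each call's `arguments` as a JSON string.
function mapConstructedToolCalls(nodes: AstNode[]): ToolCall[] {
  const calls: ToolCall[] = []
  let current: ToolCall | undefined
  let argCount = 0
  let needsClosingQuote = false

  for (const node of nodes) {
    if (node.tag === 'TOOL_OPEN') {
      current = { name: '', arguments: '' }
      calls.push(current)
      argCount = 0
    }
    if (!current) continue
    if (node.tag === 'TOOL_NAME') {
      current.name = node.text
      current.arguments = '{'
    }
    if (node.tag === 'TOOL_ARG_OPEN') {
      needsClosingQuote = false
    }
    if (node.tag === 'TOOL_ARG_NAME') {
      if (argCount > 0) current.arguments += ','
      current.arguments += JSON.stringify(node.text.trimEnd()) + ':'
      argCount++
    }
    if (node.tag === 'TOOL_ARG_STRING_VALUE') {
      // Serialize to JSON but drop the closing quote so a partial value can keep growing.
      const dumped = JSON.stringify(node.text)
      current.arguments += dumped.slice(0, -1)
      needsClosingQuote = true
    }
    if (node.tag === 'TOOL_ARG_JSON_VALUE') {
      current.arguments += node.text.trimEnd()
    }
    if (node.tag === 'TOOL_ARG_CLOSE' && needsClosingQuote) {
      current.arguments += '"'
      needsClosingQuote = false
    }
    if (node.tag === 'TOOL_CLOSE') {
      current.arguments += '}'
    }
  }
  return calls
}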