@fugood/llama.node 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +47 -8
- package/lib/index.js +21 -1
- package/lib/index.ts +31 -1
- package/package.json +12 -3
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +336 -28
- package/src/LlamaContext.h +2 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/llama.cpp/.github/workflows/close-issue.yml:

@@ -17,7 +17,7 @@ jobs:
     steps:
       - uses: actions/stale@v5
         with:
-          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
+          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
package/src/llama.cpp/.github/workflows/server.yml:

@@ -81,13 +81,36 @@ jobs:
         with:
           node-version: '22.11.0'
 
+      - name: WebUI - Install dependencies
+        id: webui_lint
+        run: |
+          cd examples/server/webui
+          npm ci
+
+      - name: WebUI - Check code format
+        id: webui_format
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run format
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Files do not follow coding style. To fix: npm run format"
+            echo "${modified_files}"
+            exit 1
+          fi
+
       - name: Verify bundled index.html
         id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
           cd examples/server/webui
           git status
-
+
           npm run build
           git status
           modified_files="$(git status -s)"

@@ -205,7 +228,7 @@ jobs:
        run: |
          cd examples/server/tests
          $env:PYTHONIOENCODING = ":replace"
-         pytest -v -x
+         pytest -v -x -m "not slow"
 
      - name: Slow tests
        id: server_integration_tests_slow
package/src/llama.cpp/CMakeLists.txt:

@@ -16,6 +16,7 @@ endif()
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
     set(LLAMA_STANDALONE ON)

@@ -49,6 +50,8 @@ endif()
 if (MSVC)
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>")
+    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
 endif()
 
 #

@@ -77,6 +80,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)

@@ -185,27 +189,14 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
 
-# At the moment some compile definitions are placed within the ggml/src
-# directory but not exported on the `ggml` target. This could be improved by
-# determining _precisely_ which defines are necessary for the llama-config
-# package.
-#
-set(GGML_TRANSIENT_DEFINES)
-get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
-get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
-if (GGML_DIR_DEFINES)
-    list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
-endif()
-get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
-if (GGML_TARGET_DEFINES)
-    list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
-endif()
-get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
-# all public headers
 set(LLAMA_PUBLIC_HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
     ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
-
+
+set_target_properties(llama
+    PROPERTIES
+        PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
+
 install(TARGETS llama LIBRARY PUBLIC_HEADER)
 
 configure_package_config_file(

@@ -242,4 +233,4 @@ configure_file(cmake/llama.pc.in
         @ONLY)
 
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        DESTINATION
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
package/src/llama.cpp/cmake/build-info.cmake:

@@ -44,7 +44,7 @@ if(MSVC)
     set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
 else()
     execute_process(
-        COMMAND sh -c "
+        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
         OUTPUT_VARIABLE OUT
         OUTPUT_STRIP_TRAILING_WHITESPACE
     )
package/src/llama.cpp/common/CMakeLists.txt:

@@ -56,14 +56,19 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
+    chat.cpp
+    chat.hpp
+    chat-template.hpp
     common.cpp
     common.h
     console.cpp
     console.h
     json-schema-to-grammar.cpp
     json.hpp
+    llguidance.cpp
     log.cpp
     log.h
+    minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     sampling.cpp

@@ -87,6 +92,33 @@ if (LLAMA_CURL)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
 endif ()
 
+if (LLAMA_LLGUIDANCE)
+    include(ExternalProject)
+    set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
+    set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
+    ExternalProject_Add(llguidance_ext
+        GIT_REPOSITORY https://github.com/guidance-ai/llguidance
+        # v0.6.12:
+        GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09
+        PREFIX ${CMAKE_BINARY_DIR}/llguidance
+        SOURCE_DIR ${LLGUIDANCE_SRC}
+        BUILD_IN_SOURCE TRUE
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND cargo build --release
+        INSTALL_COMMAND ""
+        BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h
+        UPDATE_COMMAND ""
+    )
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
+
+    add_library(llguidance STATIC IMPORTED)
+    set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a)
+    add_dependencies(llguidance llguidance_ext)
+
+    target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
+endif ()
+
 target_include_directories(${TARGET} PUBLIC .)
 target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
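The new LLAMA_LLGUIDANCE branch above builds the Rust llguidance crate through ExternalProject (so configuring with -DLLAMA_LLGUIDANCE=ON requires a cargo toolchain), links the resulting static library into common, and defines LLAMA_USE_LLGUIDANCE publicly on the target. Code that depends on llguidance can therefore be gated on that macro; the snippet below is an illustrative sketch only, not taken from the llama.cpp sources.

```cpp
// Illustrative only: gate llguidance-dependent code on LLAMA_USE_LLGUIDANCE,
// the macro defined by the LLAMA_LLGUIDANCE CMake option shown above.
#include <cstdio>

int main() {
#ifdef LLAMA_USE_LLGUIDANCE
    std::printf("built with LLGuidance support\n");
#else
    std::printf("built without LLGuidance support (configure with -DLLAMA_LLGUIDANCE=ON)\n");
#endif
    return 0;
}
```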
package/src/llama.cpp/common/arg.cpp:

@@ -133,7 +133,8 @@ static void common_params_handle_model_default(
         const std::string & model_url,
         std::string & hf_repo,
         std::string & hf_file,
-        const std::string & hf_token
+        const std::string & hf_token,
+        const std::string & model_default) {
     if (!hf_repo.empty()) {
         // short-hand to avoid specifying --hf-file -> default it to --model
         if (hf_file.empty()) {

@@ -163,7 +164,7 @@ static void common_params_handle_model_default(
             model = fs_get_cache_file(string_split<std::string>(f, '/').back());
         }
     } else if (model.empty()) {
-        model =
+        model = model_default;
     }
 }
 

@@ -299,8 +300,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     }
 
     // TODO: refactor model params in a common struct
-    common_params_handle_model_default(params.model,
-    common_params_handle_model_default(params.
+    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token, DEFAULT_MODEL_PATH);
+    common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token, "");
+    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token, "");
 
     if (params.escape) {
         string_process_escapes(params.prompt);
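The arg.cpp hunks above thread a new model_default argument through common_params_handle_model_default and call it three times: the main model falls back to DEFAULT_MODEL_PATH, while the speculative (draft) and vocoder models fall back to an empty string, i.e. stay unset. A small sketch of that fallback rule follows; resolve_model_path and the default path are hypothetical stand-ins, not llama.cpp code.

```cpp
// Sketch of the fallback rule implied by the new model_default parameter.
// resolve_model_path is a hypothetical helper, not the llama.cpp function.
#include <cstdio>
#include <string>

static std::string resolve_model_path(const std::string & cli_model,
                                       const std::string & model_default) {
    // an explicit --model always wins; otherwise use the per-model default
    // (a real path for the main model, empty/unset for draft and vocoder)
    return cli_model.empty() ? model_default : cli_model;
}

int main() {
    std::printf("main : %s\n",  resolve_model_path("", "models/default.gguf").c_str()); // placeholder default
    std::printf("draft: '%s'\n", resolve_model_path("", "").c_str());                   // stays unset
    return 0;
}
```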
@@ -323,6 +325,14 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both");
     }
 
+    if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) {
+        throw std::runtime_error(string_format(
+            "error: the supplied chat template is not supported: %s%s\n",
+            params.chat_template.c_str(),
+            params.use_jinja ? "" : "\nnote: llama.cpp was started without --jinja, we only support commonly used templates"
+        ));
+    }
+
     return true;
 }
 
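Template validation now happens once in common_params_parse_ex, after all arguments have been parsed, so it can take --jinja into account instead of rejecting templates inside the --chat-template handler (see the later hunks, where that in-handler check is removed). Below is a hedged sketch of the same verify-then-throw shape; looks_supported is a stand-in predicate, while the real common_chat_verify_template() performs a much stricter check against the actual template handling.

```cpp
// Hedged sketch of the verify-then-throw pattern added above.
// looks_supported is a toy stand-in for common_chat_verify_template().
#include <stdexcept>
#include <string>

static bool looks_supported(const std::string & tmpl, bool use_jinja) {
    // toy heuristic for illustration only
    return use_jinja || tmpl.find("{%") == std::string::npos;
}

static void check_template_or_throw(const std::string & tmpl, bool use_jinja) {
    if (!tmpl.empty() && !looks_supported(tmpl, use_jinja)) {
        throw std::runtime_error("the supplied chat template is not supported" +
                                 std::string(use_jinja ? "" : " (hint: retry with --jinja)"));
    }
}

int main() {
    check_template_or_throw("", true); // empty template: nothing to verify
    return 0;
}
```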
@@ -867,7 +877,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.warmup = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING}));
     add_opt(common_arg(
         {"--spm-infill"},
         string_format(

@@ -1455,15 +1465,28 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--list-devices"},
         "print list of available devices and exit",
         [](common_params &) {
-
+            std::vector<ggml_backend_dev_t> rpc_devices;
+            std::vector<ggml_backend_dev_t> all_devices;
             for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
                 auto * dev = ggml_backend_dev_get(i);
                 if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
-
-
-
+                    ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
+                    if (ggml_backend_reg_name(reg) == std::string("RPC")) {
+                        rpc_devices.push_back(dev);
+                    } else {
+                        all_devices.push_back(dev);
+                    }
                 }
             }
+            // insert RPC devices in front
+            all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end());
+            printf("Available devices:\n");
+            for (size_t i = 0; i < all_devices.size(); ++i) {
+                auto * dev = all_devices[i];
+                size_t free, total;
+                ggml_backend_dev_memory(dev, &free, &total);
+                printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
+            }
             exit(0);
         }
     ));
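The reworked --list-devices handler above collects RPC devices into a separate vector and prepends them so they are printed first. The same ordering could also be expressed with std::stable_partition; the sketch below uses a stand-in struct purely to illustrate the ordering, whereas the real code iterates ggml_backend_dev_t handles.

```cpp
// Sketch: "RPC devices first" via std::stable_partition, using a stand-in
// struct instead of ggml_backend_dev_t, purely to illustrate the ordering.
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct dev_info {              // stand-in for a ggml backend device
    std::string backend_name;  // e.g. "RPC", "CUDA", "Vulkan"
    std::string dev_name;
};

int main() {
    std::vector<dev_info> devices = {
        {"CUDA", "CUDA0"}, {"RPC", "RPC[host:50052]"}, {"Vulkan", "Vulkan0"},
    };
    // keep relative order within each group, but move RPC devices to the front
    std::stable_partition(devices.begin(), devices.end(),
                          [](const dev_info & d) { return d.backend_name == "RPC"; });
    for (const auto & d : devices) {
        std::printf("%s: %s\n", d.dev_name.c_str(), d.backend_name.c_str());
    }
    return 0;
}
```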
@@ -1629,6 +1652,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.hf_repo = value;
         }
     ).set_env("LLAMA_ARG_HF_REPO"));
+    add_opt(common_arg(
+        {"-hfd", "-hfrd", "--hf-repo-draft"}, "<user>/<model>[:quant]",
+        "Same as --hf-repo, but for the draft model (default: unused)",
+        [](common_params & params, const std::string & value) {
+            params.speculative.hf_repo = value;
+        }
+    ).set_env("LLAMA_ARG_HFD_REPO"));
     add_opt(common_arg(
         {"-hff", "--hf-file"}, "FILE",
         "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",

@@ -1938,24 +1968,44 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--jinja"},
+        "use jinja template for chat (default: disabled)",
+        [](common_params & params) {
+            params.use_jinja = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
     add_opt(common_arg(
         {"--chat-template"}, "JINJA_TEMPLATE",
         string_format(
             "set custom jinja chat template (default: template taken from model's metadata)\n"
             "if suffix/prefix are specified, template will be disabled\n"
+            "only commonly used templates are accepted (unless --jinja is set before this flag):\n"
             "list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
         ),
         [](common_params & params, const std::string & value) {
-            if (!common_chat_verify_template(value)) {
-                throw std::runtime_error(string_format(
-                    "error: the supplied chat template is not supported: %s\n"
-                    "note: llama.cpp does not use jinja parser, we only support commonly used templates\n",
-                    value.c_str()
-                ));
-            }
             params.chat_template = value;
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
+    add_opt(common_arg(
+        {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
+        string_format(
+            "set custom jinja chat template file (default: template taken from model's metadata)\n"
+            "if suffix/prefix are specified, template will be disabled\n"
+            "only commonly used templates are accepted (unless --jinja is set before this flag):\n"
+            "list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
+        ),
+        [](common_params & params, const std::string & value) {
+            std::ifstream file(value);
+            if (!file) {
+                throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
+            }
+            std::copy(
+                std::istreambuf_iterator<char>(file),
+                std::istreambuf_iterator<char>(),
+                std::back_inserter(params.chat_template));
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
     add_opt(common_arg(
         {"-sps", "--slot-prompt-similarity"}, "SIMILARITY",
         string_format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity),