@fugood/llama.node 0.3.7 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -0
- package/lib/index.js +16 -1
- package/lib/index.ts +16 -0
- package/package.json +1 -1
- package/src/EmbeddingWorker.cpp +4 -3
- package/src/LlamaCompletionWorker.cpp +4 -2
- package/src/LlamaContext.cpp +61 -6
- package/src/LlamaContext.h +1 -0
- package/src/common.hpp +6 -11
- package/src/llama.cpp/.github/workflows/build.yml +19 -17
- package/src/llama.cpp/.github/workflows/docker.yml +77 -30
- package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +22 -3
- package/src/llama.cpp/CMakeLists.txt +49 -24
- package/src/llama.cpp/common/arg.cpp +82 -26
- package/src/llama.cpp/common/arg.h +3 -0
- package/src/llama.cpp/common/common.cpp +192 -72
- package/src/llama.cpp/common/common.h +51 -18
- package/src/llama.cpp/common/ngram-cache.cpp +12 -12
- package/src/llama.cpp/common/ngram-cache.h +2 -2
- package/src/llama.cpp/common/sampling.cpp +11 -6
- package/src/llama.cpp/common/speculative.cpp +18 -15
- package/src/llama.cpp/docs/build.md +2 -0
- package/src/llama.cpp/examples/batched/batched.cpp +9 -7
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
- package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
- package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
- package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
- package/src/llama.cpp/examples/infill/infill.cpp +23 -24
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
- package/src/llama.cpp/examples/llava/clip.cpp +4 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
- package/src/llama.cpp/examples/llava/llava.cpp +2 -2
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
- package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
- package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
- package/src/llama.cpp/examples/main/main.cpp +51 -29
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
- package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
- package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
- package/src/llama.cpp/examples/run/run.cpp +175 -61
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
- package/src/llama.cpp/examples/server/httplib.h +1295 -409
- package/src/llama.cpp/examples/server/server.cpp +387 -181
- package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
- package/src/llama.cpp/examples/server/utils.hpp +170 -58
- package/src/llama.cpp/examples/simple/simple.cpp +9 -8
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
- package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
- package/src/llama.cpp/examples/tts/tts.cpp +64 -23
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +36 -145
- package/src/llama.cpp/ggml/include/gguf.h +202 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
- package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml.c +117 -1327
- package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
- package/src/llama.cpp/include/llama-cpp.h +6 -1
- package/src/llama.cpp/include/llama.h +138 -75
- package/src/llama.cpp/src/CMakeLists.txt +13 -1
- package/src/llama.cpp/src/llama-adapter.cpp +347 -0
- package/src/llama.cpp/src/llama-adapter.h +74 -0
- package/src/llama.cpp/src/llama-arch.cpp +1487 -0
- package/src/llama.cpp/src/llama-arch.h +400 -0
- package/src/llama.cpp/src/llama-batch.cpp +368 -0
- package/src/llama.cpp/src/llama-batch.h +88 -0
- package/src/llama.cpp/src/llama-chat.cpp +578 -0
- package/src/llama.cpp/src/llama-chat.h +52 -0
- package/src/llama.cpp/src/llama-context.cpp +1775 -0
- package/src/llama.cpp/src/llama-context.h +128 -0
- package/src/llama.cpp/src/llama-cparams.cpp +1 -0
- package/src/llama.cpp/src/llama-cparams.h +37 -0
- package/src/llama.cpp/src/llama-grammar.cpp +5 -4
- package/src/llama.cpp/src/llama-grammar.h +3 -1
- package/src/llama.cpp/src/llama-hparams.cpp +71 -0
- package/src/llama.cpp/src/llama-hparams.h +139 -0
- package/src/llama.cpp/src/llama-impl.cpp +167 -0
- package/src/llama.cpp/src/llama-impl.h +16 -136
- package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
- package/src/llama.cpp/src/llama-kv-cache.h +218 -0
- package/src/llama.cpp/src/llama-mmap.cpp +589 -0
- package/src/llama.cpp/src/llama-mmap.h +67 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
- package/src/llama.cpp/src/llama-model-loader.h +167 -0
- package/src/llama.cpp/src/llama-model.cpp +3953 -0
- package/src/llama.cpp/src/llama-model.h +370 -0
- package/src/llama.cpp/src/llama-quant.cpp +934 -0
- package/src/llama.cpp/src/llama-quant.h +1 -0
- package/src/llama.cpp/src/llama-sampling.cpp +147 -32
- package/src/llama.cpp/src/llama-sampling.h +3 -19
- package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
- package/src/llama.cpp/src/llama-vocab.h +97 -142
- package/src/llama.cpp/src/llama.cpp +7160 -20314
- package/src/llama.cpp/src/unicode.cpp +8 -3
- package/src/llama.cpp/tests/CMakeLists.txt +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
- package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
- package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
- package/src/llama.cpp/tests/test-gguf.cpp +222 -187
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
- package/src/llama.cpp/tests/test-sampling.cpp +0 -1
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
package/src/llama.cpp/.github/workflows/server.yml:

```diff
@@ -112,9 +112,9 @@ jobs:
             -DGGML_OPENMP=OFF ;
           cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

-      - name: Build
-        id: cmake_build
-        if: ${{ matrix.sanitizer != 'THREAD' }}
+      - name: Build (sanitizers)
+        id: cmake_build_sanitizers
+        if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
         run: |
           cmake -B build \
             -DGGML_NATIVE=OFF \
@@ -124,12 +124,31 @@ jobs:
             -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
           cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

+      - name: Build (sanitizers)
+        id: cmake_build
+        if: ${{ matrix.sanitizer == '' }}
+        run: |
+          cmake -B build \
+            -DGGML_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
       - name: Tests
         id: server_integration_tests
+        if: ${{ matrix.sanitizer == '' }}
         run: |
           cd examples/server/tests
           ./tests.sh

+      - name: Tests (sanitizers)
+        id: server_integration_tests_sanitizers
+        if: ${{ matrix.sanitizer != '' }}
+        run: |
+          cd examples/server/tests
+          LLAMA_SANITIZE=1 ./tests.sh
+
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
```
package/src/llama.cpp/CMakeLists.txt:

```diff
@@ -83,11 +83,8 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)

 # override ggml options
-set(GGML_SANITIZE_THREAD    ${LLAMA_SANITIZE_THREAD})
-set(GGML_SANITIZE_ADDRESS   ${LLAMA_SANITIZE_ADDRESS})
-set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
-set(GGML_ALL_WARNINGS       ${LLAMA_ALL_WARNINGS})
-set(GGML_FATAL_WARNINGS     ${LLAMA_FATAL_WARNINGS})
+set(GGML_ALL_WARNINGS   ${LLAMA_ALL_WARNINGS})
+set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})

 # change the default for these ggml options
 if (NOT DEFINED GGML_LLAMAFILE)
@@ -117,16 +114,62 @@ llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
 llama_option_depr(WARNING LLAMA_CANN GGML_CANN)

+if (NOT MSVC)
+    if (LLAMA_SANITIZE_THREAD)
+        message(STATUS "Using -fsanitize=thread")
+
+        add_compile_options(-fsanitize=thread)
+        link_libraries     (-fsanitize=thread)
+    endif()
+
+    if (LLAMA_SANITIZE_ADDRESS)
+        message(STATUS "Using -fsanitize=address")
+
+        add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
+        link_libraries     (-fsanitize=address)
+    endif()
+
+    if (LLAMA_SANITIZE_UNDEFINED)
+        message(STATUS "Using -fsanitize=undefined")
+
+        add_compile_options(-fsanitize=undefined)
+        link_libraries     (-fsanitize=undefined)
+    endif()
+endif()
+
 #
-# build the library
+# 3rd-party
 #

 if (NOT TARGET ggml)
     add_subdirectory(ggml)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
+
+#
+# build the library
+#
+
 add_subdirectory(src)

+#
+# utils, programs, examples and tests
+#
+
+if (LLAMA_BUILD_COMMON)
+    add_subdirectory(common)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+    include(CTest)
+    add_subdirectory(tests)
+endif()
+
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+    add_subdirectory(pocs)
+endif()
+
 #
 # install
 #
@@ -200,21 +243,3 @@ configure_file(cmake/llama.pc.in

 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
         DESTINATION lib/pkgconfig)
-
-#
-# utils, programs, examples and tests
-#
-
-if (LLAMA_BUILD_COMMON)
-    add_subdirectory(common)
-endif()
-
-if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
-    include(CTest)
-    add_subdirectory(tests)
-endif()
-
-if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
-    add_subdirectory(examples)
-    add_subdirectory(pocs)
-endif()
```
package/src/llama.cpp/common/arg.cpp:

```diff
@@ -22,6 +22,11 @@ common_arg & common_arg::set_examples(std::initializer_list<enum llama_example>
     return *this;
 }

+common_arg & common_arg::set_excludes(std::initializer_list<enum llama_example> excludes) {
+    this->excludes = std::move(excludes);
+    return *this;
+}
+
 common_arg & common_arg::set_env(const char * env) {
     help = help + "\n(env: " + env + ")";
     this->env = env;
@@ -37,6 +42,10 @@ bool common_arg::in_example(enum llama_example ex) {
     return examples.find(ex) != examples.end();
 }

+bool common_arg::is_exclude(enum llama_example ex) {
+    return excludes.find(ex) != excludes.end();
+}
+
 bool common_arg::get_value_from_env(std::string & output) {
     if (env == nullptr) return false;
     char * value = std::getenv(env);
```
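These two hunks add the storage and accessor for the new `excludes` set on `common_arg`: an option can stay registered for `LLAMA_EXAMPLE_COMMON` while being hidden from a specific example. Below is a minimal, self-contained sketch of how the pieces fit together, using simplified stand-in types rather than the real llama.cpp declarations; the visibility check mirrors the updated `add_opt` lambda and the `.set_excludes({LLAMA_EXAMPLE_SERVER})` calls shown in later hunks of this diff.

```cpp
// Sketch only: simplified stand-ins for common_arg and llama_example.
#include <cstdio>
#include <initializer_list>
#include <set>

enum llama_example { LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER };

struct arg_sketch {
    std::set<llama_example> examples = {LLAMA_EXAMPLE_COMMON};
    std::set<llama_example> excludes = {};

    arg_sketch & set_excludes(std::initializer_list<llama_example> ex) { excludes = ex; return *this; }
    bool in_example(llama_example ex) const { return examples.count(ex) > 0; }
    bool is_exclude(llama_example ex) const { return excludes.count(ex) > 0; }
};

int main() {
    // e.g. --prompt stays available to llama-cli but is hidden from llama-server
    arg_sketch prompt_opt = arg_sketch().set_excludes({LLAMA_EXAMPLE_SERVER});

    for (llama_example ex : {LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}) {
        // mirrors the updated filter inside add_opt
        bool visible = (prompt_opt.in_example(ex) || prompt_opt.in_example(LLAMA_EXAMPLE_COMMON))
                       && !prompt_opt.is_exclude(ex);
        printf("example %d: %s\n", (int) ex, visible ? "option added" : "option excluded");
    }
    return 0;
}
```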
package/src/llama.cpp/common/arg.cpp (continued):

```diff
@@ -121,17 +130,26 @@ std::string common_arg::to_string() {

 static void common_params_handle_model_default(
         std::string & model,
-        std::string & model_url,
+        const std::string & model_url,
         std::string & hf_repo,
-        std::string & hf_file) {
+        std::string & hf_file,
+        const std::string & hf_token) {
     if (!hf_repo.empty()) {
         // short-hand to avoid specifying --hf-file -> default it to --model
         if (hf_file.empty()) {
             if (model.empty()) {
-                throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n");
+                auto auto_detected = common_get_hf_file(hf_repo, hf_token);
+                if (auto_detected.first.empty() || auto_detected.second.empty()) {
+                    exit(1); // built without CURL, error message already printed
+                }
+                hf_repo = auto_detected.first;
+                hf_file = auto_detected.second;
+            } else {
+                hf_file = model;
             }
-            hf_file = model;
-        } else if (model.empty()) {
+        }
+        // make sure model path is present (for caching purposes)
+        if (model.empty()) {
             // this is to avoid different repo having same file name, or same file name in different subdirs
             std::string filename = hf_repo + "_" + hf_file;
             // to make sure we don't have any slashes in the filename
```
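The rewritten branch asks `common_get_hf_file(hf_repo, hf_token)` for a `{repo, file}` pair when only the repo tag was given, so that helper is what turns a `<user>/<model>[:quant]` tag into a concrete GGUF file. A hedged sketch of the declaration implied by this call site; the real declaration lives in common.h/common.cpp, which this release also changes but which is not shown here:

```cpp
// Assumed shape, inferred from the call site above: returns
// {resolved_repo, resolved_gguf_file}; empty strings signal failure
// (e.g. when the binary was built without CURL support).
#include <string>
#include <utility>

std::pair<std::string, std::string> common_get_hf_file(
        const std::string & hf_repo_with_tag,   // e.g. "unsloth/phi-4-GGUF:q4_k_m"
        const std::string & hf_token);          // optional Hugging Face token, may be empty
```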
package/src/llama.cpp/common/arg.cpp (continued):

```diff
@@ -281,8 +299,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_contex
     }

     // TODO: refactor model params in a common struct
-    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file);
-    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file);
+    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
+    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);

     if (params.escape) {
         string_process_escapes(params.prompt);
@@ -358,6 +376,30 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val
     return devices;
 }

+static void add_rpc_devices(std::string servers) {
+    auto rpc_servers = string_split<std::string>(servers, ',');
+    if (rpc_servers.empty()) {
+        throw std::invalid_argument("no RPC servers specified");
+    }
+    ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
+    if (!rpc_reg) {
+        throw std::invalid_argument("failed to find RPC backend");
+    }
+    typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
+    ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
+    if (!ggml_backend_rpc_add_device_fn) {
+        throw std::invalid_argument("failed to find RPC device add function");
+    }
+    for (const auto & server : rpc_servers) {
+        ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
+        if (dev) {
+            ggml_backend_device_register(dev);
+        } else {
+            throw std::invalid_argument("failed to register RPC device");
+        }
+    }
+}
+
 bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
     auto ctx_arg = common_params_parser_init(params, ex, print_usage);
     const common_params params_org = ctx_arg.params; // the example can modify the default params
```
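`add_rpc_devices` looks up the RPC backend's `ggml_backend_rpc_add_device` entry point through the backend registry instead of linking the symbol directly, then registers one ggml device per endpoint. Assuming the standard ggml device-registry API (`ggml_backend_dev_count`, `ggml_backend_dev_get`, `ggml_backend_dev_name`), a caller could verify the result with a sketch like the one below; the endpoint strings are placeholders:

```cpp
// Sketch: after add_rpc_devices("host-a:50052,host-b:50052") runs, the RPC
// endpoints appear as ordinary devices in the global ggml registry and can be
// enumerated like any local CPU/GPU device.
#include <cstdio>
#include "ggml-backend.h"

static void list_registered_devices() {
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        printf("device %zu: %s\n", i, ggml_backend_dev_name(dev));
    }
}
```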
package/src/llama.cpp/common/arg.cpp (continued):

```diff
@@ -420,7 +462,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
     auto add_opt = [&](common_arg arg) {
-        if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
+        if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
             ctx_arg.options.push_back(std::move(arg));
         }
     };
@@ -649,7 +691,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.prompt = value;
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--no-perf"},
         string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),
@@ -673,7 +715,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.prompt.pop_back();
             }
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--in-file"}, "FNAME",
         "an input file (repeat to specify multiple files)",
@@ -700,7 +742,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.prompt = ss.str();
             fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str());
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-e", "--escape"},
         string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"),
@@ -759,15 +801,19 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-cnv", "--conversation"},
-        string_format(
-            "run in conversation mode:\n"
-            "- does not print special tokens and suffix/prefix\n"
-            "- interactive mode is also enabled\n"
-            "(default: %s)",
-            params.conversation ? "true" : "false"
-        ),
+        "run in conversation mode:\n"
+        "- does not print special tokens and suffix/prefix\n"
+        "- interactive mode is also enabled\n"
+        "(default: auto enabled if chat template is available)",
         [](common_params & params) {
-            params.conversation = true;
+            params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED;
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
+    add_opt(common_arg(
+        {"-no-cnv", "--no-conversation"},
+        "force disable conversation mode (default: false)",
+        [](common_params & params) {
+            params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
```
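The handlers above assign `COMMON_CONVERSATION_MODE_ENABLED` / `COMMON_CONVERSATION_MODE_DISABLED` to a new `params.conversation_mode` field in place of the old boolean, and the help text says conversation mode is "auto enabled if chat template is available". That implies an enum roughly like the following in common.h; this is an assumption based on the hunks above, not a quoted definition (common.h is listed as changed in this release, but its hunks are not shown here):

```cpp
// Assumed shape of the new conversation-mode setting; the ENABLED/DISABLED
// names come from the hunks above, the AUTO value and default are inferred
// from the "auto enabled if chat template is available" help text.
enum common_conversation_mode {
    COMMON_CONVERSATION_MODE_DISABLED = 0,
    COMMON_CONVERSATION_MODE_ENABLED  = 1,
    COMMON_CONVERSATION_MODE_AUTO     = 2,
};

struct common_params_excerpt {
    common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
    // ... other fields omitted
};
```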
package/src/llama.cpp/common/arg.cpp (continued):

```diff
@@ -1363,7 +1409,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             {"--rpc"}, "SERVERS",
             "comma separated list of RPC servers",
             [](common_params & params, const std::string & value) {
-                params.rpc_servers = value;
+                add_rpc_devices(value);
+                GGML_UNUSED(params);
             }
         ).set_env("LLAMA_ARG_RPC"));
     }
@@ -1512,7 +1559,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora"}, "FNAME",
         "path to LoRA adapter (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & value) {
-            params.lora_adapters.push_back({ std::string(value), 1.0 });
+            params.lora_adapters.push_back({ std::string(value), 1.0, nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
@@ -1520,7 +1567,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora-scaled"}, "FNAME", "SCALE",
         "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & fname, const std::string & scale) {
-            params.lora_adapters.push_back({ fname, std::stof(scale) });
+            params.lora_adapters.push_back({ fname, std::stof(scale), nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
```
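Both LoRA handlers now push a third initializer (`nullptr`) into `params.lora_adapters`, so the adapter-info struct evidently gained a pointer member for the loaded adapter handle alongside the path and scale. A hedged sketch of the layout implied by `{ fname, std::stof(scale), nullptr }`; the field names are assumptions, and the real definition is in common.h, which is changed in this release but not shown here:

```cpp
#include <string>

struct llama_adapter_lora;              // opaque handle, declared in llama.h

struct adapter_lora_info_sketch {
    std::string path;                   // adapter file name from --lora / --lora-scaled
    float scale;                        // user-defined scaling factor (1.0 for --lora)
    llama_adapter_lora * ptr;           // filled in once the adapter is actually loaded
};
```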
package/src/llama.cpp/common/arg.cpp (continued):

```diff
@@ -1574,21 +1621,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_env("LLAMA_ARG_MODEL_URL"));
     add_opt(common_arg(
-        {"-hfr", "--hf-repo"}, "REPO",
-        "Hugging Face model repository (default: unused)",
+        {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
+        "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
+        "example: unsloth/phi-4-GGUF:q4_k_m\n"
+        "(default: unused)",
         [](common_params & params, const std::string & value) {
             params.hf_repo = value;
         }
     ).set_env("LLAMA_ARG_HF_REPO"));
     add_opt(common_arg(
         {"-hff", "--hf-file"}, "FILE",
-        "Hugging Face model file (default: unused)",
+        "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
         [](common_params & params, const std::string & value) {
             params.hf_file = value;
         }
     ).set_env("LLAMA_ARG_HF_FILE"));
     add_opt(common_arg(
-        {"-hfrv", "--hf-repo-v"}, "REPO",
+        {"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]",
         "Hugging Face model repository for the vocoder model (default: unused)",
         [](common_params & params, const std::string & value) {
             params.vocoder.hf_repo = value;
```
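`-hf` / `--hf-repo` now accepts `<user>/<model>[:quant]`, with the quant tag case-insensitive and defaulting to Q4_K_M (the help text's own example is `unsloth/phi-4-GGUF:q4_k_m`). A minimal illustration of splitting such a tag; this is not the actual resolution logic in common.cpp, which additionally queries the repo for a GGUF file matching the tag:

```cpp
// Sketch: split "<user>/<model>[:quant]" into repo and quant tag, defaulting
// the tag to "Q4_K_M" and normalizing its case as described in the help text.
#include <algorithm>
#include <cctype>
#include <string>
#include <utility>

static std::pair<std::string, std::string> split_repo_tag(const std::string & value) {
    std::string repo = value;
    std::string tag  = "Q4_K_M";                 // documented default
    const auto pos = value.find(':');
    if (pos != std::string::npos) {
        repo = value.substr(0, pos);
        tag  = value.substr(pos + 1);
    }
    std::transform(tag.begin(), tag.end(), tag.begin(),
                   [](unsigned char c) { return (char) std::toupper(c); });  // case-insensitive
    return {repo, tag};
}

// split_repo_tag("unsloth/phi-4-GGUF:q4_k_m") -> {"unsloth/phi-4-GGUF", "Q4_K_M"}
```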
package/src/llama.cpp/common/arg.cpp (continued):

```diff
@@ -2205,6 +2254,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.vocoder.model = value;
         }
     ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--tts-use-guide-tokens"},
+        "Use guide tokens to improve TTS word recall",
+        [](common_params & params) {
+            params.vocoder.use_guide_tokens = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));

     // model-specific
     add_opt(common_arg(
```
package/src/llama.cpp/common/arg.h:

```diff
@@ -12,6 +12,7 @@

 struct common_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
     std::vector<const char *> args;
     const char * value_hint   = nullptr; // help text or example for arg value
     const char * value_hint_2 = nullptr; // for second arg value
@@ -53,9 +54,11 @@ struct common_arg {
     ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}

     common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
     common_arg & set_env(const char * env);
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
     bool get_value_from_env(std::string & output);
     bool has_value_from_env();
     std::string to_string();
```