@fugood/llama.node 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -8
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +4 -2
- package/src/DetokenizeWorker.cpp +1 -1
- package/src/EmbeddingWorker.cpp +2 -2
- package/src/LlamaCompletionWorker.cpp +10 -10
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +14 -17
- package/src/TokenizeWorker.cpp +1 -1
- package/src/common.hpp +5 -4
- package/src/llama.cpp/.github/workflows/build.yml +137 -29
- package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
- package/src/llama.cpp/.github/workflows/docker.yml +46 -34
- package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
- package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
- package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
- package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
- package/src/llama.cpp/.github/workflows/server.yml +7 -0
- package/src/llama.cpp/CMakeLists.txt +26 -11
- package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- package/src/llama.cpp/common/CMakeLists.txt +10 -10
- package/src/llama.cpp/common/arg.cpp +2041 -0
- package/src/llama.cpp/common/arg.h +77 -0
- package/src/llama.cpp/common/common.cpp +523 -1861
- package/src/llama.cpp/common/common.h +234 -106
- package/src/llama.cpp/common/console.cpp +3 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +401 -0
- package/src/llama.cpp/common/log.h +66 -698
- package/src/llama.cpp/common/ngram-cache.cpp +39 -36
- package/src/llama.cpp/common/ngram-cache.h +19 -19
- package/src/llama.cpp/common/sampling.cpp +356 -350
- package/src/llama.cpp/common/sampling.h +62 -139
- package/src/llama.cpp/common/stb_image.h +5990 -6398
- package/src/llama.cpp/docs/build.md +72 -17
- package/src/llama.cpp/examples/CMakeLists.txt +1 -2
- package/src/llama.cpp/examples/batched/batched.cpp +49 -65
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
- package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
- package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
- package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
- package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
- package/src/llama.cpp/examples/infill/infill.cpp +131 -192
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
- package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip.cpp +686 -150
- package/src/llama.cpp/examples/llava/clip.h +11 -2
- package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
- package/src/llama.cpp/examples/llava/llava.cpp +146 -26
- package/src/llama.cpp/examples/llava/llava.h +2 -3
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
- package/src/llama.cpp/examples/llava/requirements.txt +1 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
- package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
- package/src/llama.cpp/examples/main/main.cpp +216 -313
- package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
- package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
- package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
- package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
- package/src/llama.cpp/examples/server/server.cpp +1347 -1531
- package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
- package/src/llama.cpp/examples/server/utils.hpp +396 -107
- package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/simple/simple.cpp +132 -106
- package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
- package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
- package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
- package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
- package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
- package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
- package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
- package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
- package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
- package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
- package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
- package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
- package/src/llama.cpp/ggml/include/ggml.h +272 -505
- package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
- package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
- package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
- package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
- package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
- package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
- package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
- package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
- package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
- package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
- package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
- package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
- package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
- package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
- package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
- package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
- package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
- package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
- package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
- package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
- package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
- package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
- package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
- package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
- package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
- package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
- package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
- package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
- package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
- package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
- package/src/llama.cpp/include/llama.h +296 -285
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
- package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
- package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-grammar.cpp +721 -122
- package/src/llama.cpp/src/llama-grammar.h +120 -15
- package/src/llama.cpp/src/llama-impl.h +156 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
- package/src/llama.cpp/src/llama-sampling.h +39 -47
- package/src/llama.cpp/src/llama-vocab.cpp +390 -127
- package/src/llama.cpp/src/llama-vocab.h +60 -20
- package/src/llama.cpp/src/llama.cpp +6215 -3263
- package/src/llama.cpp/src/unicode-data.cpp +6 -4
- package/src/llama.cpp/src/unicode-data.h +4 -4
- package/src/llama.cpp/src/unicode.cpp +15 -7
- package/src/llama.cpp/tests/CMakeLists.txt +4 -2
- package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
- package/src/llama.cpp/tests/test-barrier.cpp +94 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
- package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
- package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
- package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
- package/src/llama.cpp/tests/test-log.cpp +39 -0
- package/src/llama.cpp/tests/test-opt.cpp +853 -142
- package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
- package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
- package/src/llama.cpp/tests/test-rope.cpp +2 -1
- package/src/llama.cpp/tests/test-sampling.cpp +226 -142
- package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
- package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
- package/patches/llama.patch +0 -22
- package/src/llama.cpp/.github/workflows/bench.yml +0 -310
- package/src/llama.cpp/common/grammar-parser.cpp +0 -536
- package/src/llama.cpp/common/grammar-parser.h +0 -29
- package/src/llama.cpp/common/train.cpp +0 -1513
- package/src/llama.cpp/common/train.h +0 -233
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
- package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
- package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
- /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
- /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/src/llama.cpp/examples/export-lora/export-lora.cpp

```diff
@@ -1,3 +1,4 @@
+#include "arg.h"
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
@@ -10,6 +11,12 @@

 static bool g_verbose = false;

+struct tensor_transformation {
+    struct ggml_tensor * in;
+    struct ggml_tensor * out;
+    bool is_copy;
+};
+
 static std::string get_kv_str(struct gguf_context * ctx_gguf, const std::string & key){
     int id = gguf_find_key(ctx_gguf, key.c_str());
     return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
@@ -50,20 +57,6 @@ static struct gguf_context * load_gguf(std::string & fname, struct ggml_context
     return ctx_gguf;
 }

-static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
-    std::string result;
-    for (size_t pos = 0; ; pos += search.length()) {
-        auto new_pos = s.find(search, pos);
-        if (new_pos == std::string::npos) {
-            result += s.substr(pos, s.size() - pos);
-            break;
-        }
-        result += s.substr(pos, new_pos - pos) + replace;
-        pos = new_pos;
-    }
-    s = std::move(result);
-}
-
 struct file_input {
     struct ggml_context * ctx_meta = nullptr;
     struct gguf_context * ctx_gguf = nullptr;
@@ -135,7 +128,7 @@ struct lora_merge_ctx {

     lora_merge_ctx(
             std::string & base_fname,
-            std::vector<
+            std::vector<common_lora_adapter_info> & lora_files,
             std::string & outfile,
             int n_threads) : base_model(base_fname, 0), n_threads(n_threads), fout(outfile, std::ios::binary) {
         fout.exceptions(std::ofstream::failbit); // fail fast on write errors
@@ -144,9 +137,9 @@ struct lora_merge_ctx {
             throw std::runtime_error("split model is not yet supported");
         }

-        for (auto lora_inp : lora_files) {
-            auto fname =
-            auto scale =
+        for (auto & lora_inp : lora_files) {
+            auto fname = lora_inp.path;
+            auto scale = lora_inp.scale;
             std::unique_ptr<file_input> adapter(new file_input(fname, scale));
             check_metadata_lora(adapter.get());
             adapters.push_back(std::move(adapter));
@@ -212,8 +205,7 @@ struct lora_merge_ctx {
         }

         // mapping base tensor to out tensor (same shape with base, but different type)
-
-        std::vector<std::pair<struct ggml_tensor *, struct ggml_tensor *>> base_to_out_tensors;
+        std::vector<tensor_transformation> trans;
         for (auto & it : base_model.tensors) {
             bool t_a = true;
             bool t_b = true;
@@ -226,14 +218,22 @@ struct lora_merge_ctx {
                 // only copy
                 struct ggml_tensor * cpy_tensor = ggml_dup_tensor(ctx_out_ggml, base_tensor);
                 ggml_set_name(cpy_tensor, base_tensor->name);
-
+                trans.push_back({
+                    cpy_tensor,
+                    cpy_tensor,
+                    true,
+                });
                 gguf_add_tensor(ctx_out, cpy_tensor);
             } else if (t_a && t_b) {
                 // need merging
                 struct ggml_tensor * out_tensor = ggml_new_tensor(
                     ctx_out_ggml, get_out_tensor_type(base_tensor), GGML_MAX_DIMS, base_tensor->ne);
                 ggml_set_name(out_tensor, base_tensor->name);
-
+                trans.push_back({
+                    base_tensor,
+                    out_tensor,
+                    false,
+                });
                 gguf_add_tensor(ctx_out, out_tensor);
             } else {
                 throw std::runtime_error("tensor " + it.first + " missing either lora_a or lora_b");
@@ -248,12 +248,12 @@ struct lora_merge_ctx {

         // process base model tensors
         size_t n_merged = 0;
-        for (auto & it :
-            if (it.
-                merge_tensor(it.
+        for (auto & it : trans) {
+            if (!it.is_copy) {
+                merge_tensor(it.in, it.out);
                 n_merged++;
             } else {
-                copy_tensor(it.
+                copy_tensor(it.in);
             }
         }

@@ -266,7 +266,7 @@ struct lora_merge_ctx {
         }

         printf("%s : merged %ld tensors with lora adapters\n", __func__, n_merged);
-        printf("%s : wrote %ld tensors to output file\n", __func__,
+        printf("%s : wrote %ld tensors to output file\n", __func__, trans.size());
     }

     void copy_tensor(struct ggml_tensor * base) {
@@ -299,6 +299,10 @@ struct lora_merge_ctx {
         for (size_t i = 0; i < adapters.size(); ++i) {
             auto t_a = adapters[i]->get_tensor(name_lora_a);
             auto t_b = adapters[i]->get_tensor(name_lora_b);
+            // TODO: add support for quantized lora
+            if (ggml_is_quantized(t_a->type) || ggml_is_quantized(t_b->type)) {
+                throw std::runtime_error("quantized LoRA adapters is not supported, please retry with f16 or f32");
+            }
             inp_a[i] = ggml_dup_tensor(ctx, t_a);
             inp_b[i] = ggml_dup_tensor(ctx, t_b);
         }
@@ -310,9 +314,9 @@ struct lora_merge_ctx {
             // optionally dequantize it
             printf("%s : + dequantize base tensor from %s to F32\n", __func__, ggml_type_name(base->type));
             auto nels = ggml_nelements(inp_base);
-
+            const auto * qtype = ggml_get_type_traits(base->type);
             std::vector<uint8_t> dequant_buf(nels * sizeof(float));
-            qtype
+            qtype->to_float(read_buf.data(), (float *)dequant_buf.data(), nels);
             ggml_backend_tensor_set(inp_base, dequant_buf.data(), 0, dequant_buf.size());
         } else {
             ggml_backend_tensor_set(inp_base, read_buf.data(), 0, ggml_nbytes(inp_base));
@@ -366,7 +370,7 @@ struct lora_merge_ctx {

         // write data to output file
         {
-            auto result = gf
+            auto * result = ggml_graph_node(gf, -1);
             size_t len = ggml_nbytes(result);
             if (read_buf.size() < len) {
                 read_buf.resize(len);
@@ -388,9 +392,7 @@ struct lora_merge_ctx {
     }
 };

-static void print_usage(int
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     printf("\nexample usage:\n");
     printf("\n  %s -m base-model.gguf --lora lora-file.gguf -o merged-model-f16.gguf\n", argv[0]);
     printf("\nNOTE: output model is F16\n");
@@ -398,16 +400,15 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
 }

 int main(int argc, char ** argv) {
-
+    common_params params;

-    if (!
-        print_usage(argc, argv, params);
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage)) {
         return 1;
     }

-    g_verbose = (params.verbosity
+    g_verbose = (params.verbosity > 1);
     try {
-        lora_merge_ctx ctx(params.model, params.
+        lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads);
         ctx.run_merge();
     } catch (const std::exception & err) {
         fprintf(stderr, "%s\n", err.what());
```
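The export-lora changes above track two llama.cpp refactors: tensor bookkeeping moves from a pair-with-nullptr scheme to an explicit `tensor_transformation` record, and argument handling moves from `gpt_params`/`gpt_params_print_usage` to the `common_params`/`common_params_parse` entry point in the new `arg.h`. A minimal sketch of the new parsing wiring, based only on the calls visible in this hunk (the printed fields are illustrative, not part of the diff):

```cpp
#include "arg.h"
#include "common.h"

#include <cstdio>

// Hypothetical minimal example: how tools in this release wire up argument
// parsing after the gpt_params -> common_params refactor. The usage callback
// replaces the old gpt_params_print_usage(argc, argv, params) overload.
static void print_usage(int /*argc*/, char ** argv) {
    printf("\nexample usage:\n\n  %s -m model.gguf [options]\n", argv[0]);
}

int main(int argc, char ** argv) {
    common_params params;

    // the parser is specialized per example; export-lora uses LLAMA_EXAMPLE_EXPORT_LORA
    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage)) {
        return 1; // parse failure already printed usage via the callback
    }

    // fields used by export-lora in this diff: model, lora_adapters, lora_outfile, cpuparams.n_threads
    fprintf(stderr, "model: %s, threads: %d\n", params.model.c_str(), params.cpuparams.n_threads);
    return 0;
}
```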
package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp

```diff
@@ -1,9 +1,5 @@
-#define LLAMA_API_INTERNAL
-
-#include "grammar-parser.h"
-#include "ggml.h"
-#include "llama.h"
 #include "unicode.h"
+#include "llama-grammar.h"

 #include <cstdio>
 #include <cstdlib>
@@ -12,29 +8,28 @@
 #include <string>
 #include <vector>

-static bool
-    auto
-    const auto & code_points = decoded.first;
+static bool llama_grammar_validate(struct llama_grammar * grammar, const std::string & input_str, size_t & error_pos, std::string & error_msg) {
+    const auto cpts = unicode_cpts_from_utf8(input_str);

     const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
-    llama_grammar_stacks &
+    llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);

     size_t pos = 0;
-    for (auto
-        const llama_grammar_stacks
+    for (const auto & cpt : cpts) {
+        const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy

-        llama_grammar_accept(rules,
+        llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);

-        if (
+        if (stacks_cur.empty()) {
             error_pos = pos;
-            error_msg = "Unexpected character '" + unicode_cpt_to_utf8(
-
+            error_msg = "Unexpected character '" + unicode_cpt_to_utf8(cpt) + "'";
+            stacks_cur = stacks_prev;
             return false;
         }
         ++pos;
     }

-    for (const auto & stack :
+    for (const auto & stack : stacks_cur) {
         if (stack.empty()) {
             return true;
         }
@@ -85,27 +80,7 @@ int main(int argc, char** argv) {
         grammar_str = buffer.str();
     }

-
-    auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
-
-    // will be empty (default) if there are parse errors
-    if (parsed_grammar.rules.empty()) {
-        fprintf(stdout, "%s: failed to parse grammar\n", __func__);
-        return 1;
-    }
-
-    // Ensure that there is a "root" node.
-    if (parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()) {
-        fprintf(stdout, "%s: grammar does not contain a 'root' symbol\n", __func__);
-        return 1;
-    }
-
-    std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
-
-    // Create the LLAMA grammar
-    auto grammar = llama_grammar_init(
-            grammar_rules.data(),
-            grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
+    llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root");
     if (grammar == nullptr) {
         throw std::runtime_error("Failed to initialize llama_grammar");
     }
@@ -122,7 +97,7 @@ int main(int argc, char** argv) {
     // Validate the input string against the grammar
     size_t error_pos;
     std::string error_msg;
-    bool is_valid =
+    bool is_valid = llama_grammar_validate(grammar, input_str, error_pos, error_msg);

     if (is_valid) {
         fprintf(stdout, "Input string is valid according to the grammar.\n");
@@ -131,7 +106,7 @@ int main(int argc, char** argv) {
     }

     // Clean up
-
+    llama_grammar_free_impl(grammar);

     return 0;
 }
```
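The gbnf-validator rewrite drops the old `grammar-parser` front end and drives the grammar through `llama-grammar.h` directly: the input is decoded to codepoints, each codepoint advances the set of parse stacks via `llama_grammar_accept`, and an empty stack set means rejection at that position. Below is a toy, self-contained sketch of that snapshot-advance-restore pattern with stand-in types; none of these helpers are llama.cpp APIs:

```cpp
#include <cstddef>
#include <vector>

// Stand-ins for llama_grammar_stack / llama_grammar_stacks.
using parse_stack  = std::vector<int>;
using parse_stacks = std::vector<parse_stack>;

// Stand-in for llama_grammar_accept: keeps only stacks that can consume `cpt`
// (here, a toy rule: the stack top must equal the codepoint).
static void accept_cpt(const parse_stacks & prev, int cpt, parse_stacks & cur) {
    cur.clear();
    for (const auto & st : prev) {
        if (!st.empty() && st.back() == cpt) {
            parse_stack next = st;
            next.pop_back();
            cur.push_back(next);
        }
    }
}

static bool validate(parse_stacks & stacks, const std::vector<int> & cpts, size_t & error_pos) {
    size_t pos = 0;
    for (int cpt : cpts) {
        const parse_stacks stacks_prev = stacks; // snapshot, as in the diff
        accept_cpt(stacks_prev, cpt, stacks);
        if (stacks.empty()) {
            error_pos = pos;       // report where no rule could continue
            stacks = stacks_prev;  // restore last-good state, as the diff does
            return false;
        }
        ++pos;
    }
    // accepted iff some stack has been fully consumed
    for (const auto & st : stacks) {
        if (st.empty()) return true;
    }
    return false;
}
```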
package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt

```diff
@@ -1,5 +1,5 @@
-set(TARGET llama-
-add_executable(${TARGET}
+set(TARGET llama-gen-docs)
+add_executable(${TARGET} gen-docs.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
```
package/src/llama.cpp/examples/gen-docs/gen-docs.cpp

```diff
@@ -0,0 +1,83 @@
+#include "arg.h"
+#include "common.h"
+
+#include <fstream>
+#include <string>
+
+// Export usage message (-h) to markdown format
+
+static void write_table_header(std::ofstream & file) {
+    file << "| Argument | Explanation |\n";
+    file << "| -------- | ----------- |\n";
+}
+
+static void write_table_entry(std::ofstream & file, const common_arg & opt) {
+    file << "| `";
+    // args
+    for (const auto & arg : opt.args) {
+        if (arg == opt.args.front()) {
+            file << arg;
+            if (opt.args.size() > 1) file << ", ";
+        } else {
+            file << arg << (arg != opt.args.back() ? ", " : "");
+        }
+    }
+    // value hint
+    if (opt.value_hint) {
+        std::string md_value_hint(opt.value_hint);
+        string_replace_all(md_value_hint, "|", "\\|");
+        file << " " << md_value_hint;
+    }
+    if (opt.value_hint_2) {
+        std::string md_value_hint_2(opt.value_hint_2);
+        string_replace_all(md_value_hint_2, "|", "\\|");
+        file << " " << md_value_hint_2;
+    }
+    // help text
+    std::string md_help(opt.help);
+    string_replace_all(md_help, "\n", "<br/>");
+    string_replace_all(md_help, "|", "\\|");
+    file << "` | " << md_help << " |\n";
+}
+
+static void write_table(std::ofstream & file, std::vector<common_arg *> & opts) {
+    write_table_header(file);
+    for (const auto & opt : opts) {
+        write_table_entry(file, *opt);
+    }
+}
+
+static void export_md(std::string fname, llama_example ex) {
+    std::ofstream file(fname, std::ofstream::out | std::ofstream::trunc);
+
+    common_params params;
+    auto ctx_arg = common_params_parser_init(params, ex);
+
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+    for (auto & opt : ctx_arg.options) {
+        // in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    file << "**Common params**\n\n";
+    write_table(file, common_options);
+    file << "\n\n**Sampling params**\n\n";
+    write_table(file, sparam_options);
+    file << "\n\n**Example-specific params**\n\n";
+    write_table(file, specific_options);
+}
+
+int main(int, char **) {
+    export_md("autogen-main.md", LLAMA_EXAMPLE_MAIN);
+    export_md("autogen-server.md", LLAMA_EXAMPLE_SERVER);
+
+    return 0;
+}
```
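The new `llama-gen-docs` tool dumps every registered `common_arg` into markdown tables, escaping `|` as `\|` and newlines as `<br/>` so help text survives inside a table cell. Assuming an option like `-h, --help, --usage`, the generated `autogen-main.md` would look roughly like this (row content illustrative):

```
**Common params**

| Argument | Explanation |
| -------- | ----------- |
| `-h, --help, --usage` | print usage and exit |
```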
package/src/llama.cpp/examples/gguf-split/gguf-split.cpp

```diff
@@ -22,12 +22,20 @@
 #endif

 enum split_operation : uint8_t {
-
-
+    OP_NONE,
+    OP_SPLIT,
+    OP_MERGE,
+};
+
+enum split_mode : uint8_t {
+    MODE_NONE,
+    MODE_TENSOR,
+    MODE_SIZE,
 };

 struct split_params {
-    split_operation operation =
+    split_operation operation = OP_NONE;
+    split_mode mode = MODE_NONE;
     size_t n_bytes_split = 0;
     int n_split_tensors = 128;
     std::string input;
@@ -87,59 +95,52 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
         }

         bool arg_found = false;
-        bool is_op_set = false;
-        bool is_mode_set = false;
         if (arg == "-h" || arg == "--help") {
             split_print_usage(argv[0]);
             exit(0);
-        }
-        if (arg == "--version") {
+        } else if (arg == "--version") {
             fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
             fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
             exit(0);
-        }
-        if (arg == "--dry-run") {
+        } else if (arg == "--dry-run") {
             arg_found = true;
             params.dry_run = true;
-        }
-        if (arg == "--no-tensor-first-split") {
+        } else if (arg == "--no-tensor-first-split") {
             arg_found = true;
             params.no_tensor_first_split = true;
-        }
-
-        if (is_op_set) {
-            throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
-        }
-        if (arg == "--merge") {
+        } else if (arg == "--merge") {
             arg_found = true;
-
-
-
-
+            if (params.operation != OP_NONE && params.operation != OP_MERGE) {
+                throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
+            }
+            params.operation = OP_MERGE;
+        } else if (arg == "--split") {
             arg_found = true;
-
-
-
-
-            if (
-                throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
-            }
-            if (arg == "--split-max-tensors") {
+            if (params.operation != OP_NONE && params.operation != OP_SPLIT) {
+                throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
+            }
+            params.operation = OP_SPLIT;
+        } else if (arg == "--split-max-tensors") {
             if (++arg_idx >= argc) {
                 invalid_param = true;
                 break;
             }
             arg_found = true;
-
+            if (params.mode != MODE_NONE && params.mode != MODE_TENSOR) {
+                throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
+            }
+            params.mode = MODE_TENSOR;
             params.n_split_tensors = atoi(argv[arg_idx]);
-        }
-        if (arg == "--split-max-size") {
+        } else if (arg == "--split-max-size") {
             if (++arg_idx >= argc) {
                 invalid_param = true;
                 break;
             }
             arg_found = true;
-
+            if (params.mode != MODE_NONE && params.mode != MODE_SIZE) {
+                throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
+            }
+            params.mode = MODE_SIZE;
             params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
         }

@@ -148,11 +149,20 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
         }
     }

+    // the operation is split if not specified
+    if (params.operation == OP_NONE) {
+        params.operation = OP_SPLIT;
+    }
+    // the split mode is by tensor if not specified
+    if (params.mode == MODE_NONE) {
+        params.mode = MODE_TENSOR;
+    }
+
     if (invalid_param) {
         throw std::invalid_argument("error: invalid parameter for argument: " + arg);
     }

-    if (argc - arg_idx
+    if (argc - arg_idx != 2) {
         throw std::invalid_argument("error: bad arguments");
     }

@@ -265,13 +275,15 @@ struct split_strategy {
     }

     bool should_split(int i_tensor, size_t next_size) {
-        if (params.
+        if (params.mode == MODE_SIZE) {
             // split by max size per file
             return next_size > params.n_bytes_split;
-        } else {
+        } else if (params.mode == MODE_TENSOR) {
             // split by number of tensors per file
             return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
         }
+        // should never happen
+        GGML_ABORT("invalid mode");
     }

     void print_info() {
@@ -389,10 +401,17 @@ static void gguf_merge(const split_params & split_params) {
     int n_split = 1;
     int total_tensors = 0;

-
+    // avoid overwriting existing output file
+    if (std::ifstream(split_params.output.c_str())) {
+        fprintf(stderr, "%s: output file %s already exists\n", __func__, split_params.output.c_str());
+        exit(EXIT_FAILURE);
+    }
+
     std::ofstream fout(split_params.output.c_str(), std::ios::binary);
     fout.exceptions(std::ofstream::failbit); // fail fast on write errors

+    auto * ctx_out = gguf_init_empty();
+
     std::vector<uint8_t> read_data;
     std::vector<ggml_context *> ctx_metas;
     std::vector<gguf_context *> ctx_ggufs;
@@ -552,9 +571,9 @@ int main(int argc, const char ** argv) {
     split_params_parse(argc, argv, params);

     switch (params.operation) {
-        case
+        case OP_SPLIT: gguf_split(params);
             break;
-        case
+        case OP_MERGE: gguf_merge(params);
             break;
         default: split_print_usage(argv[0]);
             exit(EXIT_FAILURE);
```