@fugood/llama.node 0.0.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +85 -0
- package/README.md +56 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/lib/binding.js +13 -0
- package/lib/binding.ts +57 -0
- package/lib/index.js +24 -0
- package/lib/index.ts +13 -0
- package/package.json +65 -0
- package/src/addons.cpp +506 -0
- package/src/llama.cpp/CMakeLists.txt +1320 -0
- package/src/llama.cpp/build.zig +172 -0
- package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
- package/src/llama.cpp/common/CMakeLists.txt +87 -0
- package/src/llama.cpp/common/base64.hpp +392 -0
- package/src/llama.cpp/common/common.cpp +2949 -0
- package/src/llama.cpp/common/common.h +324 -0
- package/src/llama.cpp/common/console.cpp +501 -0
- package/src/llama.cpp/common/console.h +19 -0
- package/src/llama.cpp/common/grammar-parser.cpp +440 -0
- package/src/llama.cpp/common/grammar-parser.h +29 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
- package/src/llama.cpp/common/json.hpp +24766 -0
- package/src/llama.cpp/common/log.h +724 -0
- package/src/llama.cpp/common/ngram-cache.cpp +282 -0
- package/src/llama.cpp/common/ngram-cache.h +94 -0
- package/src/llama.cpp/common/sampling.cpp +353 -0
- package/src/llama.cpp/common/sampling.h +147 -0
- package/src/llama.cpp/common/stb_image.h +8396 -0
- package/src/llama.cpp/common/train.cpp +1513 -0
- package/src/llama.cpp/common/train.h +233 -0
- package/src/llama.cpp/examples/CMakeLists.txt +52 -0
- package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
- package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched/batched.cpp +262 -0
- package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
- package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
- package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
- package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
- package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
- package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
- package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
- package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
- package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
- package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
- package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
- package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
- package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/infill/infill.cpp +767 -0
- package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
- package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
- package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
- package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
- package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
- package/src/llama.cpp/examples/llava/clip.h +85 -0
- package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
- package/src/llama.cpp/examples/llava/llava.cpp +426 -0
- package/src/llama.cpp/examples/llava/llava.h +50 -0
- package/src/llama.cpp/examples/llava/requirements.txt +3 -0
- package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
- package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
- package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
- package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
- package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
- package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
- package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/main/main.cpp +957 -0
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
- package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
- package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
- package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
- package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
- package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
- package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
- package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
- package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
- package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
- package/src/llama.cpp/examples/server/httplib.h +9465 -0
- package/src/llama.cpp/examples/server/server.cpp +3826 -0
- package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
- package/src/llama.cpp/examples/server/utils.hpp +653 -0
- package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/simple/simple.cpp +183 -0
- package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
- package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
- package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
- package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
- package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
- package/src/llama.cpp/ggml-alloc.c +985 -0
- package/src/llama.cpp/ggml-alloc.h +76 -0
- package/src/llama.cpp/ggml-backend-impl.h +141 -0
- package/src/llama.cpp/ggml-backend.c +2099 -0
- package/src/llama.cpp/ggml-backend.h +233 -0
- package/src/llama.cpp/ggml-common.h +1853 -0
- package/src/llama.cpp/ggml-cuda.h +43 -0
- package/src/llama.cpp/ggml-impl.h +265 -0
- package/src/llama.cpp/ggml-kompute.cpp +2006 -0
- package/src/llama.cpp/ggml-kompute.h +46 -0
- package/src/llama.cpp/ggml-metal.h +66 -0
- package/src/llama.cpp/ggml-mpi.c +216 -0
- package/src/llama.cpp/ggml-mpi.h +39 -0
- package/src/llama.cpp/ggml-opencl.cpp +2301 -0
- package/src/llama.cpp/ggml-opencl.h +36 -0
- package/src/llama.cpp/ggml-quants.c +12678 -0
- package/src/llama.cpp/ggml-quants.h +133 -0
- package/src/llama.cpp/ggml-sycl.cpp +17882 -0
- package/src/llama.cpp/ggml-sycl.h +49 -0
- package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
- package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
- package/src/llama.cpp/ggml-vulkan.h +29 -0
- package/src/llama.cpp/ggml.c +21819 -0
- package/src/llama.cpp/ggml.h +2403 -0
- package/src/llama.cpp/llama.cpp +17468 -0
- package/src/llama.cpp/llama.h +1117 -0
- package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
- package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
- package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
- package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
- package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
- package/src/llama.cpp/prompts/alpaca.txt +1 -0
- package/src/llama.cpp/prompts/assistant.txt +31 -0
- package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
- package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
- package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
- package/src/llama.cpp/prompts/chat.txt +28 -0
- package/src/llama.cpp/prompts/dan-modified.txt +1 -0
- package/src/llama.cpp/prompts/dan.txt +1 -0
- package/src/llama.cpp/prompts/mnemonics.txt +93 -0
- package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
- package/src/llama.cpp/prompts/reason-act.txt +18 -0
- package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
- package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
- package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
- package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
- package/src/llama.cpp/requirements.txt +12 -0
- package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
- package/src/llama.cpp/scripts/xxd.cmake +16 -0
- package/src/llama.cpp/sgemm.cpp +999 -0
- package/src/llama.cpp/sgemm.h +12 -0
- package/src/llama.cpp/tests/CMakeLists.txt +78 -0
- package/src/llama.cpp/tests/get-model.cpp +21 -0
- package/src/llama.cpp/tests/get-model.h +2 -0
- package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
- package/src/llama.cpp/tests/test-c.c +7 -0
- package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
- package/src/llama.cpp/tests/test-double-float.cpp +57 -0
- package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
- package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
- package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
- package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
- package/src/llama.cpp/tests/test-opt.cpp +181 -0
- package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
- package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
- package/src/llama.cpp/tests/test-rope.cpp +221 -0
- package/src/llama.cpp/tests/test-sampling.cpp +301 -0
- package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
- package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
- package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
- package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
- package/src/llama.cpp/unicode-data.cpp +1651 -0
- package/src/llama.cpp/unicode-data.h +16 -0
- package/src/llama.cpp/unicode.cpp +277 -0
- package/src/llama.cpp/unicode.h +28 -0
|
@@ -0,0 +1,553 @@
|
|
|
1
|
+
#include "llama.h"
|
|
2
|
+
#include "common.h"
|
|
3
|
+
|
|
4
|
+
#include <algorithm>
|
|
5
|
+
#include <cmath>
|
|
6
|
+
#include <cstdlib>
|
|
7
|
+
#include <fstream>
|
|
8
|
+
#include <string>
|
|
9
|
+
#include <vector>
|
|
10
|
+
|
|
11
|
+
#include <stdio.h>
|
|
12
|
+
#include <string.h>
|
|
13
|
+
#include <climits>
|
|
14
|
+
#include <stdexcept>
|
|
15
|
+
|
|
16
|
+
#if defined(_WIN32)
|
|
17
|
+
#include <windows.h>
|
|
18
|
+
#ifndef PATH_MAX
|
|
19
|
+
#define PATH_MAX MAX_PATH
|
|
20
|
+
#endif
|
|
21
|
+
#include <io.h>
|
|
22
|
+
#endif
|
|
23
|
+
|
|
24
|
+
// Operation selected on the command line: split one GGUF into shards,
// or merge shards back into one file.
enum split_operation : uint8_t {
    SPLIT_OP_SPLIT, // split a single GGUF into multiple GGUF shards (default)
    SPLIT_OP_MERGE, // merge multiple GGUF shards into a single GGUF
};
|
|
28
|
+
|
|
29
|
+
// Parsed command-line options for the gguf-split tool.
struct split_params {
    split_operation operation = SPLIT_OP_SPLIT; // what to do: split (default) or merge
    size_t n_bytes_split = 0;                   // max bytes per output shard; 0 = split by tensor count instead
    int n_split_tensors = 128;                  // max tensors per output shard (used when n_bytes_split == 0)
    std::string input;                          // input GGUF path (first shard path when merging)
    std::string output;                         // output path prefix (split) or merged output file (merge)
    bool dry_run = false;                       // if true, only print the split plan; write no files
};
|
|
37
|
+
|
|
38
|
+
// Print usage/help text to stdout.
// @param executable argv[0], echoed into the usage line.
static void split_print_usage(const char * executable) {
    const split_params default_params; // default-constructed to show default option values
    printf("\n");
    printf("usage: %s [options] GGUF_IN GGUF_OUT\n", executable);
    printf("\n");
    printf("Apply a GGUF operation on IN to OUT.");
    printf("\n");
    printf("options:\n");
    printf("  -h, --help              show this help message and exit\n");
    printf("  --version               show version and build info\n");
    printf("  --split                 split GGUF to multiple GGUF (enabled by default)\n");
    printf("  --merge                 merge multiple GGUF to a single GGUF\n");
    printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
    printf("  --split-max-size N(M|G) max size per split\n");
    printf("  --dry-run               only print out a split plan and exit, without writing any new files\n");
    printf("\n");
}
|
|
55
|
+
|
|
56
|
+
// return convert string, for example "128M" or "4G" to number of bytes
|
|
57
|
+
// Convert a size string such as "128M" or "4G" to a number of bytes.
// Accepted units are 'M' (mebibytes) and 'G' (gibibytes).
// @throws std::invalid_argument on an empty string, an unknown unit,
//         an unparsable number, or a non-positive value.
static size_t split_str_to_n_bytes(std::string str) {
    // guard: str.back() below would be undefined behavior on an empty string
    if (str.empty()) {
        throw std::invalid_argument("error: size string is empty");
    }
    size_t n_bytes = 0;
    int n = 0; // initialized so a failed sscanf cannot leave it indeterminate
    if (str.back() == 'M') {
        if (sscanf(str.c_str(), "%d", &n) != 1) {
            throw std::invalid_argument("error: failed to parse size: " + str);
        }
        n_bytes = (size_t)n * 1024 * 1024; // megabytes
    } else if (str.back() == 'G') {
        if (sscanf(str.c_str(), "%d", &n) != 1) {
            throw std::invalid_argument("error: failed to parse size: " + str);
        }
        n_bytes = (size_t)n * 1024 * 1024 * 1024; // gigabytes
    } else {
        throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back()));
    }
    if (n <= 0) {
        throw std::invalid_argument("error: size must be a positive value");
    }
    return n_bytes;
}
|
|
74
|
+
|
|
75
|
+
static void split_params_parse_ex(int argc, const char ** argv, split_params & params) {
|
|
76
|
+
std::string arg;
|
|
77
|
+
const std::string arg_prefix = "--";
|
|
78
|
+
bool invalid_param = false;
|
|
79
|
+
|
|
80
|
+
int arg_idx = 1;
|
|
81
|
+
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
|
|
82
|
+
arg = argv[arg_idx];
|
|
83
|
+
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
|
|
84
|
+
std::replace(arg.begin(), arg.end(), '_', '-');
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
bool arg_found = false;
|
|
88
|
+
bool is_op_set = false;
|
|
89
|
+
bool is_mode_set = false;
|
|
90
|
+
if (arg == "-h" || arg == "--help") {
|
|
91
|
+
split_print_usage(argv[0]);
|
|
92
|
+
exit(0);
|
|
93
|
+
}
|
|
94
|
+
if (arg == "--version") {
|
|
95
|
+
fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
|
|
96
|
+
fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
|
|
97
|
+
exit(0);
|
|
98
|
+
}
|
|
99
|
+
if (arg == "--dry-run") {
|
|
100
|
+
arg_found = true;
|
|
101
|
+
params.dry_run = true;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
if (is_op_set) {
|
|
105
|
+
throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
|
|
106
|
+
}
|
|
107
|
+
if (arg == "--merge") {
|
|
108
|
+
arg_found = true;
|
|
109
|
+
is_op_set = true;
|
|
110
|
+
params.operation = SPLIT_OP_MERGE;
|
|
111
|
+
}
|
|
112
|
+
if (arg == "--split") {
|
|
113
|
+
arg_found = true;
|
|
114
|
+
is_op_set = true;
|
|
115
|
+
params.operation = SPLIT_OP_SPLIT;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (is_mode_set) {
|
|
119
|
+
throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
|
|
120
|
+
}
|
|
121
|
+
if (arg == "--split-max-tensors") {
|
|
122
|
+
if (++arg_idx >= argc) {
|
|
123
|
+
invalid_param = true;
|
|
124
|
+
break;
|
|
125
|
+
}
|
|
126
|
+
arg_found = true;
|
|
127
|
+
is_mode_set = true;
|
|
128
|
+
params.n_split_tensors = atoi(argv[arg_idx]);
|
|
129
|
+
}
|
|
130
|
+
if (arg == "--split-max-size") {
|
|
131
|
+
if (++arg_idx >= argc) {
|
|
132
|
+
invalid_param = true;
|
|
133
|
+
break;
|
|
134
|
+
}
|
|
135
|
+
arg_found = true;
|
|
136
|
+
is_mode_set = true;
|
|
137
|
+
params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
if (!arg_found) {
|
|
141
|
+
throw std::invalid_argument("error: unknown argument: " + arg);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
if (invalid_param) {
|
|
146
|
+
throw std::invalid_argument("error: invalid parameter for argument: " + arg);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
if (argc - arg_idx < 2) {
|
|
150
|
+
throw std::invalid_argument("error: bad arguments");
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
params.input = argv[arg_idx++];
|
|
154
|
+
params.output = argv[arg_idx++];
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
static bool split_params_parse(int argc, const char ** argv, split_params & params) {
|
|
158
|
+
bool result = true;
|
|
159
|
+
try {
|
|
160
|
+
split_params_parse_ex(argc, argv, params);
|
|
161
|
+
}
|
|
162
|
+
catch (const std::invalid_argument & ex) {
|
|
163
|
+
fprintf(stderr, "%s\n", ex.what());
|
|
164
|
+
split_print_usage(argv[0]);
|
|
165
|
+
exit(EXIT_FAILURE);
|
|
166
|
+
}
|
|
167
|
+
return result;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// Write `n` zero bytes to `file`, one byte at a time.
// Used to emit padding and to reserve space for metadata written later.
static void zeros(std::ofstream & file, size_t n) {
    const char nil = 0;
    while (n-- > 0) {
        file.write(&nil, 1);
    }
}
|
|
176
|
+
|
|
177
|
+
struct split_strategy {
|
|
178
|
+
const split_params params;
|
|
179
|
+
std::ifstream & f_input;
|
|
180
|
+
struct gguf_context * ctx_gguf;
|
|
181
|
+
struct ggml_context * ctx_meta = NULL;
|
|
182
|
+
const int n_tensors;
|
|
183
|
+
|
|
184
|
+
// one ctx_out per one output file
|
|
185
|
+
std::vector<struct gguf_context *> ctx_outs;
|
|
186
|
+
|
|
187
|
+
// temporary buffer for reading in tensor data
|
|
188
|
+
std::vector<uint8_t> read_buf;
|
|
189
|
+
|
|
190
|
+
split_strategy(const split_params & params,
|
|
191
|
+
std::ifstream & f_input,
|
|
192
|
+
struct gguf_context * ctx_gguf,
|
|
193
|
+
struct ggml_context * ctx_meta) :
|
|
194
|
+
params(params),
|
|
195
|
+
f_input(f_input),
|
|
196
|
+
ctx_gguf(ctx_gguf),
|
|
197
|
+
ctx_meta(ctx_meta),
|
|
198
|
+
n_tensors(gguf_get_n_tensors(ctx_gguf)) {
|
|
199
|
+
|
|
200
|
+
// because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
|
|
201
|
+
int i_split = -1;
|
|
202
|
+
struct gguf_context * ctx_out = NULL;
|
|
203
|
+
auto new_ctx_out = [&]() {
|
|
204
|
+
i_split++;
|
|
205
|
+
if (ctx_out != NULL) {
|
|
206
|
+
if (gguf_get_n_tensors(ctx_out) == 0) {
|
|
207
|
+
fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
|
|
208
|
+
exit(EXIT_FAILURE);
|
|
209
|
+
}
|
|
210
|
+
ctx_outs.push_back(ctx_out);
|
|
211
|
+
}
|
|
212
|
+
ctx_out = gguf_init_empty();
|
|
213
|
+
// Save all metadata in first split only
|
|
214
|
+
if (i_split == 0) {
|
|
215
|
+
gguf_set_kv(ctx_out, ctx_gguf);
|
|
216
|
+
}
|
|
217
|
+
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
|
|
218
|
+
gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, 0); // placeholder
|
|
219
|
+
gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
|
|
220
|
+
};
|
|
221
|
+
|
|
222
|
+
// initialize ctx_out for the first split
|
|
223
|
+
new_ctx_out();
|
|
224
|
+
|
|
225
|
+
// process tensors one by one
|
|
226
|
+
size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
|
|
227
|
+
for (int i = 0; i < n_tensors; ++i) {
|
|
228
|
+
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
|
|
229
|
+
// calculate the "imaginary" size = the current size + next tensor size
|
|
230
|
+
size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
|
|
231
|
+
size_t next_tensors_size = curr_tensors_size + n_bytes;
|
|
232
|
+
if (should_split(i, next_tensors_size)) {
|
|
233
|
+
new_ctx_out();
|
|
234
|
+
curr_tensors_size = n_bytes;
|
|
235
|
+
} else {
|
|
236
|
+
curr_tensors_size = next_tensors_size;
|
|
237
|
+
}
|
|
238
|
+
gguf_add_tensor(ctx_out, t);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
// push the last ctx_out
|
|
242
|
+
ctx_outs.push_back(ctx_out);
|
|
243
|
+
|
|
244
|
+
// set the correct n_split for all ctx_out
|
|
245
|
+
for (auto & ctx : ctx_outs) {
|
|
246
|
+
gguf_set_val_u16(ctx, LLM_KV_SPLIT_COUNT, ctx_outs.size());
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
~split_strategy() {
|
|
251
|
+
for (auto & ctx_out : ctx_outs) {
|
|
252
|
+
gguf_free(ctx_out);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
bool should_split(int i_tensor, size_t next_size) {
|
|
257
|
+
if (params.n_bytes_split > 0) {
|
|
258
|
+
// split by max size per file
|
|
259
|
+
return next_size > params.n_bytes_split;
|
|
260
|
+
} else {
|
|
261
|
+
// split by number of tensors per file
|
|
262
|
+
return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
void print_info() {
|
|
267
|
+
printf("n_split: %ld\n", ctx_outs.size());
|
|
268
|
+
int i_split = 0;
|
|
269
|
+
for (auto & ctx_out : ctx_outs) {
|
|
270
|
+
// re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
|
|
271
|
+
size_t total_size = gguf_get_meta_size(ctx_out);
|
|
272
|
+
for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
|
|
273
|
+
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_out, i));
|
|
274
|
+
total_size += ggml_nbytes(t);
|
|
275
|
+
}
|
|
276
|
+
total_size = total_size / 1024 / 1024; // convert to megabytes
|
|
277
|
+
printf("split %05d: n_tensors = %d, total_size = %ldM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
|
|
278
|
+
i_split++;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
void write() {
|
|
283
|
+
int i_split = 0;
|
|
284
|
+
int n_split = ctx_outs.size();
|
|
285
|
+
for (auto & ctx_out : ctx_outs) {
|
|
286
|
+
// construct file path
|
|
287
|
+
char split_path[PATH_MAX] = {0};
|
|
288
|
+
llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
|
|
289
|
+
|
|
290
|
+
// open the output file
|
|
291
|
+
printf("Writing file %s ... ", split_path);
|
|
292
|
+
fflush(stdout);
|
|
293
|
+
std::ofstream fout = std::ofstream(split_path, std::ios::binary);
|
|
294
|
+
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
|
295
|
+
|
|
296
|
+
// write metadata
|
|
297
|
+
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
|
298
|
+
gguf_get_meta_data(ctx_out, data.data());
|
|
299
|
+
fout.write((const char *)data.data(), data.size());
|
|
300
|
+
|
|
301
|
+
// write tensors
|
|
302
|
+
for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
|
|
303
|
+
// read tensor meta and prepare buffer
|
|
304
|
+
const char * t_name = gguf_get_tensor_name(ctx_out, i);
|
|
305
|
+
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
|
|
306
|
+
auto n_bytes = ggml_nbytes(t);
|
|
307
|
+
read_buf.resize(n_bytes);
|
|
308
|
+
|
|
309
|
+
// calculate offset
|
|
310
|
+
auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
|
|
311
|
+
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
|
|
312
|
+
|
|
313
|
+
// copy tensor from input to output file
|
|
314
|
+
copy_file_to_file(f_input, fout, offset, n_bytes);
|
|
315
|
+
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
printf("done\n");
|
|
319
|
+
// close the file
|
|
320
|
+
fout.close();
|
|
321
|
+
i_split++;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
|
|
326
|
+
// TODO: detect OS and use copy_file_range() here for better performance
|
|
327
|
+
if (read_buf.size() < len) {
|
|
328
|
+
read_buf.resize(len);
|
|
329
|
+
}
|
|
330
|
+
f_in.seekg(in_offset);
|
|
331
|
+
f_in.read((char *)read_buf.data(), len);
|
|
332
|
+
f_out.write((const char *)read_buf.data(), len);
|
|
333
|
+
}
|
|
334
|
+
};
|
|
335
|
+
|
|
336
|
+
static void gguf_split(const split_params & split_params) {
|
|
337
|
+
struct ggml_context * ctx_meta = NULL;
|
|
338
|
+
|
|
339
|
+
struct gguf_init_params params = {
|
|
340
|
+
/*.no_alloc = */ true,
|
|
341
|
+
/*.ctx = */ &ctx_meta,
|
|
342
|
+
};
|
|
343
|
+
|
|
344
|
+
std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
|
|
345
|
+
if (!f_input.is_open()) {
|
|
346
|
+
fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_params.input.c_str());
|
|
347
|
+
exit(EXIT_FAILURE);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
auto * ctx_gguf = gguf_init_from_file(split_params.input.c_str(), params);
|
|
351
|
+
if (!ctx_gguf) {
|
|
352
|
+
fprintf(stderr, "%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
|
|
353
|
+
exit(EXIT_FAILURE);
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// prepare the strategy
|
|
357
|
+
split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
|
|
358
|
+
int n_split = strategy.ctx_outs.size();
|
|
359
|
+
strategy.print_info();
|
|
360
|
+
|
|
361
|
+
if (!split_params.dry_run) {
|
|
362
|
+
// write all output splits
|
|
363
|
+
strategy.write();
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
// done, clean up
|
|
367
|
+
gguf_free(ctx_gguf);
|
|
368
|
+
f_input.close();
|
|
369
|
+
|
|
370
|
+
fprintf(stderr, "%s: %d gguf split written with a total of %d tensors.\n",
|
|
371
|
+
__func__, n_split, strategy.n_tensors);
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
// Merge a set of GGUF shards back into a single GGUF file.
// split_params.input names the first shard; the shard count is read from its
// LLM_KV_SPLIT_COUNT metadata and the remaining shard paths are derived from
// the shared prefix. Exits the process on any error, freeing the contexts it
// has accumulated so far.
static void gguf_merge(const split_params & split_params) {
    fprintf(stderr, "%s: %s -> %s\n",
            __func__, split_params.input.c_str(),
            split_params.output.c_str());
    // n_split starts at 1 so the loop reads the first shard; it is then
    // updated from that shard's metadata to the real shard count
    int n_split = 1;
    int total_tensors = 0;

    auto * ctx_out = gguf_init_empty();
    std::ofstream fout(split_params.output.c_str(), std::ios::binary);
    fout.exceptions(std::ofstream::failbit); // fail fast on write errors

    std::vector<uint8_t> read_data;             // reusable tensor-data buffer
    std::vector<ggml_context *> ctx_metas;      // per-shard tensor metadata, freed in the second pass
    std::vector<gguf_context *> ctx_ggufs;      // per-shard gguf contexts, freed in the second pass

    char split_path[PATH_MAX] = {0};
    strncpy(split_path, split_params.input.c_str(), sizeof(split_path) - 1);
    char split_prefix[PATH_MAX] = {0};          // common shard-name prefix, extracted from the first shard

    // First pass to find KV and tensors metadata
    for (int i_split = 0; i_split < n_split; i_split++) {
        struct ggml_context * ctx_meta = NULL;

        struct gguf_init_params params = {
            /*.no_alloc = */ true,
            /*.ctx = */ &ctx_meta,
        };

        // shard 0 uses split_path as given; later shards derive their path
        // from the verified prefix
        if (i_split > 0) {
            llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
        }
        fprintf(stderr, "%s: reading metadata %s ...", __func__, split_path);

        auto * ctx_gguf = gguf_init_from_file(split_path, params);
        if (!ctx_gguf) {
            fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
            exit(EXIT_FAILURE);
        }
        ctx_ggufs.push_back(ctx_gguf);
        ctx_metas.push_back(ctx_meta);

        // only the first shard carries the split bookkeeping metadata
        if (i_split == 0) {
            auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
            if (key_n_split < 0) {
                fprintf(stderr,
                        "\n%s: input file does not contain %s metadata\n",
                        __func__,
                        LLM_KV_SPLIT_COUNT);
                gguf_free(ctx_gguf);
                ggml_free(ctx_meta);
                gguf_free(ctx_out);
                fout.close();
                exit(EXIT_FAILURE);
            }

            // adopt the real shard count; this extends the loop bound
            n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
            if (n_split < 1) {
                fprintf(stderr,
                        "\n%s: input file does not contain a valid split count %d\n",
                        __func__,
                        n_split);
                gguf_free(ctx_gguf);
                ggml_free(ctx_meta);
                gguf_free(ctx_out);
                fout.close();
                exit(EXIT_FAILURE);
            }

            // Verify the file naming and extract split_prefix
            if (!llama_split_prefix(split_prefix, sizeof (split_prefix), split_path, i_split, n_split)) {
                fprintf(stderr, "\n%s: unexpected input file name: %s"
                                " i_split=%d"
                                " n_split=%d\n", __func__,
                                split_path, i_split, n_split);
                gguf_free(ctx_gguf);
                ggml_free(ctx_meta);
                gguf_free(ctx_out);
                fout.close();
                exit(EXIT_FAILURE);
            }

            // Do not trigger merge if we try to merge again the output
            gguf_set_val_u16(ctx_gguf, LLM_KV_SPLIT_COUNT, 0);

            // Set metadata from the first split
            gguf_set_kv(ctx_out, ctx_gguf);
        }

        // register every tensor of this shard in the merged metadata
        auto n_tensors = gguf_get_n_tensors(ctx_gguf);
        for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
            const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
            gguf_add_tensor(ctx_out, t);
        }
        total_tensors += n_tensors;

        // "\033[3D" moves the cursor back 3 columns, overwriting "..." with "done"
        fprintf(stderr, "\033[3Ddone\n");
    }

    // placeholder for the meta data
    // (real metadata is only complete after all tensors are registered, so
    // reserve the space now and rewrite it at the end)
    {
        auto meta_size = gguf_get_meta_size(ctx_out);
        ::zeros(fout, meta_size);
    }

    // Write tensors data
    for (int i_split = 0; i_split < n_split; i_split++) {
        llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
        std::ifstream f_input(split_path, std::ios::binary);
        if (!f_input.is_open()) {
            fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_path);
            // free everything collected in the first pass before bailing out
            for (uint32_t i = 0; i < ctx_ggufs.size(); i++) {
                gguf_free(ctx_ggufs[i]);
                ggml_free(ctx_metas[i]);
            }
            gguf_free(ctx_out);
            fout.close();
            exit(EXIT_FAILURE);
        }
        fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path);

        auto * ctx_gguf = ctx_ggufs[i_split];
        auto * ctx_meta = ctx_metas[i_split];

        auto n_tensors = gguf_get_n_tensors(ctx_gguf);
        for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
            const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);

            auto n_bytes = ggml_nbytes(t);

            if (read_data.size() < n_bytes) {
                read_data.resize(n_bytes);
            }

            // locate the tensor's data in the shard and stream it through read_data
            auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
            f_input.seekg(offset);
            f_input.read((char *)read_data.data(), n_bytes);

            // write tensor data + padding
            fout.write((const char *)read_data.data(), n_bytes);
            zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
        }

        // this shard's contexts are no longer needed once its data is copied
        gguf_free(ctx_gguf);
        ggml_free(ctx_meta);
        f_input.close();
        fprintf(stderr, "\033[3Ddone\n");
    }

    {
        // go back to beginning of file and write the updated metadata
        fout.seekp(0);
        std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
        gguf_get_meta_data(ctx_out, data.data());
        fout.write((const char *)data.data(), data.size());

        fout.close();
        gguf_free(ctx_out);
    }

    fprintf(stderr, "%s: %s merged from %d split with %d tensors.\n",
            __func__, split_params.output.c_str(), n_split, total_tensors);
}
|
|
538
|
+
|
|
539
|
+
int main(int argc, const char ** argv) {
|
|
540
|
+
split_params params;
|
|
541
|
+
split_params_parse(argc, argv, params);
|
|
542
|
+
|
|
543
|
+
switch (params.operation) {
|
|
544
|
+
case SPLIT_OP_SPLIT: gguf_split(params);
|
|
545
|
+
break;
|
|
546
|
+
case SPLIT_OP_MERGE: gguf_merge(params);
|
|
547
|
+
break;
|
|
548
|
+
default: split_print_usage(argv[0]);
|
|
549
|
+
exit(EXIT_FAILURE);
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
return 0;
|
|
553
|
+
}
|