@fugood/llama.node 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
package/src/llama.cpp/examples/gguf-split/gguf-split.cpp
@@ -0,0 +1,553 @@
+ #include "llama.h"
+ #include "common.h"
+
+ #include <algorithm>
+ #include <cmath>
+ #include <cstdlib>
+ #include <fstream>
+ #include <string>
+ #include <vector>
+
+ #include <stdio.h>
+ #include <string.h>
+ #include <climits>
+ #include <stdexcept>
+
+ #if defined(_WIN32)
+     #include <windows.h>
+     #ifndef PATH_MAX
+         #define PATH_MAX MAX_PATH
+     #endif
+     #include <io.h>
+ #endif
+
+ enum split_operation : uint8_t {
+     SPLIT_OP_SPLIT,
+     SPLIT_OP_MERGE,
+ };
+
+ struct split_params {
+     split_operation operation = SPLIT_OP_SPLIT;
+     size_t n_bytes_split = 0;
+     int n_split_tensors = 128;
+     std::string input;
+     std::string output;
+     bool dry_run = false;
+ };
+
+ static void split_print_usage(const char * executable) {
+     const split_params default_params;
+     printf("\n");
+     printf("usage: %s [options] GGUF_IN GGUF_OUT\n", executable);
+     printf("\n");
+     printf("Apply a GGUF operation on IN to OUT.");
+     printf("\n");
+     printf("options:\n");
+     printf("  -h, --help              show this help message and exit\n");
+     printf("  --version               show version and build info\n");
+     printf("  --split                 split GGUF to multiple GGUF (enabled by default)\n");
+     printf("  --merge                 merge multiple GGUF to a single GGUF\n");
+     printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
+     printf("  --split-max-size N(M|G) max size per split\n");
+     printf("  --dry-run               only print out a split plan and exit, without writing any new files\n");
+     printf("\n");
+ }
+
+ // convert a size string, for example "128M" or "4G", to a number of bytes
+ static size_t split_str_to_n_bytes(std::string str) {
+     size_t n_bytes = 0;
+     int n = 0;
+     if (str.back() == 'M') {
+         sscanf(str.c_str(), "%d", &n);
+         n_bytes = (size_t)n * 1024 * 1024; // megabytes
+     } else if (str.back() == 'G') {
+         sscanf(str.c_str(), "%d", &n);
+         n_bytes = (size_t)n * 1024 * 1024 * 1024; // gigabytes
+     } else {
+         throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back()));
+     }
+     if (n <= 0) {
+         throw std::invalid_argument("error: size must be a positive value");
+     }
+     return n_bytes;
+ }
+
+ static void split_params_parse_ex(int argc, const char ** argv, split_params & params) {
+     std::string arg;
+     const std::string arg_prefix = "--";
+     bool invalid_param = false;
+     // track these across iterations so that conflicting options can be detected
+     bool is_op_set = false;
+     bool is_mode_set = false;
+
+     int arg_idx = 1;
+     for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
+         arg = argv[arg_idx];
+         if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+             std::replace(arg.begin(), arg.end(), '_', '-');
+         }
+
+         bool arg_found = false;
+         if (arg == "-h" || arg == "--help") {
+             split_print_usage(argv[0]);
+             exit(0);
+         }
+         if (arg == "--version") {
+             fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
+             fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
+             exit(0);
+         }
+         if (arg == "--dry-run") {
+             arg_found = true;
+             params.dry_run = true;
+         }
+
+         if (arg == "--merge") {
+             if (is_op_set) {
+                 throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
+             }
+             arg_found = true;
+             is_op_set = true;
+             params.operation = SPLIT_OP_MERGE;
+         }
+         if (arg == "--split") {
+             if (is_op_set) {
+                 throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
+             }
+             arg_found = true;
+             is_op_set = true;
+             params.operation = SPLIT_OP_SPLIT;
+         }
+
+         if (arg == "--split-max-tensors") {
+             if (is_mode_set) {
+                 throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
+             }
+             if (++arg_idx >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             arg_found = true;
+             is_mode_set = true;
+             params.n_split_tensors = atoi(argv[arg_idx]);
+         }
+         if (arg == "--split-max-size") {
+             if (is_mode_set) {
+                 throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
+             }
+             if (++arg_idx >= argc) {
+                 invalid_param = true;
+                 break;
+             }
+             arg_found = true;
+             is_mode_set = true;
+             params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
+         }
+
+         if (!arg_found) {
+             throw std::invalid_argument("error: unknown argument: " + arg);
+         }
+     }
+
+     if (invalid_param) {
+         throw std::invalid_argument("error: invalid parameter for argument: " + arg);
+     }
+
+     if (argc - arg_idx < 2) {
+         throw std::invalid_argument("error: bad arguments");
+     }
+
+     params.input  = argv[arg_idx++];
+     params.output = argv[arg_idx++];
+ }
+
+ static bool split_params_parse(int argc, const char ** argv, split_params & params) {
+     bool result = true;
+     try {
+         split_params_parse_ex(argc, argv, params);
+     }
+     catch (const std::invalid_argument & ex) {
+         fprintf(stderr, "%s\n", ex.what());
+         split_print_usage(argv[0]);
+         exit(EXIT_FAILURE);
+     }
+     return result;
+ }
+
+ static void zeros(std::ofstream & file, size_t n) {
+     char zero = 0;
+     for (size_t i = 0; i < n; ++i) {
+         file.write(&zero, 1);
+     }
+ }
+
+ struct split_strategy {
+     const split_params params;
+     std::ifstream & f_input;
+     struct gguf_context * ctx_gguf;
+     struct ggml_context * ctx_meta = NULL;
+     const int n_tensors;
+
+     // one ctx_out per output file
+     std::vector<struct gguf_context *> ctx_outs;
+
+     // temporary buffer for reading in tensor data
+     std::vector<uint8_t> read_buf;
+
+     split_strategy(const split_params & params,
+             std::ifstream & f_input,
+             struct gguf_context * ctx_gguf,
+             struct ggml_context * ctx_meta) :
+         params(params),
+         f_input(f_input),
+         ctx_gguf(ctx_gguf),
+         ctx_meta(ctx_meta),
+         n_tensors(gguf_get_n_tensors(ctx_gguf)) {
+
+         // the list of tensors for each output file must be known in advance, so build the ctx_out for every output split up front
+         int i_split = -1;
+         struct gguf_context * ctx_out = NULL;
+         auto new_ctx_out = [&]() {
+             i_split++;
+             if (ctx_out != NULL) {
+                 if (gguf_get_n_tensors(ctx_out) == 0) {
+                     fprintf(stderr, "error: one of the splits has 0 tensors; maybe the size or tensor limit is too small\n");
+                     exit(EXIT_FAILURE);
+                 }
+                 ctx_outs.push_back(ctx_out);
+             }
+             ctx_out = gguf_init_empty();
+             // save all metadata in the first split only
+             if (i_split == 0) {
+                 gguf_set_kv(ctx_out, ctx_gguf);
+             }
+             gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO, i_split);
+             gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT, 0); // placeholder
+             gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT, n_tensors);
+         };
+
+         // initialize ctx_out for the first split
+         new_ctx_out();
+
+         // process tensors one by one
+         size_t curr_tensors_size = 0; // current size, counting only tensor data (without metadata)
+         for (int i = 0; i < n_tensors; ++i) {
+             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+             // calculate the projected size = the current size + the next tensor's size
+             size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
+             size_t next_tensors_size = curr_tensors_size + n_bytes;
+             if (should_split(i, next_tensors_size)) {
+                 new_ctx_out();
+                 curr_tensors_size = n_bytes;
+             } else {
+                 curr_tensors_size = next_tensors_size;
+             }
+             gguf_add_tensor(ctx_out, t);
+         }
+
+         // push the last ctx_out
+         ctx_outs.push_back(ctx_out);
+
+         // set the correct n_split for all ctx_out
+         for (auto & ctx : ctx_outs) {
+             gguf_set_val_u16(ctx, LLM_KV_SPLIT_COUNT, ctx_outs.size());
+         }
+     }
+
+     ~split_strategy() {
+         for (auto & ctx_out : ctx_outs) {
+             gguf_free(ctx_out);
+         }
+     }
+
+     bool should_split(int i_tensor, size_t next_size) {
+         if (params.n_bytes_split > 0) {
+             // split by max size per file
+             return next_size > params.n_bytes_split;
+         } else {
+             // split by number of tensors per file
+             return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
+         }
+     }
+
+     void print_info() {
+         printf("n_split: %zu\n", ctx_outs.size());
+         int i_split = 0;
+         for (auto & ctx_out : ctx_outs) {
+             // re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
+             size_t total_size = gguf_get_meta_size(ctx_out);
+             for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
+                 struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_out, i));
+                 total_size += ggml_nbytes(t);
+             }
+             total_size = total_size / 1024 / 1024; // convert to megabytes
+             printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
+             i_split++;
+         }
+     }
+
+     void write() {
+         int i_split = 0;
+         int n_split = ctx_outs.size();
+         for (auto & ctx_out : ctx_outs) {
+             // construct file path
+             char split_path[PATH_MAX] = {0};
+             llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);
+
+             // open the output file
+             printf("Writing file %s ... ", split_path);
+             fflush(stdout);
+             std::ofstream fout = std::ofstream(split_path, std::ios::binary);
+             fout.exceptions(std::ofstream::failbit); // fail fast on write errors
+
+             // write metadata
+             std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
+             gguf_get_meta_data(ctx_out, data.data());
+             fout.write((const char *)data.data(), data.size());
+
+             // write tensors
+             for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
+                 // read tensor meta and prepare buffer
+                 const char * t_name = gguf_get_tensor_name(ctx_out, i);
+                 struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+                 auto n_bytes = ggml_nbytes(t);
+                 read_buf.resize(n_bytes);
+
+                 // calculate offset
+                 auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
+                 auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);
+
+                 // copy tensor from input to output file
+                 copy_file_to_file(f_input, fout, offset, n_bytes);
+                 zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
+             }
+
+             printf("done\n");
+             // close the file
+             fout.close();
+             i_split++;
+         }
+     }
+
+     void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
+         // TODO: detect OS and use copy_file_range() here for better performance
+         if (read_buf.size() < len) {
+             read_buf.resize(len);
+         }
+         f_in.seekg(in_offset);
+         f_in.read((char *)read_buf.data(), len);
+         f_out.write((const char *)read_buf.data(), len);
+     }
+ };
+
+ static void gguf_split(const split_params & split_params) {
+     struct ggml_context * ctx_meta = NULL;
+
+     struct gguf_init_params params = {
+         /*.no_alloc = */ true,
+         /*.ctx      = */ &ctx_meta,
+     };
+
+     std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
+     if (!f_input.is_open()) {
+         fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_params.input.c_str());
+         exit(EXIT_FAILURE);
+     }
+
+     auto * ctx_gguf = gguf_init_from_file(split_params.input.c_str(), params);
+     if (!ctx_gguf) {
+         fprintf(stderr, "%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
+         exit(EXIT_FAILURE);
+     }
+
+     // prepare the strategy
+     split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
+     int n_split = strategy.ctx_outs.size();
+     strategy.print_info();
+
+     if (!split_params.dry_run) {
+         // write all output splits
+         strategy.write();
+     }
+
+     // done, clean up
+     gguf_free(ctx_gguf);
+     f_input.close();
+
+     fprintf(stderr, "%s: %d gguf splits written with a total of %d tensors.\n",
+             __func__, n_split, strategy.n_tensors);
+ }
+
+ static void gguf_merge(const split_params & split_params) {
+     fprintf(stderr, "%s: %s -> %s\n",
+             __func__, split_params.input.c_str(),
+             split_params.output.c_str());
+     int n_split = 1;
+     int total_tensors = 0;
+
+     auto * ctx_out = gguf_init_empty();
+     std::ofstream fout(split_params.output.c_str(), std::ios::binary);
+     fout.exceptions(std::ofstream::failbit); // fail fast on write errors
+
+     std::vector<uint8_t> read_data;
+     std::vector<ggml_context *> ctx_metas;
+     std::vector<gguf_context *> ctx_ggufs;
+
+     char split_path[PATH_MAX] = {0};
+     strncpy(split_path, split_params.input.c_str(), sizeof(split_path) - 1);
+     char split_prefix[PATH_MAX] = {0};
+
+     // first pass to find KV and tensors metadata
+     for (int i_split = 0; i_split < n_split; i_split++) {
+         struct ggml_context * ctx_meta = NULL;
+
+         struct gguf_init_params params = {
+             /*.no_alloc = */ true,
+             /*.ctx      = */ &ctx_meta,
+         };
+
+         if (i_split > 0) {
+             llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
+         }
+         fprintf(stderr, "%s: reading metadata %s ...", __func__, split_path);
+
+         auto * ctx_gguf = gguf_init_from_file(split_path, params);
+         if (!ctx_gguf) {
+             fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, split_params.input.c_str());
+             exit(EXIT_FAILURE);
+         }
+         ctx_ggufs.push_back(ctx_gguf);
+         ctx_metas.push_back(ctx_meta);
+
+         if (i_split == 0) {
+             auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
+             if (key_n_split < 0) {
+                 fprintf(stderr,
+                         "\n%s: input file does not contain %s metadata\n",
+                         __func__,
+                         LLM_KV_SPLIT_COUNT);
+                 gguf_free(ctx_gguf);
+                 ggml_free(ctx_meta);
+                 gguf_free(ctx_out);
+                 fout.close();
+                 exit(EXIT_FAILURE);
+             }
+
+             n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
+             if (n_split < 1) {
+                 fprintf(stderr,
+                         "\n%s: input file does not contain a valid split count %d\n",
+                         __func__,
+                         n_split);
+                 gguf_free(ctx_gguf);
+                 ggml_free(ctx_meta);
+                 gguf_free(ctx_out);
+                 fout.close();
+                 exit(EXIT_FAILURE);
+             }
+
+             // verify the file naming and extract split_prefix
+             if (!llama_split_prefix(split_prefix, sizeof(split_prefix), split_path, i_split, n_split)) {
+                 fprintf(stderr, "\n%s: unexpected input file name: %s"
+                         " i_split=%d"
+                         " n_split=%d\n", __func__,
+                         split_path, i_split, n_split);
+                 gguf_free(ctx_gguf);
+                 ggml_free(ctx_meta);
+                 gguf_free(ctx_out);
+                 fout.close();
+                 exit(EXIT_FAILURE);
+             }
+
+             // set the split count to 0 so the merged output cannot itself be merged again
+             gguf_set_val_u16(ctx_gguf, LLM_KV_SPLIT_COUNT, 0);
+
+             // set metadata from the first split
+             gguf_set_kv(ctx_out, ctx_gguf);
+         }
+
+         auto n_tensors = gguf_get_n_tensors(ctx_gguf);
+         for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
+             const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
+             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+             gguf_add_tensor(ctx_out, t);
+         }
+         total_tensors += n_tensors;
+
+         fprintf(stderr, "\033[3Ddone\n");
+     }
+
+     // write a placeholder for the metadata
+     {
+         auto meta_size = gguf_get_meta_size(ctx_out);
+         ::zeros(fout, meta_size);
+     }
+
+     // write tensor data
+     for (int i_split = 0; i_split < n_split; i_split++) {
+         llama_split_path(split_path, sizeof(split_path), split_prefix, i_split, n_split);
+         std::ifstream f_input(split_path, std::ios::binary);
+         if (!f_input.is_open()) {
+             fprintf(stderr, "%s: failed to open input GGUF from %s\n", __func__, split_path);
+             for (uint32_t i = 0; i < ctx_ggufs.size(); i++) {
+                 gguf_free(ctx_ggufs[i]);
+                 ggml_free(ctx_metas[i]);
+             }
+             gguf_free(ctx_out);
+             fout.close();
+             exit(EXIT_FAILURE);
+         }
+         fprintf(stderr, "%s: writing tensors %s ...", __func__, split_path);
+
+         auto * ctx_gguf = ctx_ggufs[i_split];
+         auto * ctx_meta = ctx_metas[i_split];
+
+         auto n_tensors = gguf_get_n_tensors(ctx_gguf);
+         for (int i_tensor = 0; i_tensor < n_tensors; i_tensor++) {
+             const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
+             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+
+             auto n_bytes = ggml_nbytes(t);
+
+             if (read_data.size() < n_bytes) {
+                 read_data.resize(n_bytes);
+             }
+
+             auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
+             f_input.seekg(offset);
+             f_input.read((char *)read_data.data(), n_bytes);
+
+             // write tensor data + padding
+             fout.write((const char *)read_data.data(), n_bytes);
+             zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
+         }
+
+         gguf_free(ctx_gguf);
+         ggml_free(ctx_meta);
+         f_input.close();
+         fprintf(stderr, "\033[3Ddone\n");
+     }
+
+     {
+         // go back to the beginning of the file and write the updated metadata
+         fout.seekp(0);
+         std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
+         gguf_get_meta_data(ctx_out, data.data());
+         fout.write((const char *)data.data(), data.size());
+
+         fout.close();
+         gguf_free(ctx_out);
+     }
+
+     fprintf(stderr, "%s: %s merged from %d splits with %d tensors.\n",
+             __func__, split_params.output.c_str(), n_split, total_tensors);
+ }
+
+ int main(int argc, const char ** argv) {
+     split_params params;
+     split_params_parse(argc, argv, params);
+
+     switch (params.operation) {
+         case SPLIT_OP_SPLIT: gguf_split(params);
+             break;
+         case SPLIT_OP_MERGE: gguf_merge(params);
+             break;
+         default: split_print_usage(argv[0]);
+             exit(EXIT_FAILURE);
+     }
+
+     return 0;
+ }
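
Note on the hunk above: write() names each output shard with llama_split_path() and gguf_merge() recovers the common prefix with llama_split_prefix(), both declared in llama.h. A minimal standalone sketch of the shard-naming convention these helpers appear to follow; the "%s-%05d-of-%05d.gguf" pattern is an assumption based on llama.cpp's split-file naming (e.g. "model-00001-of-00003.gguf"), and prefix/n_split below are illustrative values, not taken from the package:

// sketch: print the expected shard file names for a given prefix and count
#include <cstdio>

int main() {
    const char * prefix  = "model"; // hypothetical output prefix (GGUF_OUT argument)
    const int    n_split = 3;       // hypothetical shard count
    char path[1024];
    for (int i_split = 0; i_split < n_split; i_split++) {
        // i_split is zero-based internally; the file names are one-based
        snprintf(path, sizeof(path), "%s-%05d-of-%05d.gguf", prefix, i_split + 1, n_split);
        printf("%s\n", path);
    }
    return 0;
}

Under that assumption, an invocation like "gguf-split --split-max-size 4G model.gguf model-out" (hypothetical paths) would produce model-out-00001-of-0000N.gguf and so on, which gguf-merge can later reassemble into a single file.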
package/src/llama.cpp/examples/gritlm/CMakeLists.txt
@@ -0,0 +1,5 @@
+ set(TARGET gritlm)
+ add_executable(${TARGET} gritlm.cpp)
+ install(TARGETS ${TARGET} RUNTIME)
+ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)