@fugood/llama.node 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
@@ -0,0 +1,5 @@
1
+ set(TARGET gbnf-validator)
2
+ add_executable(${TARGET} gbnf-validator.cpp)
3
+ install(TARGETS ${TARGET} RUNTIME)
4
+ target_link_libraries(${TARGET} PRIVATE common grammar-parser llama ${CMAKE_THREAD_LIBS_INIT})
5
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -0,0 +1,132 @@
1
+ #define LLAMA_API_INTERNAL
2
+
3
+ #include "grammar-parser.h"
4
+ #include "ggml.h"
5
+ #include "llama.h"
6
+ #include "unicode.h"
7
+
8
+ #include <cstdio>
9
+ #include <cstdlib>
10
+ #include <string>
11
+ #include <vector>
12
+
13
+ static bool llama_sample_grammar_string(struct llama_grammar * grammar, const std::string & input_str, size_t & error_pos, std::string & error_msg) {
14
+ auto decoded = decode_utf8(input_str, {});
15
+ const auto & code_points = decoded.first;
16
+
17
+ size_t pos = 0;
18
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
19
+ auto prev_stacks = grammar->stacks;
20
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
21
+ if (grammar->stacks.empty()) {
22
+ error_pos = pos;
23
+ error_msg = "Unexpected character '" + unicode_cpt_to_utf8(*it) + "'";
24
+ grammar->stacks = prev_stacks;
25
+ return false;
26
+ }
27
+ ++pos;
28
+ }
29
+
30
+ for (const auto & stack : grammar->stacks) {
31
+ if (stack.empty()) {
32
+ return true;
33
+ }
34
+ }
35
+
36
+ error_pos = pos;
37
+ error_msg = "Unexpected end of input";
38
+ return false;
39
+ }
40
+
41
+ static void print_error_message(const std::string & input_str, size_t error_pos, const std::string & error_msg) {
42
+ fprintf(stdout, "Input string is invalid according to the grammar.\n");
43
+ fprintf(stdout, "Error: %s at position %zu\n", error_msg.c_str(), error_pos);
44
+ fprintf(stdout, "\n");
45
+ fprintf(stdout, "Input string:\n");
46
+ fprintf(stdout, "%s", input_str.substr(0, error_pos).c_str());
47
+ if (error_pos < input_str.size()) {
48
+ fprintf(stdout, "\033[1;31m%c", input_str[error_pos]);
49
+ if (error_pos+1 < input_str.size()) {
50
+ fprintf(stdout, "\033[0;31m%s", input_str.substr(error_pos+1).c_str());
51
+ }
52
+ fprintf(stdout, "\033[0m\n");
53
+ }
54
+ }
55
+
56
+ int main(int argc, char** argv) {
57
+ if (argc != 3) {
58
+ fprintf(stdout, "Usage: %s <grammar_filename> <input_filename>\n", argv[0]);
59
+ return 1;
60
+ }
61
+
62
+ const std::string grammar_filename = argv[1];
63
+ const std::string input_filename = argv[2];
64
+
65
+ // Read the GBNF grammar file
66
+ FILE* grammar_file = fopen(grammar_filename.c_str(), "r");
67
+ if (!grammar_file) {
68
+ fprintf(stdout, "Failed to open grammar file: %s\n", grammar_filename.c_str());
69
+ return 1;
70
+ }
71
+
72
+ fseek(grammar_file, 0, SEEK_END);
73
+ size_t grammar_size = ftell(grammar_file);
74
+ fseek(grammar_file, 0, SEEK_SET);
75
+
76
+ std::string grammar_str(grammar_size, ' ');
77
+ fread(&grammar_str[0], 1, grammar_size, grammar_file);
78
+ fclose(grammar_file);
79
+
80
+ // Parse the GBNF grammar
81
+ auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
82
+
83
+ // will be empty (default) if there are parse errors
84
+ if (parsed_grammar.rules.empty()) {
85
+ fprintf(stdout, "%s: failed to parse grammar\n", __func__);
86
+ return 1;
87
+ }
88
+
89
+ // Ensure that there is a "root" node.
90
+ if (parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()) {
91
+ fprintf(stdout, "%s: grammar does not contain a 'root' symbol\n", __func__);
92
+ return 1;
93
+ }
94
+
95
+ std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
96
+
97
+ // Create the LLAMA grammar
98
+ auto grammar = llama_grammar_init(
99
+ grammar_rules.data(),
100
+ grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
101
+
102
+ // Read the input file
103
+ FILE* input_file = fopen(input_filename.c_str(), "r");
104
+ if (!input_file) {
105
+ fprintf(stdout, "Failed to open input file: %s\n", input_filename.c_str());
106
+ return 1;
107
+ }
108
+
109
+ fseek(input_file, 0, SEEK_END);
110
+ size_t input_size = ftell(input_file);
111
+ fseek(input_file, 0, SEEK_SET);
112
+
113
+ std::string input_str(input_size, ' ');
114
+ fread(&input_str[0], 1, input_size, input_file);
115
+ fclose(input_file);
116
+
117
+ // Validate the input string against the grammar
118
+ size_t error_pos;
119
+ std::string error_msg;
120
+ bool is_valid = llama_sample_grammar_string(grammar, input_str, error_pos, error_msg);
121
+
122
+ if (is_valid) {
123
+ fprintf(stdout, "Input string is valid according to the grammar.\n");
124
+ } else {
125
+ print_error_message(input_str, error_pos, error_msg);
126
+ }
127
+
128
+ // Clean up
129
+ llama_grammar_free(grammar);
130
+
131
+ return 0;
132
+ }
@@ -0,0 +1,5 @@
1
+ set(TARGET gguf)
2
+ add_executable(${TARGET} gguf.cpp)
3
+ install(TARGETS ${TARGET} RUNTIME)
4
+ target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
5
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -0,0 +1,256 @@
1
+ #include "ggml.h"
2
+
3
+ #include <cstdio>
4
+ #include <cinttypes>
5
+ #include <string>
6
+ #include <sstream>
7
+ #include <fstream>
8
+ #include <vector>
9
+
10
+ #undef MIN
11
+ #undef MAX
12
+ #define MIN(a, b) ((a) < (b) ? (a) : (b))
13
+ #define MAX(a, b) ((a) > (b) ? (a) : (b))
14
+
15
+ template <typename T>
16
+ static std::string to_string(const T & val) {
17
+ std::stringstream ss;
18
+ ss << val;
19
+ return ss.str();
20
+ }
21
+
22
+ static bool gguf_ex_write(const std::string & fname) {
23
+ struct gguf_context * ctx = gguf_init_empty();
24
+
25
+ gguf_set_val_u8 (ctx, "some.parameter.uint8", 0x12);
26
+ gguf_set_val_i8 (ctx, "some.parameter.int8", -0x13);
27
+ gguf_set_val_u16 (ctx, "some.parameter.uint16", 0x1234);
28
+ gguf_set_val_i16 (ctx, "some.parameter.int16", -0x1235);
29
+ gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
30
+ gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
31
+ gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
32
+ gguf_set_val_u64 (ctx, "some.parameter.uint64", 0x123456789abcdef0ull);
33
+ gguf_set_val_i64 (ctx, "some.parameter.int64", -0x123456789abcdef1ll);
34
+ gguf_set_val_f64 (ctx, "some.parameter.float64", 0.1234567890123456789);
35
+ gguf_set_val_bool(ctx, "some.parameter.bool", true);
36
+ gguf_set_val_str (ctx, "some.parameter.string", "hello world");
37
+
38
+ gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
39
+ gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
40
+ gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
41
+
42
+ struct ggml_init_params params = {
43
+ /*.mem_size =*/ 128ull*1024ull*1024ull,
44
+ /*.mem_buffer =*/ NULL,
45
+ /*.no_alloc =*/ false,
46
+ };
47
+
48
+ struct ggml_context * ctx_data = ggml_init(params);
49
+
50
+ const int n_tensors = 10;
51
+
52
+ // tensor infos
53
+ for (int i = 0; i < n_tensors; ++i) {
54
+ const std::string name = "tensor_" + to_string(i);
55
+
56
+ int64_t ne[GGML_MAX_DIMS] = { 1 };
57
+ int32_t n_dims = rand() % GGML_MAX_DIMS + 1;
58
+
59
+ for (int j = 0; j < n_dims; ++j) {
60
+ ne[j] = rand() % 10 + 1;
61
+ }
62
+
63
+ struct ggml_tensor * cur = ggml_new_tensor(ctx_data, GGML_TYPE_F32, n_dims, ne);
64
+ ggml_set_name(cur, name.c_str());
65
+
66
+ {
67
+ float * data = (float *) cur->data;
68
+ for (int j = 0; j < ggml_nelements(cur); ++j) {
69
+ data[j] = 100 + i;
70
+ }
71
+ }
72
+
73
+ gguf_add_tensor(ctx, cur);
74
+ }
75
+
76
+ gguf_write_to_file(ctx, fname.c_str(), false);
77
+
78
+ printf("%s: wrote file '%s;\n", __func__, fname.c_str());
79
+
80
+ ggml_free(ctx_data);
81
+ gguf_free(ctx);
82
+
83
+ return true;
84
+ }
85
+
86
+ // just read tensor info
87
+ static bool gguf_ex_read_0(const std::string & fname) {
88
+ struct gguf_init_params params = {
89
+ /*.no_alloc = */ false,
90
+ /*.ctx = */ NULL,
91
+ };
92
+
93
+ struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
94
+
95
+ printf("%s: version: %d\n", __func__, gguf_get_version(ctx));
96
+ printf("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
97
+ printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
98
+
99
+ // kv
100
+ {
101
+ const int n_kv = gguf_get_n_kv(ctx);
102
+
103
+ printf("%s: n_kv: %d\n", __func__, n_kv);
104
+
105
+ for (int i = 0; i < n_kv; ++i) {
106
+ const char * key = gguf_get_key(ctx, i);
107
+
108
+ printf("%s: kv[%d]: key = %s\n", __func__, i, key);
109
+ }
110
+ }
111
+
112
+ // find kv string
113
+ {
114
+ const char * findkey = "some.parameter.string";
115
+
116
+ const int keyidx = gguf_find_key(ctx, findkey);
117
+ if (keyidx == -1) {
118
+ printf("%s: find key: %s not found.\n", __func__, findkey);
119
+ } else {
120
+ const char * key_value = gguf_get_val_str(ctx, keyidx);
121
+ printf("%s: find key: %s found, kv[%d] value = %s\n", __func__, findkey, keyidx, key_value);
122
+ }
123
+ }
124
+
125
+ // tensor info
126
+ {
127
+ const int n_tensors = gguf_get_n_tensors(ctx);
128
+
129
+ printf("%s: n_tensors: %d\n", __func__, n_tensors);
130
+
131
+ for (int i = 0; i < n_tensors; ++i) {
132
+ const char * name = gguf_get_tensor_name (ctx, i);
133
+ const size_t offset = gguf_get_tensor_offset(ctx, i);
134
+
135
+ printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
136
+ }
137
+ }
138
+
139
+ gguf_free(ctx);
140
+
141
+ return true;
142
+ }
143
+
144
+ // read and create ggml_context containing the tensors and their data
145
+ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
146
+ struct ggml_context * ctx_data = NULL;
147
+
148
+ struct gguf_init_params params = {
149
+ /*.no_alloc = */ false,
150
+ /*.ctx = */ &ctx_data,
151
+ };
152
+
153
+ struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
154
+
155
+ printf("%s: version: %d\n", __func__, gguf_get_version(ctx));
156
+ printf("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
157
+ printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
158
+
159
+ // kv
160
+ {
161
+ const int n_kv = gguf_get_n_kv(ctx);
162
+
163
+ printf("%s: n_kv: %d\n", __func__, n_kv);
164
+
165
+ for (int i = 0; i < n_kv; ++i) {
166
+ const char * key = gguf_get_key(ctx, i);
167
+
168
+ printf("%s: kv[%d]: key = %s\n", __func__, i, key);
169
+ }
170
+ }
171
+
172
+ // tensor info
173
+ {
174
+ const int n_tensors = gguf_get_n_tensors(ctx);
175
+
176
+ printf("%s: n_tensors: %d\n", __func__, n_tensors);
177
+
178
+ for (int i = 0; i < n_tensors; ++i) {
179
+ const char * name = gguf_get_tensor_name (ctx, i);
180
+ const size_t offset = gguf_get_tensor_offset(ctx, i);
181
+
182
+ printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
183
+ }
184
+ }
185
+
186
+ // data
187
+ {
188
+ const int n_tensors = gguf_get_n_tensors(ctx);
189
+
190
+ for (int i = 0; i < n_tensors; ++i) {
191
+ printf("%s: reading tensor %d data\n", __func__, i);
192
+
193
+ const char * name = gguf_get_tensor_name(ctx, i);
194
+
195
+ struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
196
+
197
+ printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
198
+
199
+ // print first 10 elements
200
+ const float * data = (const float *) cur->data;
201
+
202
+ printf("%s data[:10] : ", name);
203
+ for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
204
+ printf("%f ", data[j]);
205
+ }
206
+ printf("\n\n");
207
+
208
+ // check data
209
+ if (check_data) {
210
+ const float * data = (const float *) cur->data;
211
+ for (int j = 0; j < ggml_nelements(cur); ++j) {
212
+ if (data[j] != 100 + i) {
213
+ fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
214
+ gguf_free(ctx);
215
+ return false;
216
+ }
217
+ }
218
+ }
219
+ }
220
+ }
221
+
222
+ printf("%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
223
+
224
+ ggml_free(ctx_data);
225
+ gguf_free(ctx);
226
+
227
+ return true;
228
+ }
229
+
230
+ int main(int argc, char ** argv) {
231
+ if (argc < 3) {
232
+ printf("usage: %s data.gguf r|w [n]\n", argv[0]);
233
+ printf("r: read data.gguf file\n");
234
+ printf("w: write data.gguf file\n");
235
+ printf("n: no check of tensor data\n");
236
+ return -1;
237
+ }
238
+ bool check_data = true;
239
+ if (argc == 4) {
240
+ check_data = false;
241
+ }
242
+
243
+ const std::string fname(argv[1]);
244
+ const std::string mode (argv[2]);
245
+
246
+ GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
247
+
248
+ if (mode == "w") {
249
+ GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
250
+ } else if (mode == "r") {
251
+ GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
252
+ GGML_ASSERT(gguf_ex_read_1(fname, check_data) && "failed to read gguf file");
253
+ }
254
+
255
+ return 0;
256
+ }
@@ -0,0 +1,5 @@
1
+ set(TARGET gguf-split)
2
+ add_executable(${TARGET} gguf-split.cpp)
3
+ install(TARGETS ${TARGET} RUNTIME)
4
+ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
5
+ target_compile_features(${TARGET} PRIVATE cxx_std_11)