@fugood/llama.node 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
@@ -0,0 +1,764 @@
1
+ #include "json-schema-to-grammar.h"
2
+ #include <algorithm>
3
+ #include <fstream>
4
+ #include <map>
5
+ #include <regex>
6
+ #include <sstream>
7
+ #include <string>
8
+ #include <unordered_map>
9
+ #include <unordered_set>
10
+ #include <vector>
11
+
12
+ using json = nlohmann::ordered_json;
13
+
14
+ template <typename Iterator>
15
+ static std::string join(Iterator begin, Iterator end, const std::string & separator);
16
+
17
+ static std::string repeat(const std::string & str, size_t n);
18
+
19
+ static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "", bool item_rule_is_literal = false) {
20
+ if (separator_rule.empty()) {
21
+ if (min_items == 0 && max_items == 1) {
22
+ return item_rule + "?";
23
+ } else if (min_items == 1 && max_items == std::numeric_limits<int>::max()) {
24
+ return item_rule + "+";
25
+ }
26
+ }
27
+
28
+ std::string result;
29
+ if (min_items > 0) {
30
+ if (item_rule_is_literal && separator_rule.empty()) {
31
+ result = "\"" + repeat(std::string(item_rule.begin() + 1, item_rule.end() - 1), min_items) + "\"";
32
+ } else {
33
+ std::vector<std::string> items(min_items, item_rule);
34
+ result = join(items.begin(), items.end(), separator_rule.empty() ? " " : " " + separator_rule + " ");
35
+ }
36
+ }
37
+
38
+ std::function<std::string(int, bool)> opt_repetitions = [&](int up_to_n, bool prefix_with_sep) -> std::string {
39
+ auto content = prefix_with_sep && !separator_rule.empty() ? separator_rule + " " + item_rule : item_rule;
40
+
41
+ if (up_to_n == 0) {
42
+ return "";
43
+ } else if (up_to_n == 1) {
44
+ return "(" + content + ")?";
45
+ } else if (!separator_rule.empty() && !prefix_with_sep) {
46
+ return "(" + content + " " + opt_repetitions(up_to_n - 1, true) + ")?";
47
+ } else {
48
+ std::string res = repeat("(" + content + " ", up_to_n);
49
+ // strip trailing space
50
+ res = res.substr(0, res.length() - 1);
51
+ res += repeat(")?", up_to_n);
52
+ return res;
53
+ }
54
+ };
55
+
56
+ if (min_items > 0 && max_items != min_items) {
57
+ result += " ";
58
+ }
59
+
60
+ if (max_items != std::numeric_limits<int>::max()) {
61
+ result += opt_repetitions(max_items - min_items, min_items > 0);
62
+ } else {
63
+ std::string item_operator = "(" + (separator_rule.empty() ? "" : separator_rule + " ") + item_rule + ")";
64
+ if (min_items == 0 && !separator_rule.empty()) {
65
+ result = "(" + item_rule + " " + item_operator + "*)?";
66
+ } else {
67
+ result += item_operator + "*";
68
+ }
69
+ }
70
+
71
+ return result;
72
+ }
73
+
74
// Grammar body of the "space" rule: a single optional space character.
const std::string SPACE_RULE = "\" \"?";
75
+
76
// A predefined grammar rule together with the names of the rules it refers to.
struct BuiltinRule {
    // Right-hand side of the rule in grammar syntax.
    std::string content;
    // Names of other built-in rules that `content` references.
    std::vector<std::string> deps;
};
80
+
81
// Expression matching between 0 and 15 digits, shared by the numeric rules below.
const std::string _up_to_15_digits = build_repetition("[0-9]", 0, 15);

// Built-in rules for the JSON primitive types, keyed by rule name. Each entry lists the
// other rules it depends on so that they can be registered alongside it.
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
    {"boolean", {"(\"true\" | \"false\") space", {}}},
    {"decimal-part", {"[0-9] " + _up_to_15_digits, {}}},
    {"integral-part", {"[0-9] | [1-9] " + _up_to_15_digits, {}}},
    {"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}},
    {"integer", {"(\"-\"? integral-part) space", {"integral-part"}}},
    {"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
    {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
    {"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
    // Quoted string of hex digits grouped 8-4-4-4-12 and separated by dashes.
    {"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}},
    // One string character: anything except quote/backslash, or a backslash escape sequence.
    {"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])", {}}},
    {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}},
    {"null", {"\"null\" space", {}}},
};
101
+
102
// Built-in rules for date/time string formats, keyed by rule name. The "*-string" entries
// wrap the corresponding bare rule in double quotes followed by the "space" rule.
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
    {"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
    {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
    {"date-time", {"date \"T\" time", {"date", "time"}}},
    {"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
    {"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
    {"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}}
};
110
+
111
+ static bool is_reserved_name(const std::string & name) {
112
+ static std::unordered_set<std::string> RESERVED_NAMES;
113
+ if (RESERVED_NAMES.empty()) {
114
+ RESERVED_NAMES.insert("root");
115
+ for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
116
+ for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first);
117
+ }
118
+ return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
119
+ }
120
+
121
// Matches runs of characters other than ASCII letters, digits and '-'.
std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
// Matches a carriage return, a line feed or a double quote.
std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]");
// Matches a carriage return, a line feed, a double quote, ']', '-' or a backslash.
std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
// Replacement text for each character that gets escaped.
std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
    {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}
};

// Regex metacharacters that end a run of literal text while a pattern is scanned.
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
// Characters whose regex backslash-escape is dropped when they are copied into a literal.
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
130
+
131
/**
 * Concatenates the elements of [begin, end), streaming each one with operator<< and
 * inserting `separator` between consecutive elements.
 *
 * Works with any forward iterator (vector, list, set, ...); only ++ and != are required.
 *
 * @return The joined text, or an empty string for an empty range.
 */
template <typename Iterator>
std::string join(Iterator begin, Iterator end, const std::string & separator) {
    std::ostringstream result;
    if (begin != end) {
        result << *begin;
        // `begin` is a by-value copy, so it can be advanced in place; this avoids
        // `begin + 1`, which only compiles for random-access iterators.
        for (++begin; begin != end; ++begin) {
            result << separator << *begin;
        }
    }
    return result.str();
}
142
+
143
/**
 * Splits `str` at every occurrence of `delimiter`.
 *
 * @return The pieces in order. Empty pieces are kept, so the result always has at least
 *         one element. An empty `delimiter` yields the whole input as a single piece.
 */
static std::vector<std::string> split(const std::string & str, const std::string & delimiter) {
    std::vector<std::string> tokens;

    // find("") succeeds at the current position without advancing, which would make the
    // loop below run forever; there is nothing to split on, so return the input as is.
    if (delimiter.empty()) {
        tokens.push_back(str);
        return tokens;
    }

    size_t start = 0;
    size_t end = str.find(delimiter);

    while (end != std::string::npos) {
        tokens.push_back(str.substr(start, end - start));
        start = end + delimiter.length();
        end = str.find(delimiter, start);
    }

    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    tokens.push_back(str.substr(start));

    return tokens;
}
158
+
159
// Returns `str` concatenated with itself `n` times (an empty string when n is 0).
static std::string repeat(const std::string & str, size_t n) {
    std::string out;
    if (n != 0) {
        // Size the buffer once so the appends below never reallocate.
        out.reserve(str.length() * n);
        for (size_t remaining = n; remaining > 0; --remaining) {
            out.append(str);
        }
    }
    return out;
}
173
+
174
/**
 * Replaces every match of `regex` in `input` with the text returned by `replacement`.
 *
 * @param input       Text to scan.
 * @param regex       Pattern to search for.
 * @param replacement Callback invoked once per match; its result is substituted for the match.
 * @return A copy of `input` with all matches replaced; unmatched text is copied verbatim.
 */
static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
    std::smatch match;
    std::string result;

    std::string::const_iterator searchStart(input.cbegin());
    std::string::const_iterator searchEnd(input.cend());

    while (std::regex_search(searchStart, searchEnd, match, regex)) {
        // Copy the text preceding the match, then the substitution.
        result.append(match.prefix().first, match.prefix().second);
        result.append(replacement(match));
        searchStart = match.suffix().first;

        // A zero-length match does not move the cursor. Step over one character by hand
        // (keeping it in the output), otherwise the same empty match would be found forever.
        if (match.length(0) == 0) {
            if (searchStart == searchEnd) {
                break;
            }
            result.push_back(*searchStart);
            ++searchStart;
        }
    }

    result.append(searchStart, searchEnd);

    return result;
}
191
+
192
+ static std::string format_literal(const std::string & literal) {
193
+ std::string escaped = replacePattern(literal, GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
194
+ char c = match.str()[0];
195
+ return GRAMMAR_LITERAL_ESCAPES.at(c);
196
+ });
197
+ return "\"" + escaped + "\"";
198
+ }
199
+
200
+
201
+ class SchemaConverter {
202
+ private:
203
+ std::function<json(const std::string &)> _fetch_json;
204
+ bool _dotall;
205
+ std::map<std::string, std::string> _rules;
206
+ std::unordered_map<std::string, json> _refs;
207
+ std::unordered_set<std::string> _refs_being_resolved;
208
+ std::vector<std::string> _errors;
209
+ std::vector<std::string> _warnings;
210
+
211
+ std::string _add_rule(const std::string & name, const std::string & rule) {
212
+ std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-");
213
+ if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
214
+ _rules[esc_name] = rule;
215
+ return esc_name;
216
+ } else {
217
+ int i = 0;
218
+ while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
219
+ i++;
220
+ }
221
+ std::string key = esc_name + std::to_string(i);
222
+ _rules[key] = rule;
223
+ return key;
224
+ }
225
+ }
226
+
227
+ std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
228
+ std::vector<std::string> rules;
229
+ for (size_t i = 0; i < alt_schemas.size(); i++) {
230
+ rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
231
+ }
232
+ return join(rules.begin(), rules.end(), " | ");
233
+ }
234
+
235
+ std::string _visit_pattern(const std::string & pattern, const std::string & name) {
236
+ if (!(pattern.front() == '^' && pattern.back() == '$')) {
237
+ _errors.push_back("Pattern must start with '^' and end with '$'");
238
+ return "";
239
+ }
240
+ std::string sub_pattern = pattern.substr(1, pattern.length() - 2);
241
+ std::unordered_map<std::string, std::string> sub_rule_ids;
242
+
243
+ size_t i = 0;
244
+ size_t length = sub_pattern.length();
245
+
246
+ using literal_or_rule = std::pair<std::string, bool>;
247
+ auto to_rule = [&](const literal_or_rule & ls) {
248
+ auto is_literal = ls.second;
249
+ auto s = ls.first;
250
+ return is_literal ? "\"" + s + "\"" : s;
251
+ };
252
+ std::function<literal_or_rule()> transform = [&]() -> literal_or_rule {
253
+ size_t start = i;
254
+ std::vector<literal_or_rule> seq;
255
+
256
+ auto get_dot = [&]() {
257
+ std::string rule;
258
+ if (_dotall) {
259
+ rule = "[\\U00000000-\\U0010FFFF]";
260
+ } else {
261
+ rule = "[^\\x0A\\x0D]";
262
+ }
263
+ return _add_rule("dot", rule);
264
+ };
265
+
266
+ // Joins the sequence, merging consecutive literals together.
267
+ auto join_seq = [&]() {
268
+ std::vector<literal_or_rule> ret;
269
+
270
+ std::string literal;
271
+ auto flush_literal = [&]() {
272
+ if (literal.empty()) {
273
+ return false;
274
+ }
275
+ ret.push_back(std::make_pair(literal, true));
276
+ literal.clear();
277
+ return true;
278
+ };
279
+
280
+ for (const auto & item : seq) {
281
+ auto is_literal = item.second;
282
+ if (is_literal) {
283
+ literal += item.first;
284
+ } else {
285
+ flush_literal();
286
+ ret.push_back(item);
287
+ }
288
+ }
289
+ flush_literal();
290
+
291
+ std::vector<std::string> results;
292
+ for (const auto & item : ret) {
293
+ results.push_back(to_rule(item));
294
+ }
295
+ return std::make_pair(join(results.begin(), results.end(), " "), false);
296
+ };
297
+
298
+ while (i < length) {
299
+ char c = sub_pattern[i];
300
+ if (c == '.') {
301
+ seq.push_back(std::make_pair(get_dot(), false));
302
+ i++;
303
+ } else if (c == '(') {
304
+ i++;
305
+ if (i < length) {
306
+ if (sub_pattern[i] == '?') {
307
+ _warnings.push_back("Unsupported pattern syntax");
308
+ }
309
+ }
310
+ seq.push_back(std::make_pair("(" + to_rule(transform()) + ")", false));
311
+ } else if (c == ')') {
312
+ i++;
313
+ if (start > 0 && sub_pattern[start - 1] != '(') {
314
+ _errors.push_back("Unbalanced parentheses");
315
+ }
316
+ return join_seq();
317
+ } else if (c == '[') {
318
+ std::string square_brackets = std::string(1, c);
319
+ i++;
320
+ while (i < length && sub_pattern[i] != ']') {
321
+ if (sub_pattern[i] == '\\') {
322
+ square_brackets += sub_pattern.substr(i, 2);
323
+ i += 2;
324
+ } else {
325
+ square_brackets += sub_pattern[i];
326
+ i++;
327
+ }
328
+ }
329
+ if (i >= length) {
330
+ _errors.push_back("Unbalanced square brackets");
331
+ }
332
+ square_brackets += ']';
333
+ i++;
334
+ seq.push_back(std::make_pair(square_brackets, false));
335
+ } else if (c == '|') {
336
+ seq.push_back(std::make_pair("|", false));
337
+ i++;
338
+ } else if (c == '*' || c == '+' || c == '?') {
339
+ seq.back() = std::make_pair(to_rule(seq.back()) + c, false);
340
+ i++;
341
+ } else if (c == '{') {
342
+ std::string curly_brackets = std::string(1, c);
343
+ i++;
344
+ while (i < length && sub_pattern[i] != '}') {
345
+ curly_brackets += sub_pattern[i];
346
+ i++;
347
+ }
348
+ if (i >= length) {
349
+ _errors.push_back("Unbalanced curly brackets");
350
+ }
351
+ curly_brackets += '}';
352
+ i++;
353
+ auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
354
+ int min_times = 0;
355
+ int max_times = std::numeric_limits<int>::max();
356
+ try {
357
+ if (nums.size() == 1) {
358
+ min_times = max_times = std::stoi(nums[0]);
359
+ } else if (nums.size() != 2) {
360
+ _errors.push_back("Wrong number of values in curly brackets");
361
+ } else {
362
+ if (!nums[0].empty()) {
363
+ min_times = std::stoi(nums[0]);
364
+ }
365
+ if (!nums[1].empty()) {
366
+ max_times = std::stoi(nums[1]);
367
+ }
368
+ }
369
+ } catch (const std::invalid_argument & e) {
370
+ _errors.push_back("Invalid number in curly brackets");
371
+ return std::make_pair("", false);
372
+ }
373
+ auto &last = seq.back();
374
+ auto &sub = last.first;
375
+ auto sub_is_literal = last.second;
376
+
377
+ if (!sub_is_literal) {
378
+ std::string & sub_id = sub_rule_ids[sub];
379
+ if (sub_id.empty()) {
380
+ sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub);
381
+ }
382
+ sub = sub_id;
383
+ }
384
+ seq.back().first = build_repetition(
385
+ sub_is_literal ? "\"" + sub + "\"" : sub,
386
+ min_times,
387
+ max_times,
388
+ "",
389
+ sub_is_literal
390
+ );
391
+ seq.back().second = false;
392
+ } else {
393
+ std::string literal;
394
+ auto is_non_literal = [&](char c) {
395
+ return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end();
396
+ };
397
+ while (i < length) {
398
+ if (sub_pattern[i] == '\\' && i < length - 1) {
399
+ char next = sub_pattern[i + 1];
400
+ if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(next) != ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end()) {
401
+ i++;
402
+ literal += sub_pattern[i];
403
+ i++;
404
+ } else {
405
+ literal += sub_pattern.substr(i, 2);
406
+ i += 2;
407
+ }
408
+ } else if (sub_pattern[i] == '"') {
409
+ literal += "\\\"";
410
+ i++;
411
+ } else if (!is_non_literal(sub_pattern[i]) &&
412
+ (i == length - 1 || literal.empty() || sub_pattern[i + 1] == '.' || !is_non_literal(sub_pattern[i + 1]))) {
413
+ literal += sub_pattern[i];
414
+ i++;
415
+ } else {
416
+ break;
417
+ }
418
+ }
419
+ if (!literal.empty()) {
420
+ seq.push_back(std::make_pair(literal, true));
421
+ }
422
+ }
423
+ }
424
+ return join_seq();
425
+ };
426
+ return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
427
+ }
428
+
429
+ std::string _resolve_ref(const std::string & ref) {
430
+ std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
431
+ if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
432
+ _refs_being_resolved.insert(ref);
433
+ json resolved = _refs[ref];
434
+ ref_name = visit(resolved, ref_name);
435
+ _refs_being_resolved.erase(ref);
436
+ }
437
+ return ref_name;
438
+ }
439
+
440
+ std::string _build_object_rule(
441
+ const std::vector<std::pair<std::string, json>> & properties,
442
+ const std::unordered_set<std::string> & required,
443
+ const std::string & name,
444
+ const json & additional_properties)
445
+ {
446
+ std::vector<std::string> required_props;
447
+ std::vector<std::string> optional_props;
448
+ std::unordered_map<std::string, std::string> prop_kv_rule_names;
449
+ for (const auto & kv : properties) {
450
+ const auto &prop_name = kv.first;
451
+ const auto &prop_schema = kv.second;
452
+
453
+ std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name);
454
+ prop_kv_rule_names[prop_name] = _add_rule(
455
+ name + (name.empty() ? "" : "-") + prop_name + "-kv",
456
+ format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name
457
+ );
458
+ if (required.find(prop_name) != required.end()) {
459
+ required_props.push_back(prop_name);
460
+ } else {
461
+ optional_props.push_back(prop_name);
462
+ }
463
+ }
464
+ if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
465
+ std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
466
+ std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
467
+ std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
468
+ prop_kv_rule_names["*"] = kv_rule;
469
+ optional_props.push_back("*");
470
+ }
471
+
472
+ std::string rule = "\"{\" space ";
473
+ for (size_t i = 0; i < required_props.size(); i++) {
474
+ if (i > 0) {
475
+ rule += " \",\" space ";
476
+ }
477
+ rule += prop_kv_rule_names[required_props[i]];
478
+ }
479
+
480
+ if (!optional_props.empty()) {
481
+ rule += " (";
482
+ if (!required_props.empty()) {
483
+ rule += " \",\" space ( ";
484
+ }
485
+
486
+ std::function<std::string(const std::vector<std::string> &, bool)> get_recursive_refs = [&](const std::vector<std::string> & ks, bool first_is_optional) {
487
+ std::string res;
488
+ if (ks.empty()) {
489
+ return res;
490
+ }
491
+ std::string k = ks[0];
492
+ std::string kv_rule_name = prop_kv_rule_names[k];
493
+ if (k == "*") {
494
+ res = _add_rule(
495
+ name + (name.empty() ? "" : "-") + "additional-kvs",
496
+ kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
497
+ );
498
+ } else if (first_is_optional) {
499
+ res = "( \",\" space " + kv_rule_name + " )?";
500
+ } else {
501
+ res = kv_rule_name;
502
+ }
503
+ if (ks.size() > 1) {
504
+ res += " " + _add_rule(
505
+ name + (name.empty() ? "" : "-") + k + "-rest",
506
+ get_recursive_refs(std::vector<std::string>(ks.begin() + 1, ks.end()), true)
507
+ );
508
+ }
509
+ return res;
510
+ };
511
+
512
+ for (size_t i = 0; i < optional_props.size(); i++) {
513
+ if (i > 0) {
514
+ rule += " | ";
515
+ }
516
+ rule += get_recursive_refs(std::vector<std::string>(optional_props.begin() + i, optional_props.end()), false);
517
+ }
518
+ if (!required_props.empty()) {
519
+ rule += " )";
520
+ }
521
+ rule += " )?";
522
+ }
523
+
524
+ rule += " \"}\" space";
525
+
526
+ return rule;
527
+ }
528
+
529
+ std::string _add_primitive(const std::string & name, const BuiltinRule & rule) {
530
+ auto n = _add_rule(name, rule.content);
531
+ for (const auto & dep : rule.deps) {
532
+ BuiltinRule dep_rule;
533
+ auto it = PRIMITIVE_RULES.find(dep);
534
+ if (it == PRIMITIVE_RULES.end()) {
535
+ it = STRING_FORMAT_RULES.find(dep);
536
+ if (it == STRING_FORMAT_RULES.end()) {
537
+ _errors.push_back("Rule " + dep + " not known");
538
+ continue;
539
+ }
540
+ }
541
+ if (_rules.find(dep) == _rules.end()) {
542
+ _add_primitive(dep, it->second);
543
+ }
544
+ }
545
+ return n;
546
+ }
547
+
548
+ public:
549
+ SchemaConverter(
550
+ const std::function<json(const std::string &)> & fetch_json,
551
+ bool dotall)
552
+ : _fetch_json(fetch_json), _dotall(dotall)
553
+ {
554
+ _rules["space"] = SPACE_RULE;
555
+ }
556
+
557
+ void resolve_refs(json & schema, const std::string & url) {
558
+ /*
559
+ * Resolves all $ref fields in the given schema, fetching any remote schemas,
560
+ * replacing each $ref with absolute reference URL and populates _refs with the
561
+ * respective referenced (sub)schema dictionaries.
562
+ */
563
+ std::function<void(json &)> visit_refs = [&](json & n) {
564
+ if (n.is_array()) {
565
+ for (auto & x : n) {
566
+ visit_refs(x);
567
+ }
568
+ } else if (n.is_object()) {
569
+ if (n.contains("$ref")) {
570
+ std::string ref = n["$ref"];
571
+ if (_refs.find(ref) == _refs.end()) {
572
+ json target;
573
+ if (ref.find("https://") == 0) {
574
+ std::string base_url = ref.substr(0, ref.find('#'));
575
+ auto it = _refs.find(base_url);
576
+ if (it != _refs.end()) {
577
+ target = it->second;
578
+ } else {
579
+ // Fetch the referenced schema and resolve its refs
580
+ auto referenced = _fetch_json(ref);
581
+ resolve_refs(referenced, base_url);
582
+ _refs[base_url] = referenced;
583
+ }
584
+ if (ref.find('#') == std::string::npos || ref.substr(ref.find('#') + 1).empty()) {
585
+ return;
586
+ }
587
+ } else if (ref.find("#/") == 0) {
588
+ target = schema;
589
+ n["$ref"] = url + ref;
590
+ ref = url + ref;
591
+ } else {
592
+ _errors.push_back("Unsupported ref: " + ref);
593
+ return;
594
+ }
595
+ std::string pointer = ref.substr(ref.find('#') + 1);
596
+ std::vector<std::string> tokens = split(pointer, "/");
597
+ for (size_t i = 1; i < tokens.size(); ++i) {
598
+ std::string sel = tokens[i];
599
+ if (target.is_null() || !target.contains(sel)) {
600
+ _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
601
+ return;
602
+ }
603
+ target = target[sel];
604
+ }
605
+ _refs[ref] = target;
606
+ }
607
+ } else {
608
+ for (auto & kv : n.items()) {
609
+ visit_refs(kv.value());
610
+ }
611
+ }
612
+ }
613
+ };
614
+
615
+ visit_refs(schema);
616
+ }
617
+
618
+ std::string _generate_constant_rule(const json & value) {
619
+ return format_literal(value.dump());
620
+ }
621
+
622
+ std::string visit(const json & schema, const std::string & name) {
623
+ json schema_type = schema.contains("type") ? schema["type"] : json();
624
+ std::string schema_format = schema.contains("format") ? schema["format"].get<std::string>() : "";
625
+ std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name;
626
+
627
+ if (schema.contains("$ref")) {
628
+ return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
629
+ } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
630
+ std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
631
+ return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
632
+ } else if (schema_type.is_array()) {
633
+ std::vector<json> schema_types;
634
+ for (const auto & t : schema_type) {
635
+ schema_types.push_back({{"type", t}});
636
+ }
637
+ return _add_rule(rule_name, _generate_union_rule(name, schema_types));
638
+ } else if (schema.contains("const")) {
639
+ return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
640
+ } else if (schema.contains("enum")) {
641
+ std::vector<std::string> enum_values;
642
+ for (const auto & v : schema["enum"]) {
643
+ enum_values.push_back(_generate_constant_rule(v));
644
+ }
645
+ return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
646
+ } else if ((schema_type.is_null() || schema_type == "object")
647
+ && (schema.contains("properties") ||
648
+ (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
649
+ std::unordered_set<std::string> required;
650
+ if (schema.contains("required") && schema["required"].is_array()) {
651
+ for (const auto & item : schema["required"]) {
652
+ if (item.is_string()) {
653
+ required.insert(item.get<std::string>());
654
+ }
655
+ }
656
+ }
657
+ std::vector<std::pair<std::string, json>> properties;
658
+ if (schema.contains("properties")) {
659
+ for (const auto & prop : schema["properties"].items()) {
660
+ properties.emplace_back(prop.key(), prop.value());
661
+ }
662
+ }
663
+ return _add_rule(rule_name,
664
+ _build_object_rule(
665
+ properties, required, name,
666
+ schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
667
+ } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) {
668
+ std::unordered_set<std::string> required;
669
+ std::vector<std::pair<std::string, json>> properties;
670
+ std::string hybrid_name = name;
671
+ std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
672
+ if (comp_schema.contains("$ref")) {
673
+ add_component(_refs[comp_schema["$ref"]], is_required);
674
+ } else if (comp_schema.contains("properties")) {
675
+ for (const auto & prop : comp_schema["properties"].items()) {
676
+ properties.emplace_back(prop.key(), prop.value());
677
+ if (is_required) {
678
+ required.insert(prop.key());
679
+ }
680
+ }
681
+ } else {
682
+ // todo warning
683
+ }
684
+ };
685
+ for (auto & t : schema["allOf"]) {
686
+ if (t.contains("anyOf")) {
687
+ for (auto & tt : t["anyOf"]) {
688
+ add_component(tt, false);
689
+ }
690
+ } else {
691
+ add_component(t, true);
692
+ }
693
+ }
694
+ return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
695
+ } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
696
+ json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
697
+ if (items.is_array()) {
698
+ std::string rule = "\"[\" space ";
699
+ for (size_t i = 0; i < items.size(); i++) {
700
+ if (i > 0) {
701
+ rule += " \",\" space ";
702
+ }
703
+ rule += visit(items[i], name + (name.empty() ? "" : "-") + "tuple-" + std::to_string(i));
704
+ }
705
+ rule += " \"]\" space";
706
+ return _add_rule(rule_name, rule);
707
+ } else {
708
+ std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
709
+ int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
710
+ json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
711
+ int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
712
+
713
+ return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
714
+ }
715
+ } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
716
+ return _visit_pattern(schema["pattern"], rule_name);
717
+ } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
718
+ return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
719
+ } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
720
+ auto prim_name = schema_format + "-string";
721
+ return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
722
+ } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
723
+ std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
724
+ int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
725
+ int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
726
+ return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
727
+ } else if (schema.empty() || schema_type == "object") {
728
+ return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
729
+ } else {
730
+ if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
731
+ _errors.push_back("Unrecognized schema: " + schema.dump());
732
+ return "";
733
+ }
734
+ // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
735
+ return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
736
+ }
737
+ }
738
+
739
+ void check_errors() {
740
+ if (!_errors.empty()) {
741
+ throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
742
+ }
743
+ if (!_warnings.empty()) {
744
+ fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", join(_warnings.begin(), _warnings.end(), "; ").c_str());
745
+ }
746
+ }
747
+
748
+ std::string format_grammar() {
749
+ std::stringstream ss;
750
+ for (const auto & kv : _rules) {
751
+ ss << kv.first << " ::= " << kv.second << std::endl;
752
+ }
753
+ return ss.str();
754
+ }
755
+ };
756
+
757
+ std::string json_schema_to_grammar(const json & schema) {
758
+ SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
759
+ auto copy = schema;
760
+ converter.resolve_refs(copy, "input");
761
+ converter.visit(copy, "");
762
+ converter.check_errors();
763
+ return converter.format_grammar();
764
+ }