fast_json-schema 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/CODE_OF_CONDUCT.md +84 -0
  4. data/Dockerfile +17 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +68 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +156 -0
  9. data/Rakefile +60 -0
  10. data/build-deps +3 -0
  11. data/data/invalid.json +31 -0
  12. data/data/schema.json +150 -0
  13. data/data/valid.json +49 -0
  14. data/ext/fast_json/schema/all_of.c +23 -0
  15. data/ext/fast_json/schema/all_of.h +4 -0
  16. data/ext/fast_json/schema/any_of.c +22 -0
  17. data/ext/fast_json/schema/any_of.h +4 -0
  18. data/ext/fast_json/schema/compiled_schema.c +503 -0
  19. data/ext/fast_json/schema/compiled_schema.h +10 -0
  20. data/ext/fast_json/schema/context.c +78 -0
  21. data/ext/fast_json/schema/error.c +26 -0
  22. data/ext/fast_json/schema/error.h +5 -0
  23. data/ext/fast_json/schema/extconf.rb +7 -0
  24. data/ext/fast_json/schema/formats/custom_format.c +63 -0
  25. data/ext/fast_json/schema/formats/custom_format.h +4 -0
  26. data/ext/fast_json/schema/formats/date.c +48 -0
  27. data/ext/fast_json/schema/formats/date.h +5 -0
  28. data/ext/fast_json/schema/formats/date_time.c +22 -0
  29. data/ext/fast_json/schema/formats/date_time.h +4 -0
  30. data/ext/fast_json/schema/formats/email.c +8 -0
  31. data/ext/fast_json/schema/formats/email.h +4 -0
  32. data/ext/fast_json/schema/formats/format.c +68 -0
  33. data/ext/fast_json/schema/formats/format.h +4 -0
  34. data/ext/fast_json/schema/formats/hostname.c +8 -0
  35. data/ext/fast_json/schema/formats/hostname.h +4 -0
  36. data/ext/fast_json/schema/formats/idn_email.c +8 -0
  37. data/ext/fast_json/schema/formats/idn_email.h +4 -0
  38. data/ext/fast_json/schema/formats/idn_hostname.c +8 -0
  39. data/ext/fast_json/schema/formats/idn_hostname.h +4 -0
  40. data/ext/fast_json/schema/formats/ipv4.c +8 -0
  41. data/ext/fast_json/schema/formats/ipv4.h +4 -0
  42. data/ext/fast_json/schema/formats/ipv6.c +8 -0
  43. data/ext/fast_json/schema/formats/ipv6.h +4 -0
  44. data/ext/fast_json/schema/formats/iri.c +8 -0
  45. data/ext/fast_json/schema/formats/iri.h +4 -0
  46. data/ext/fast_json/schema/formats/iri_reference.c +8 -0
  47. data/ext/fast_json/schema/formats/iri_reference.h +4 -0
  48. data/ext/fast_json/schema/formats/json_pointer.c +8 -0
  49. data/ext/fast_json/schema/formats/json_pointer.h +4 -0
  50. data/ext/fast_json/schema/formats/regex.c +27 -0
  51. data/ext/fast_json/schema/formats/regex.h +4 -0
  52. data/ext/fast_json/schema/formats/relative_json_pointer.c +57 -0
  53. data/ext/fast_json/schema/formats/relative_json_pointer.h +4 -0
  54. data/ext/fast_json/schema/formats/time.c +65 -0
  55. data/ext/fast_json/schema/formats/time.h +5 -0
  56. data/ext/fast_json/schema/formats/uri.c +8 -0
  57. data/ext/fast_json/schema/formats/uri.h +4 -0
  58. data/ext/fast_json/schema/formats/uri_reference.c +8 -0
  59. data/ext/fast_json/schema/formats/uri_reference.h +4 -0
  60. data/ext/fast_json/schema/formats/uri_template.c +8 -0
  61. data/ext/fast_json/schema/formats/uri_template.h +4 -0
  62. data/ext/fast_json/schema/formats/utils/addr_spec_parser.c +342 -0
  63. data/ext/fast_json/schema/formats/utils/addr_spec_parser.h +16 -0
  64. data/ext/fast_json/schema/formats/utils/hostname_parser.c +113 -0
  65. data/ext/fast_json/schema/formats/utils/hostname_parser.h +17 -0
  66. data/ext/fast_json/schema/formats/utils/ip_parser.c +126 -0
  67. data/ext/fast_json/schema/formats/utils/ip_parser.h +25 -0
  68. data/ext/fast_json/schema/formats/utils/json_pointer_parser.c +45 -0
  69. data/ext/fast_json/schema/formats/utils/json_pointer_parser.h +20 -0
  70. data/ext/fast_json/schema/formats/utils/uri_parser.c +605 -0
  71. data/ext/fast_json/schema/formats/utils/uri_parser.h +20 -0
  72. data/ext/fast_json/schema/formats/utils/uri_template_parser.c +235 -0
  73. data/ext/fast_json/schema/formats/utils/uri_template_parser.h +18 -0
  74. data/ext/fast_json/schema/formats/utils/utf8.c +73 -0
  75. data/ext/fast_json/schema/formats/utils/utf8.h +17 -0
  76. data/ext/fast_json/schema/if.c +31 -0
  77. data/ext/fast_json/schema/if.h +4 -0
  78. data/ext/fast_json/schema/is_valid.c +124 -0
  79. data/ext/fast_json/schema/is_valid.h +6 -0
  80. data/ext/fast_json/schema/keywords.c +220 -0
  81. data/ext/fast_json/schema/keywords.h +60 -0
  82. data/ext/fast_json/schema/nested_schemas.c +68 -0
  83. data/ext/fast_json/schema/nested_schemas.h +4 -0
  84. data/ext/fast_json/schema/not.c +11 -0
  85. data/ext/fast_json/schema/not.h +4 -0
  86. data/ext/fast_json/schema/one_of.c +23 -0
  87. data/ext/fast_json/schema/one_of.h +4 -0
  88. data/ext/fast_json/schema/path.c +44 -0
  89. data/ext/fast_json/schema/path.h +5 -0
  90. data/ext/fast_json/schema/properties_val.c +103 -0
  91. data/ext/fast_json/schema/properties_val.h +6 -0
  92. data/ext/fast_json/schema/ref.c +7 -0
  93. data/ext/fast_json/schema/ref.h +4 -0
  94. data/ext/fast_json/schema/ref_resolver.c +85 -0
  95. data/ext/fast_json/schema/ref_resolver.h +5 -0
  96. data/ext/fast_json/schema/schema.c +68 -0
  97. data/ext/fast_json/schema/schema_collection.c +29 -0
  98. data/ext/fast_json/schema/schema_collection.h +3 -0
  99. data/ext/fast_json/schema/types/compiled_schema.h +96 -0
  100. data/ext/fast_json/schema/types/context.h +27 -0
  101. data/ext/fast_json/schema/validate.c +63 -0
  102. data/ext/fast_json/schema/validate.h +19 -0
  103. data/ext/fast_json/schema/validate_array.c +130 -0
  104. data/ext/fast_json/schema/validate_array.h +4 -0
  105. data/ext/fast_json/schema/validate_bool.c +7 -0
  106. data/ext/fast_json/schema/validate_bool.h +4 -0
  107. data/ext/fast_json/schema/validate_integer.c +52 -0
  108. data/ext/fast_json/schema/validate_integer.h +4 -0
  109. data/ext/fast_json/schema/validate_null.c +7 -0
  110. data/ext/fast_json/schema/validate_null.h +4 -0
  111. data/ext/fast_json/schema/validate_number.c +62 -0
  112. data/ext/fast_json/schema/validate_number.h +4 -0
  113. data/ext/fast_json/schema/validate_object.c +159 -0
  114. data/ext/fast_json/schema/validate_object.h +4 -0
  115. data/ext/fast_json/schema/validate_string.c +32 -0
  116. data/ext/fast_json/schema/validate_string.h +4 -0
  117. data/ext/fast_json/schema/value_pointer_caster.h +9 -0
  118. data/fast_json-schema.gemspec +31 -0
  119. data/lib/fast_json/schema/error.rb +16 -0
  120. data/lib/fast_json/schema/version.rb +7 -0
  121. data/lib/fast_json/schema.rb +50 -0
  122. data/makefile +10 -0
  123. metadata +164 -0
@@ -0,0 +1,342 @@
1
+ #include "formats/utils/addr_spec_parser.h"
2
+ #include "formats/utils/utf8.h"
3
+ #include "formats/utils/ip_parser.h"
4
+
5
+ #include <stdbool.h>
6
+ #include <string.h>
7
+
8
+ /*
9
+ * RFC 5321 section 4.1.3 length limits.
10
+ */
11
+ #define MAX_LOCAL_PART 64
12
+ #define MAX_DOMAIN 255
13
+
14
+ #define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
15
+ #define IS_ALPHA(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
16
+
17
/*
 * Reports whether c is an RFC 5322 atext character: an ASCII letter, an
 * ASCII digit, or one of the specials
 *   ! # $ % & ' * + - / = ? ^ _ ` { | } ~
 */
static bool is_atext(unsigned char c) {
  static const char specials[] = "!#$%&'*+-/=?^_`{|}~";

  if(c >= '0' && c <= '9') return true;
  if(c >= 'A' && c <= 'Z') return true;
  if(c >= 'a' && c <= 'z') return true;

  /* The scan stops at the terminator, so a NUL byte never matches. */
  for(const char *p = specials; *p != '\0'; p++) {
    if((unsigned char)*p == c) return true;
  }

  return false;
}
35
+
36
/*
 * Reports whether c is RFC 5322 qtext: any byte in the printable ASCII
 * range 33..126 other than DQUOTE (34) and backslash (92).
 */
static bool is_qtext(unsigned char c) {
  if(c < 33 || c > 126) return false;

  return c != 34 && c != 92;
}
44
+
45
/*
 * Reports whether c is dcontent as used by the RFC 5321 section 4.1.3
 * General-address-literal: any byte in 33..126 except "[" (91),
 * backslash (92) and "]" (93).
 */
static bool is_dcontent(unsigned char c) {
  if(c < 33 || c > 126) return false;

  return c < 91 || c > 93;
}
52
+
53
/*
 * RFC 5234 core character classes.
 *   is_vchar: visible ASCII, %x21-7E.
 *   is_wsp:   SP or HTAB.
 */
static bool is_vchar(unsigned char c) {
  return !(c < 0x21 || c > 0x7E);
}

static bool is_wsp(unsigned char c) {
  switch(c) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
59
+
60
/*
 * Consume one atext "character" at the start of s.
 *
 * An ASCII byte counts only if it is atext. A byte >= 0x80 is rejected
 * unless allow_utf8 is true, in which case the result of utf8_seq_len is
 * returned as-is (callers treat 0 as "not a valid sequence").
 *
 * The first byte is read unconditionally, so callers must pass len >= 1.
 *
 * Returns the number of bytes consumed, or 0 if nothing matched.
 */
static long consume_atext(const char *s, long len, bool allow_utf8) {
  const unsigned char *bytes = (const unsigned char *)s;

  if(bytes[0] >= 0x80) {
    if(!allow_utf8) return 0;

    return utf8_seq_len(bytes, len);
  }

  return is_atext(bytes[0]) ? 1 : 0;
}
73
+
74
/*
 * Match 1*atext at the start of s by consuming atext characters until
 * the input ends or a non-atext byte is found.
 *
 * Returns the number of bytes consumed; 0 means not even one atext
 * character was present.
 */
static long parse_atext_run(const char *s, long len, bool allow_utf8) {
  long consumed = 0;
  long step = 0;

  for(; consumed < len; consumed += step) {
    step = consume_atext(s + consumed, len - consumed, allow_utf8);

    if(step == 0) break;
  }

  return consumed;
}
91
+
92
/*
 * Match a dot-atom-text, 1*atext *("." 1*atext), at the start of s.
 *
 * Matching stops at the first byte that is neither atext nor a '.'
 * followed by atext. A '.' that is not followed by atext fails the whole
 * match, with one exception: when allow_trailing_dot is true and that '.'
 * is the very last byte of the input, the entire input is accepted (used
 * for fully qualified domain names).
 *
 * Returns the number of bytes consumed, or 0 on failure.
 */
static long parse_dot_atom(const char *s, long len, bool allow_utf8, bool allow_trailing_dot) {
  long consumed = parse_atext_run(s, len, allow_utf8);

  if(consumed == 0) return 0;

  for(;;) {
    if(consumed >= len || s[consumed] != '.') return consumed;

    long after_dot = consumed + 1;
    long run = parse_atext_run(s + after_dot, len - after_dot, allow_utf8);

    if(run != 0) {
      consumed = after_dot + run;
      continue;
    }

    /* The '.' has no atext after it: only a final dot may be tolerated. */
    if(allow_trailing_dot && after_dot == len) return len;

    return 0;
  }
}
122
+
123
/*
 * Match a quoted-string at the start of s (no surrounding CFWS):
 *   DQUOTE *( qtext / quoted-pair ) DQUOTE
 *   quoted-pair = "\" ( VCHAR / WSP )
 *
 * Besides qtext and quoted-pairs, the body may contain:
 *   - bare SP / HTAB bytes (whitespace without line folding);
 *   - when allow_utf8 is true, multi-byte sequences for which
 *     utf8_seq_len returns a non-zero length.
 * Any other byte, or a missing closing DQUOTE, fails the match.
 *
 * Returns the number of bytes consumed including both DQUOTEs, or 0 on
 * failure.
 */
static long parse_quoted_string(const char *s, long len, bool allow_utf8) {
  if(len < 2) return 0;
  if(s[0] != '"') return 0;

  long i = 1;

  for(;;) {
    /* Ran off the end without seeing the closing quote. */
    if(i >= len) return 0;

    unsigned char ch = (unsigned char)s[i];

    if(ch == '"') return i + 1;

    if(ch == '\\') {
      /* A backslash must be followed by exactly one VCHAR or WSP. */
      if(i + 1 >= len) return 0;

      unsigned char escaped = (unsigned char)s[i + 1];

      if(!(is_vchar(escaped) || is_wsp(escaped))) return 0;

      i += 2;
    } else if(is_qtext(ch) || is_wsp(ch)) {
      i += 1;
    } else if(allow_utf8 && ch >= 0x80) {
      long width = utf8_seq_len((const unsigned char *)s + i, len - i);

      if(width == 0) return 0;

      i += width;
    } else {
      return 0;
    }
  }
}
178
+
179
/*
 * Match the local-part of an addr-spec. Input starting with DQUOTE is
 * parsed as a quoted-string; anything else as a dot-atom-text with no
 * trailing dot allowed.
 *
 * Returns the number of bytes consumed, or 0 on failure (including
 * empty input).
 */
static long parse_local_part(const char *s, long len, bool allow_utf8) {
  if(len == 0) return 0;

  return s[0] == '"'
    ? parse_quoted_string(s, len, allow_utf8)
    : parse_dot_atom(s, len, allow_utf8, false);
}
190
+
191
/*
 * IPv6-address-literal per RFC 5321 section 4.1.3: "IPv6:" IPv6-addr
 *
 * The "IPv6:" tag is an ABNF quoted literal, which RFC 5234 section 2.3
 * defines as case-insensitive, so "ipv6:" and "IPV6:" are recognised too.
 * Everything after the tag is handed to parse_ipv6.
 *
 * Returns true if the whole input is a tagged IPv6 address.
 */
static bool parse_ipv6_literal(const char *s, long len) {
  static const char tag[] = "ipv6:";
  const long tag_len = 5;

  if(len < tag_len) return false;

  for(long i = 0; i < tag_len; i++) {
    unsigned char c = (unsigned char)s[i];

    /* Fold ASCII upper case so the tag comparison ignores case. */
    if(c >= 'A' && c <= 'Z') c = (unsigned char)(c + ('a' - 'A'));

    if(c != (unsigned char)tag[i]) return false;
  }

  return parse_ipv6(s + tag_len, len - tag_len);
}
200
+
201
/*
 * Letter-digit-hyphen string used for the Standardized-tag of an
 * RFC 5321 General-address-literal.
 *
 * Accepts a non-empty run of ASCII letters, digits and '-' that neither
 * starts nor ends with '-'.
 */
static bool parse_ldh_str(const char *s, long len) {
  if(len == 0) return false;

  for(long i = 0; i < len; i++) {
    unsigned char c = (unsigned char)s[i];
    bool alnum = (c >= '0' && c <= '9') ||
                 (c >= 'A' && c <= 'Z') ||
                 (c >= 'a' && c <= 'z');

    if(alnum) continue;

    /* The only other legal byte is a hyphen strictly inside the string. */
    if(c != '-' || i == 0 || i == len - 1) return false;
  }

  return true;
}
220
+
221
/*
 * General-address-literal per RFC 5321 section 4.1.3:
 *   Standardized-tag ":" 1*dcontent
 *   Standardized-tag = Ldh-str
 *
 * s/len is the text between "[" and "]". The tag is everything before the
 * first ':'. It must satisfy parse_ldh_str, contain at least one ASCII
 * letter, and must not be "IPv6" in any letter case. The content after the
 * colon must be non-empty and consist of dcontent bytes plus, only when
 * allow_utf8 is true, multi-byte sequences for which utf8_seq_len returns
 * a non-zero length.
 *
 * Returns true when the whole input matches, false otherwise.
 */
static bool parse_general_address_literal(const char *s, long len, bool allow_utf8) {
  long colon = -1;

  for(long i = 0; i < len; i++) {
    if(s[i] == ':') { colon = i; break; }
  }

  /* Need a non-empty tag before the colon and non-empty content after it. */
  if(colon <= 0 || colon >= len - 1) return false;

  if(!parse_ldh_str(s, colon)) return false;

  /*
   * Reject the "IPv6" tag so that an "IPv6:..." body that failed
   * parse_ipv6_literal does not silently match this looser form. ABNF
   * literals are case-insensitive (RFC 5234 section 2.3), so "ipv6",
   * "IPV6" etc. are rejected as well.
   */
  if(colon == 4) {
    static const char reserved_tag[] = "ipv6";
    bool is_ipv6_tag = true;

    for(long i = 0; i < 4; i++) {
      unsigned char c = (unsigned char)s[i];

      if(c >= 'A' && c <= 'Z') c = (unsigned char)(c + ('a' - 'A'));

      if(c != (unsigned char)reserved_tag[i]) { is_ipv6_tag = false; break; }
    }

    if(is_ipv6_tag) return false;
  }

  /*
   * Require at least one ALPHA in the tag to avoid all-digit tags
   * accidentally matching IPv6-shaped content.
   */
  bool has_alpha = false;

  for(long i = 0; i < colon; i++) {
    if(IS_ALPHA((unsigned char)s[i])) { has_alpha = true; break; }
  }

  if(!has_alpha) return false;

  /* colon < len - 1 was checked above, so at least one content byte exists. */
  long pos = colon + 1;

  while(pos < len) {
    unsigned char c = (unsigned char)s[pos];

    if(c < 0x80) {
      if(!is_dcontent(c)) return false;

      pos++;
      continue;
    }

    if(!allow_utf8) return false;

    long n = utf8_seq_len((const unsigned char *)s + pos, len - pos);

    if(n == 0) return false;

    pos += n;
  }

  return true;
}
275
+
276
/*
 * Validate the text between "[" and "]" of a domain-literal. The body is
 * accepted if it is, in order of attempt, an IPv4 address, an
 * "IPv6:"-tagged IPv6 address, or a General-address-literal. allow_utf8
 * is forwarded only to the General-address-literal parser; the IPv4 and
 * IPv6 forms are always ASCII.
 */
static bool parse_domain_literal_body(const char *s, long len, bool allow_utf8) {
  return parse_ipv4(s, len)
      || parse_ipv6_literal(s, len)
      || parse_general_address_literal(s, len, allow_utf8);
}
287
+
288
/*
 * Match a domain-literal at the start of s: "[" body "]".
 * The body runs up to the first "]" and is validated by
 * parse_domain_literal_body.
 *
 * Returns the number of bytes consumed (both brackets included), or 0 on
 * failure.
 */
static long parse_domain_literal(const char *s, long len, bool allow_utf8) {
  if(len < 2) return 0;
  if(s[0] != '[') return 0;

  /* Locate the first closing bracket after the opening one. */
  long close = 1;

  while(close < len && s[close] != ']') close++;

  if(close == len) return 0;

  long body_len = close - 1;

  return parse_domain_literal_body(s + 1, body_len, allow_utf8) ? close + 1 : 0;
}
307
+
308
/*
 * Match the domain of an addr-spec. Input starting with "[" is parsed as
 * a domain-literal; anything else as a dot-atom-text in which a single
 * trailing dot is tolerated.
 *
 * Returns the number of bytes consumed, or 0 on failure (including
 * empty input).
 */
static long parse_domain(const char *s, long len, bool allow_utf8) {
  if(len == 0) return 0;

  return s[0] == '['
    ? parse_domain_literal(s, len, allow_utf8)
    : parse_dot_atom(s, len, allow_utf8, true);
}
319
+
320
+ bool parse_addr_spec(const char *s, long len, bool allow_utf8) {
321
+ if(len == 0) return false;
322
+
323
+ long local_len = parse_local_part(s, len, allow_utf8);
324
+
325
+ if(local_len == 0) return false;
326
+ if(local_len > MAX_LOCAL_PART) return false;
327
+ if(local_len >= len) return false;
328
+ if(s[local_len] != '@') return false;
329
+
330
+ long domain_off = local_len + 1;
331
+ long domain_len = len - domain_off;
332
+
333
+ if(domain_len <= 0) return false;
334
+ if(domain_len > MAX_DOMAIN) return false;
335
+
336
+ long consumed = parse_domain(s + domain_off, domain_len, allow_utf8);
337
+
338
+ if(consumed == 0) return false;
339
+ if(consumed != domain_len) return false;
340
+
341
+ return true;
342
+ }
@@ -0,0 +1,16 @@
1
+ #ifndef FAST_JSON_FORMATS_UTILS_ADDR_SPEC_PARSER_H
2
+ #define FAST_JSON_FORMATS_UTILS_ADDR_SPEC_PARSER_H
3
+
4
+ #include <stdbool.h>
5
+
6
+ /*
7
+ * Strict RFC 5322 addr-spec parser (with RFC 5321 address-literals).
8
+ * When allow_utf8 is true, RFC 6531 UTF8-non-ASCII extensions are accepted in
9
+ * atext, qtext and General-address-literal dcontent productions.
10
+ *
11
+ * Returns true if the entire input is a valid addr-spec, false otherwise.
12
+ * No CFWS, no obs-* productions, no leading/trailing whitespace.
13
+ */
14
+ bool parse_addr_spec(const char *s, long len, bool allow_utf8);
15
+
16
+ #endif
@@ -0,0 +1,113 @@
1
+ #include "formats/utils/hostname_parser.h"
2
+ #include "formats/utils/utf8.h"
3
+
4
+ #include <stdbool.h>
5
+
6
+ #define MAX_HOSTNAME_LEN 253
7
+ #define MAX_LABEL_LEN 63
8
+
9
+ #define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
10
+ #define IS_ALPHA(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
11
+
12
/*
 * let-dig per RFC 1123 section 2.1: an ASCII letter or an ASCII digit.
 */
static bool is_let_dig(unsigned char c) {
  if(c >= '0' && c <= '9') return true;
  if(c >= 'A' && c <= 'Z') return true;

  return c >= 'a' && c <= 'z';
}
18
+
19
/*
 * let-dig-hyp per RFC 1123 section 2.1: an ASCII letter, an ASCII digit,
 * or "-".
 */
static bool is_let_dig_hyp(unsigned char c) {
  if(c == '-') return true;
  if(c >= '0' && c <= '9') return true;
  if(c >= 'A' && c <= 'Z') return true;

  return c >= 'a' && c <= 'z';
}
25
+
26
/*
 * Consume one label character at the start of s.
 *
 * An ASCII byte must be let-dig, or let-dig-hyp when allow_hyphen is
 * true. A byte >= 0x80 is rejected unless allow_utf8 is true, in which
 * case the result of utf8_seq_len is returned as-is (callers treat 0 as
 * "not a valid sequence"); the whole sequence counts as a single label
 * character.
 *
 * The first byte is read unconditionally, so callers must pass len >= 1.
 *
 * Returns the number of bytes consumed, or 0 if nothing matched.
 */
static long consume_label_char(const char *s, long len, bool allow_utf8, bool allow_hyphen) {
  const unsigned char *bytes = (const unsigned char *)s;
  unsigned char lead = bytes[0];

  if(lead >= 0x80) {
    if(!allow_utf8) return 0;

    return utf8_seq_len(bytes, len);
  }

  bool ok = allow_hyphen ? is_let_dig_hyp(lead) : is_let_dig(lead);

  return ok ? 1 : 0;
}
47
+
48
+ /*
49
+ * Parse a single label: let-dig [ *(let-dig-hyp) let-dig ].
50
+ * The label may not start with a hyphen and may not end with a hyphen.
51
+ * Returns the number of bytes consumed, or 0 on failure.
52
+ */
53
+ static long parse_label(const char *s, long len, bool allow_utf8) {
54
+ if(len == 0) return 0;
55
+
56
+ long first = consume_label_char(s, len, allow_utf8, false);
57
+
58
+ if(first == 0) return 0;
59
+
60
+ long pos = first;
61
+ long last_char_start = 0;
62
+
63
+ while(pos < len && s[pos] != '.') {
64
+ long n = consume_label_char(s + pos, len - pos, allow_utf8, true);
65
+
66
+ if(n == 0) return 0;
67
+
68
+ last_char_start = pos;
69
+ pos += n;
70
+
71
+ if(pos > MAX_LABEL_LEN) return 0;
72
+ }
73
+
74
+ /*
75
+ * Label must not end with a hyphen. The last consumed character begins at
76
+ * either `last_char_start` (if more than one character) or at offset 0.
77
+ */
78
+ long last_start = last_char_start ? last_char_start : 0;
79
+
80
+ if(pos > first && (unsigned char)s[last_start] == '-') return 0;
81
+
82
+ return pos;
83
+ }
84
+
85
+ bool parse_hostname(const char *s, long len, bool allow_utf8) {
86
+ if(len == 0) return false;
87
+ if(len > MAX_HOSTNAME_LEN) return false;
88
+
89
+ long pos = 0;
90
+
91
+ while(pos < len) {
92
+ long label_len = parse_label(s + pos, len - pos, allow_utf8);
93
+
94
+ if(label_len == 0) return false;
95
+
96
+ pos += label_len;
97
+
98
+ if(pos == len) break;
99
+
100
+ if(s[pos] != '.') return false;
101
+
102
+ pos++; // consume '.'
103
+
104
+ /*
105
+ * A single trailing dot is accepted (RFC 1034 section 3.1 absolute form).
106
+ * Any further bytes after the trailing dot, or two consecutive dots,
107
+ * will be rejected because parse_label returns 0 on empty input.
108
+ */
109
+ if(pos == len) return true;
110
+ }
111
+
112
+ return true;
113
+ }
@@ -0,0 +1,17 @@
1
+ #ifndef FAST_JSON_FORMATS_UTILS_HOSTNAME_PARSER_H
2
+ #define FAST_JSON_FORMATS_UTILS_HOSTNAME_PARSER_H
3
+
4
+ #include <stdbool.h>
5
+
6
+ /*
7
+ * Hostname parser per RFC 1123 section 2.1 + RFC 1035 section 2.3.4 length limits.
8
+ *
9
+ * When allow_utf8 is true, label characters may also be valid UTF-8
10
+ * multi-byte sequences (pragmatic idn-hostname; not full IDNA2008).
11
+ *
12
+ * Returns true if the entire input is a syntactically valid hostname,
13
+ * false otherwise. A single trailing dot is allowed (RFC 1034 section 3.1).
14
+ */
15
+ bool parse_hostname(const char *s, long len, bool allow_utf8);
16
+
17
+ #endif
@@ -0,0 +1,126 @@
1
+ #include "formats/utils/ip_parser.h"
2
+
3
+ #include <stdbool.h>
4
+
5
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
#define IS_HEX(c) (IS_DIGIT(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))

/*
 * Validate a dotted-decimal IPv4 address: four dec-octets separated by
 * '.', each 1-3 digits with a value of 0..255 and no leading zeros.
 *
 * Returns true only if the entire input is a valid address.
 */
bool parse_ipv4(const char *s, long len) {
  long pos = 0;

  for(int i = 0; i < 4; i++) {
    if(pos >= len) return false;
    if(!IS_DIGIT(s[pos])) return false;

    /*
     * Reject leading zeros: "0" is allowed alone, but "01", "001" are not.
     * Per RFC 3986 section 3.2.2 dec-octet, leading zeros are forbidden to
     * avoid historical octal-vs-decimal ambiguity.
     */
    if(s[pos] == '0' && pos + 1 < len && IS_DIGIT(s[pos + 1])) return false;

    int value = 0;
    int digits = 0;

    /* At most three digits are read; a fourth digit fails the '.' check below. */
    while(pos < len && IS_DIGIT(s[pos]) && digits < 3) {
      value = value * 10 + (s[pos] - '0');
      pos++;
      digits++;
    }

    if(value > 255) return false;

    if(i < 3) {
      if(pos >= len || s[pos] != '.') return false;

      pos++;
    }
  }

  return pos == len;
}

/*
 * Parse an IPv6 hex group: 1-4 hex digits.
 * Returns the number of digits consumed, or 0 if none.
 */
static int parse_ipv6_group(const char *s, long len) {
  int digits = 0;

  while(digits < 4 && digits < len && IS_HEX((unsigned char)s[digits])) digits++;

  return digits;
}

/*
 * Validate an IPv6 address in text form:
 *   - groups of 1-4 hex digits separated by ':'
 *   - at most one "::" compression, which must leave room for at least
 *     one elided group (so at most 7 explicit groups alongside it)
 *   - an optional trailing dotted IPv4 form that counts as two groups
 *   - exactly 8 groups when no "::" is present
 *
 * A lone ':' at the start or the end of the input is always invalid,
 * regardless of whether "::" appears elsewhere.
 *
 * Returns true only if the entire input is a valid address.
 */
bool parse_ipv6(const char *s, long len) {
  long pos = 0;
  int groups = 0;
  bool seen_compression = false;
  bool last_was_group = false;

  if(len >= 2 && s[0] == ':' && s[1] == ':') {
    seen_compression = true;
    pos = 2;
  } else if(len >= 1 && s[0] == ':') {
    return false;
  }

  while(pos < len) {
    /* Try IPv4 tail first when we are at the start of a group and there are dots ahead. */
    if(!last_was_group) {
      bool has_dot = false;

      for(long k = pos; k < len; k++) {
        if(s[k] == '.') { has_dot = true; break; }
        if(s[k] == ':') break;
      }

      if(has_dot) {
        /* The IPv4 form must span the whole remainder of the input. */
        if(!parse_ipv4(s + pos, len - pos)) return false;

        groups += 2;
        pos = len;
        last_was_group = true;
        break;
      }
    }

    int g = parse_ipv6_group(s + pos, len - pos);

    if(g == 0) return false;

    pos += g;
    groups++;
    last_was_group = true;

    if(pos == len) break;

    if(s[pos] != ':') return false;

    /* Possible "::" compression. */
    if(pos + 1 < len && s[pos + 1] == ':') {
      if(seen_compression) return false;

      seen_compression = true;
      pos += 2;
      last_was_group = false;

      if(pos == len) break; /* trailing "::" */

      continue;
    }

    /*
     * A single ':' separator must be followed by another group. Reject it
     * when it is the final byte, even if "::" appeared earlier ("1::2:").
     */
    if(pos + 1 == len) return false;

    pos++;
    last_was_group = false;
  }

  if(pos != len) return false;

  /* No group and no compression at all: the input was empty. */
  if(!last_was_group && !seen_compression) return false;

  if(seen_compression) return groups <= 7;

  return groups == 8;
}
@@ -0,0 +1,25 @@
1
+ #ifndef FAST_JSON_FORMATS_UTILS_IP_PARSER_H
2
+ #define FAST_JSON_FORMATS_UTILS_IP_PARSER_H
3
+
4
+ #include <stdbool.h>
5
+
6
+ /*
7
+ * IPv4 dotted-decimal address per RFC 2673 section 3.2 / RFC 3986 section 3.2.2:
8
+ * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
9
+ * dec-octet = 1*3DIGIT, value 0..255, no leading zeros
10
+ *
11
+ * Returns true if the entire input is a valid IPv4 address, false otherwise.
12
+ */
13
+ bool parse_ipv4(const char *s, long len);
14
+
15
+ /*
16
+ * IPv6 address per RFC 4291 section 2.2:
17
+ * - Up to 8 groups of 1-4 hex digits separated by ":"
18
+ * - At most one "::" compression standing for one or more zero groups
19
+ * - Optional trailing IPv4 dotted form (counts as 2 groups)
20
+ *
21
+ * Returns true if the entire input is a valid IPv6 address, false otherwise.
22
+ */
23
+ bool parse_ipv6(const char *s, long len);
24
+
25
+ #endif
@@ -0,0 +1,45 @@
1
+ #include "formats/utils/json_pointer_parser.h"
2
+ #include "formats/utils/utf8.h"
3
+
4
/*
 * Validate a JSON Pointer: zero or more reference tokens, each introduced
 * by '/'.
 *
 * Inside a token, '~' must be followed by '0' or '1'. Every other ASCII
 * byte is accepted as-is, and a byte >= 0x80 must start a sequence for
 * which utf8_seq_len returns a non-zero length. The empty string is
 * valid.
 */
bool parse_json_pointer(const char *s, long len) {
  long i = 0;

  while(i < len) {
    /* Each token must be introduced by a '/'. */
    if(s[i] != '/') return false;

    i++;

    while(i < len && s[i] != '/') {
      unsigned char byte = (unsigned char)s[i];

      if(byte == '~') {
        /* Escape sequence: "~" ( "0" / "1" ) */
        if(i + 1 >= len) return false;

        char code = s[i + 1];

        if(code != '0' && code != '1') return false;

        i += 2;
      } else if(byte < 0x80) {
        /* Plain ASCII byte. */
        i += 1;
      } else {
        /* UTF-8 multi-byte sequence. */
        long width = utf8_seq_len((const unsigned char *)(s + i), len - i);

        if(width == 0) return false;

        i += width;
      }
    }
  }

  return true;
}
@@ -0,0 +1,20 @@
1
+ #ifndef FAST_JSON_FORMATS_UTILS_JSON_POINTER_PARSER_H
2
+ #define FAST_JSON_FORMATS_UTILS_JSON_POINTER_PARSER_H
3
+
4
+ #include <stdbool.h>
5
+
6
+ /*
7
+ * RFC 6901 section 3 JSON Pointer parser.
8
+ *
9
+ * json-pointer = *( "/" reference-token )
10
+ * reference-token = *( unescaped / escaped )
11
+ * unescaped = %x00-2E / %x30-7D / %x7F-10FFFF
12
+ * escaped = "~" ( "0" / "1" )
13
+ *
14
+ * The empty string is a valid JSON Pointer (whole-document reference).
15
+ * UTF-8 multi-byte sequences are accepted in token content per the
16
+ * unescaped %x80-10FFFF range. NUL bytes are accepted per %x00-2E.
17
+ */
18
+ bool parse_json_pointer(const char *s, long len);
19
+
20
+ #endif