fast_json-schema 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Dockerfile +17 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +68 -0
- data/LICENSE.txt +21 -0
- data/README.md +156 -0
- data/Rakefile +60 -0
- data/build-deps +3 -0
- data/data/invalid.json +31 -0
- data/data/schema.json +150 -0
- data/data/valid.json +49 -0
- data/ext/fast_json/schema/all_of.c +23 -0
- data/ext/fast_json/schema/all_of.h +4 -0
- data/ext/fast_json/schema/any_of.c +22 -0
- data/ext/fast_json/schema/any_of.h +4 -0
- data/ext/fast_json/schema/compiled_schema.c +503 -0
- data/ext/fast_json/schema/compiled_schema.h +10 -0
- data/ext/fast_json/schema/context.c +78 -0
- data/ext/fast_json/schema/error.c +26 -0
- data/ext/fast_json/schema/error.h +5 -0
- data/ext/fast_json/schema/extconf.rb +7 -0
- data/ext/fast_json/schema/formats/custom_format.c +63 -0
- data/ext/fast_json/schema/formats/custom_format.h +4 -0
- data/ext/fast_json/schema/formats/date.c +48 -0
- data/ext/fast_json/schema/formats/date.h +5 -0
- data/ext/fast_json/schema/formats/date_time.c +22 -0
- data/ext/fast_json/schema/formats/date_time.h +4 -0
- data/ext/fast_json/schema/formats/email.c +8 -0
- data/ext/fast_json/schema/formats/email.h +4 -0
- data/ext/fast_json/schema/formats/format.c +68 -0
- data/ext/fast_json/schema/formats/format.h +4 -0
- data/ext/fast_json/schema/formats/hostname.c +8 -0
- data/ext/fast_json/schema/formats/hostname.h +4 -0
- data/ext/fast_json/schema/formats/idn_email.c +8 -0
- data/ext/fast_json/schema/formats/idn_email.h +4 -0
- data/ext/fast_json/schema/formats/idn_hostname.c +8 -0
- data/ext/fast_json/schema/formats/idn_hostname.h +4 -0
- data/ext/fast_json/schema/formats/ipv4.c +8 -0
- data/ext/fast_json/schema/formats/ipv4.h +4 -0
- data/ext/fast_json/schema/formats/ipv6.c +8 -0
- data/ext/fast_json/schema/formats/ipv6.h +4 -0
- data/ext/fast_json/schema/formats/iri.c +8 -0
- data/ext/fast_json/schema/formats/iri.h +4 -0
- data/ext/fast_json/schema/formats/iri_reference.c +8 -0
- data/ext/fast_json/schema/formats/iri_reference.h +4 -0
- data/ext/fast_json/schema/formats/json_pointer.c +8 -0
- data/ext/fast_json/schema/formats/json_pointer.h +4 -0
- data/ext/fast_json/schema/formats/regex.c +27 -0
- data/ext/fast_json/schema/formats/regex.h +4 -0
- data/ext/fast_json/schema/formats/relative_json_pointer.c +57 -0
- data/ext/fast_json/schema/formats/relative_json_pointer.h +4 -0
- data/ext/fast_json/schema/formats/time.c +65 -0
- data/ext/fast_json/schema/formats/time.h +5 -0
- data/ext/fast_json/schema/formats/uri.c +8 -0
- data/ext/fast_json/schema/formats/uri.h +4 -0
- data/ext/fast_json/schema/formats/uri_reference.c +8 -0
- data/ext/fast_json/schema/formats/uri_reference.h +4 -0
- data/ext/fast_json/schema/formats/uri_template.c +8 -0
- data/ext/fast_json/schema/formats/uri_template.h +4 -0
- data/ext/fast_json/schema/formats/utils/addr_spec_parser.c +342 -0
- data/ext/fast_json/schema/formats/utils/addr_spec_parser.h +16 -0
- data/ext/fast_json/schema/formats/utils/hostname_parser.c +113 -0
- data/ext/fast_json/schema/formats/utils/hostname_parser.h +17 -0
- data/ext/fast_json/schema/formats/utils/ip_parser.c +126 -0
- data/ext/fast_json/schema/formats/utils/ip_parser.h +25 -0
- data/ext/fast_json/schema/formats/utils/json_pointer_parser.c +45 -0
- data/ext/fast_json/schema/formats/utils/json_pointer_parser.h +20 -0
- data/ext/fast_json/schema/formats/utils/uri_parser.c +605 -0
- data/ext/fast_json/schema/formats/utils/uri_parser.h +20 -0
- data/ext/fast_json/schema/formats/utils/uri_template_parser.c +235 -0
- data/ext/fast_json/schema/formats/utils/uri_template_parser.h +18 -0
- data/ext/fast_json/schema/formats/utils/utf8.c +73 -0
- data/ext/fast_json/schema/formats/utils/utf8.h +17 -0
- data/ext/fast_json/schema/if.c +31 -0
- data/ext/fast_json/schema/if.h +4 -0
- data/ext/fast_json/schema/is_valid.c +124 -0
- data/ext/fast_json/schema/is_valid.h +6 -0
- data/ext/fast_json/schema/keywords.c +220 -0
- data/ext/fast_json/schema/keywords.h +60 -0
- data/ext/fast_json/schema/nested_schemas.c +68 -0
- data/ext/fast_json/schema/nested_schemas.h +4 -0
- data/ext/fast_json/schema/not.c +11 -0
- data/ext/fast_json/schema/not.h +4 -0
- data/ext/fast_json/schema/one_of.c +23 -0
- data/ext/fast_json/schema/one_of.h +4 -0
- data/ext/fast_json/schema/path.c +44 -0
- data/ext/fast_json/schema/path.h +5 -0
- data/ext/fast_json/schema/properties_val.c +103 -0
- data/ext/fast_json/schema/properties_val.h +6 -0
- data/ext/fast_json/schema/ref.c +7 -0
- data/ext/fast_json/schema/ref.h +4 -0
- data/ext/fast_json/schema/ref_resolver.c +85 -0
- data/ext/fast_json/schema/ref_resolver.h +5 -0
- data/ext/fast_json/schema/schema.c +68 -0
- data/ext/fast_json/schema/schema_collection.c +29 -0
- data/ext/fast_json/schema/schema_collection.h +3 -0
- data/ext/fast_json/schema/types/compiled_schema.h +96 -0
- data/ext/fast_json/schema/types/context.h +27 -0
- data/ext/fast_json/schema/validate.c +63 -0
- data/ext/fast_json/schema/validate.h +19 -0
- data/ext/fast_json/schema/validate_array.c +130 -0
- data/ext/fast_json/schema/validate_array.h +4 -0
- data/ext/fast_json/schema/validate_bool.c +7 -0
- data/ext/fast_json/schema/validate_bool.h +4 -0
- data/ext/fast_json/schema/validate_integer.c +52 -0
- data/ext/fast_json/schema/validate_integer.h +4 -0
- data/ext/fast_json/schema/validate_null.c +7 -0
- data/ext/fast_json/schema/validate_null.h +4 -0
- data/ext/fast_json/schema/validate_number.c +62 -0
- data/ext/fast_json/schema/validate_number.h +4 -0
- data/ext/fast_json/schema/validate_object.c +159 -0
- data/ext/fast_json/schema/validate_object.h +4 -0
- data/ext/fast_json/schema/validate_string.c +32 -0
- data/ext/fast_json/schema/validate_string.h +4 -0
- data/ext/fast_json/schema/value_pointer_caster.h +9 -0
- data/fast_json-schema.gemspec +31 -0
- data/lib/fast_json/schema/error.rb +16 -0
- data/lib/fast_json/schema/version.rb +7 -0
- data/lib/fast_json/schema.rb +50 -0
- data/makefile +10 -0
- metadata +164 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
#include "formats/utils/addr_spec_parser.h"
|
|
2
|
+
#include "formats/utils/utf8.h"
|
|
3
|
+
#include "formats/utils/ip_parser.h"
|
|
4
|
+
|
|
5
|
+
#include <stdbool.h>
|
|
6
|
+
#include <string.h>
|
|
7
|
+
|
|
8
|
+
/*
|
|
9
|
+
* RFC 5321 section 4.1.3 length limits.
|
|
10
|
+
*/
|
|
11
|
+
#define MAX_LOCAL_PART 64
|
|
12
|
+
#define MAX_DOMAIN 255
|
|
13
|
+
|
|
14
|
+
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
|
|
15
|
+
#define IS_ALPHA(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
|
|
16
|
+
|
|
17
|
+
/*
 * RFC 5322 atext:
 *   ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" /
 *   "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~"
 *
 * Returns true when c is one of those characters, false otherwise.
 */
static bool is_atext(unsigned char c) {
  /* The punctuation characters admitted by atext besides letters and digits. */
  static const char specials[] = "!#$%&'*+-/=?^_`{|}~";

  const bool letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
  const bool digit = c >= '0' && c <= '9';

  if(letter || digit) return true;

  /* strchr would also match the terminating NUL, so rule it out first. */
  return c != '\0' && strchr(specials, c) != NULL;
}
|
|
35
|
+
|
|
36
|
+
/*
 * RFC 5322 qtext: %d33 / %d35-91 / %d93-126, i.e. every printable ASCII
 * character other than DQUOTE (34) and backslash (92).
 */
static bool is_qtext(unsigned char c) {
  /* Anything outside the printable range 33..126 is never qtext. */
  if(c < 33 || c > 126) return false;

  return c != 34 && c != 92;
}
|
|
44
|
+
|
|
45
|
+
/*
 * RFC 5321 section 4.1.3 dcontent: %d33-90 / %d94-126, i.e. printable ASCII
 * without "[" (91), "\" (92) and "]" (93).
 */
static bool is_dcontent(unsigned char c) {
  /* Outside the printable range 33..126 nothing qualifies. */
  if(c < 33 || c > 126) return false;

  /* Only the three bracket/backslash code points 91..93 are excluded. */
  return c < 91 || c > 93;
}
|
|
52
|
+
|
|
53
|
+
/*
 * RFC 5234 core rules:
 *   VCHAR = %x21-7E   (visible ASCII characters)
 *   WSP   = SP / HTAB
 */
static bool is_vchar(unsigned char c) {
  return !(c < 0x21 || c > 0x7E);
}

static bool is_wsp(unsigned char c) {
  switch(c) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
|
|
59
|
+
|
|
60
|
+
/*
 * Consume one atext character at the start of s.
 *
 * An ASCII byte counts as one consumed byte when it is atext. A byte >= 0x80
 * is only considered when allow_utf8 is true, in which case the result of
 * utf8_seq_len() for the remaining input is returned.
 *
 * Returns the number of bytes consumed, or 0 when s does not start with an
 * acceptable character. The caller must guarantee len >= 1, because s[0] is
 * read unconditionally.
 */
static long consume_atext(const char *s, long len, bool allow_utf8) {
  const unsigned char lead = (unsigned char)s[0];

  if(lead >= 0x80) {
    if(!allow_utf8) return 0;

    return utf8_seq_len((const unsigned char *)s, len);
  }

  return is_atext(lead) ? 1 : 0;
}
|
|
73
|
+
|
|
74
|
+
/*
 * Parse 1*atext at the start of s.
 *
 * Returns the number of bytes covered by the longest run of atext characters,
 * which is 0 when the input does not begin with one.
 */
static long parse_atext_run(const char *s, long len, bool allow_utf8) {
  long consumed = 0;
  long step = 0;

  for(; consumed < len; consumed += step) {
    step = consume_atext(s + consumed, len - consumed, allow_utf8);

    /* First non-atext byte ends the run. */
    if(step == 0) break;
  }

  return consumed;
}
|
|
91
|
+
|
|
92
|
+
/*
 * Parse a dot-atom-text: 1*atext *("." 1*atext).
 *
 * A "." that is not followed by atext makes the parse fail, with one
 * exception: when allow_trailing_dot is true and that "." is the very last
 * byte of the input, the whole input is accepted (fully qualified domain
 * names). Callers pass false for the local-part, so trailing and consecutive
 * dots stay rejected there.
 *
 * Returns the number of bytes consumed, or 0 on failure.
 */
static long parse_dot_atom(const char *s, long len, bool allow_utf8, bool allow_trailing_dot) {
  long end = parse_atext_run(s, len, allow_utf8);

  if(end == 0) return 0;

  for(;;) {
    /* Stop at the first byte that does not continue the dot-atom. */
    if(end >= len || s[end] != '.') return end;

    const long after_dot = end + 1;
    const long run = parse_atext_run(s + after_dot, len - after_dot, allow_utf8);

    if(run == 0) {
      /* A dot with nothing after it: only tolerated as the final byte. */
      return (allow_trailing_dot && after_dot == len) ? len : 0;
    }

    end = after_dot + run;
  }
}
|
|
122
|
+
|
|
123
|
+
/*
 * Parse a quoted-string without surrounding CFWS:
 *   DQUOTE *( qtext / quoted-pair ) DQUOTE
 *   quoted-pair = "\" ( VCHAR / WSP )
 *
 * Besides qtext and quoted-pairs, a bare SP or HTAB is accepted between the
 * quotes (the single-line part of RFC 5322 FWS); line folding is not.
 * When allow_utf8 is true, bytes >= 0x80 are accepted if utf8_seq_len()
 * reports a non-zero length for them.
 *
 * Returns the number of bytes consumed including both DQUOTEs, or 0 on
 * failure (including a missing closing quote).
 */
static long parse_quoted_string(const char *s, long len, bool allow_utf8) {
  if(len < 2) return 0;
  if(s[0] != '"') return 0;

  long i = 1;

  while(i < len) {
    const unsigned char ch = (unsigned char)s[i];

    /* Closing DQUOTE: report everything up to and including it. */
    if(ch == '"') return i + 1;

    if(ch == '\\') {
      /* quoted-pair needs one more byte, restricted to VCHAR / WSP. */
      if(i + 1 >= len) return 0;

      const unsigned char escaped = (unsigned char)s[i + 1];

      if(!(is_vchar(escaped) || is_wsp(escaped))) return 0;

      i += 2;
    } else if(is_qtext(ch) || is_wsp(ch)) {
      i += 1;
    } else if(allow_utf8 && ch >= 0x80) {
      const long seq = utf8_seq_len((const unsigned char *)s + i, len - i);

      if(seq == 0) return 0;

      i += seq;
    } else {
      return 0;
    }
  }

  /* Input ended before the closing DQUOTE. */
  return 0;
}
|
|
178
|
+
|
|
179
|
+
/*
 * Parse the local-part of an addr-spec: a quoted-string when the input starts
 * with DQUOTE, a dot-atom-text (no trailing dot) otherwise.
 *
 * Returns the number of bytes consumed, or 0 on failure or empty input.
 */
static long parse_local_part(const char *s, long len, bool allow_utf8) {
  if(len == 0) return 0;

  return (s[0] == '"')
    ? parse_quoted_string(s, len, allow_utf8)
    : parse_dot_atom(s, len, allow_utf8, false);
}
|
|
190
|
+
|
|
191
|
+
/*
 * IPv6-address-literal per RFC 5321 section 4.1.3: "IPv6:" IPv6-addr
 *
 * The tag is matched case-sensitively; the remainder is handed to
 * parse_ipv6(). Returns true when both parts match.
 */
static bool parse_ipv6_literal(const char *s, long len) {
  static const char tag[] = "IPv6:";
  const long tag_len = (long)(sizeof tag - 1);

  if(len < tag_len) return false;
  if(memcmp(s, tag, (size_t)tag_len) != 0) return false;

  return parse_ipv6(s + tag_len, len - tag_len);
}
|
|
200
|
+
|
|
201
|
+
/*
 * Letter-digit-hyphen string, in the simplified form used by this parser:
 * non-empty, made only of ALPHA / DIGIT / "-", and neither starting nor
 * ending with "-".
 *
 * Returns true when the whole of s[0..len) matches.
 */
static bool parse_ldh_str(const char *s, long len) {
  if(len == 0) return false;

  /* Hyphens are only allowed in the interior. */
  if(s[0] == '-' || s[len - 1] == '-') return false;

  for(const char *p = s; p < s + len; p++) {
    const unsigned char ch = (unsigned char)*p;

    if(ch == '-') continue;
    if(IS_ALPHA(ch) || IS_DIGIT(ch)) continue;

    return false;
  }

  return true;
}
|
|
220
|
+
|
|
221
|
+
/*
 * General-address-literal per RFC 5321 section 4.1.3:
 *   Standardized-tag ":" 1*dcontent
 *   Standardized-tag = Ldh-str
 *
 * Extra restrictions applied here:
 *   - the tag "IPv6" is refused, so an "IPv6:..." body that failed
 *     parse_ipv6_literal cannot slip through as a general literal;
 *   - the tag must contain at least one ALPHA, so all-digit tags cannot match
 *     IPv6-shaped content.
 *
 * When allow_utf8 is true, bytes >= 0x80 in the content are accepted if
 * utf8_seq_len() reports a non-zero length for them.
 *
 * Returns true when the whole of s[0..len) matches.
 */
static bool parse_general_address_literal(const char *s, long len, bool allow_utf8) {
  /* The tag ends at the first ':'. */
  const char *sep = (len > 0) ? memchr(s, ':', (size_t)len) : NULL;

  if(sep == NULL) return false;

  const long tag_len = (long)(sep - s);

  /* Both the tag and the content after the ':' must be non-empty. */
  if(tag_len == 0 || tag_len >= len - 1) return false;

  if(!parse_ldh_str(s, tag_len)) return false;

  if(tag_len == 4 && memcmp(s, "IPv6", 4) == 0) return false;

  /* Look for the first letter in the tag; reaching the end means none. */
  long letter_at = 0;

  while(letter_at < tag_len && !IS_ALPHA((unsigned char)s[letter_at])) letter_at++;

  if(letter_at == tag_len) return false;

  long i = tag_len + 1;

  while(i < len) {
    const unsigned char ch = (unsigned char)s[i];

    if(ch >= 0x80) {
      if(!allow_utf8) return false;

      const long seq = utf8_seq_len((const unsigned char *)s + i, len - i);

      if(seq == 0) return false;

      i += seq;
    } else if(is_dcontent(ch)) {
      i++;
    } else {
      return false;
    }
  }

  return true;
}
|
|
275
|
+
|
|
276
|
+
/*
 * Validate the text between "[" and "]" of a domain-literal.
 *
 * Alternatives are tried in order: plain IPv4 address, "IPv6:"-tagged IPv6
 * address, General-address-literal. allow_utf8 is only forwarded to the
 * General-address-literal parser; the IPv4 and IPv6 forms are ASCII-only.
 */
static bool parse_domain_literal_body(const char *s, long len, bool allow_utf8) {
  return parse_ipv4(s, len)
    || parse_ipv6_literal(s, len)
    || parse_general_address_literal(s, len, allow_utf8);
}
|
|
287
|
+
|
|
288
|
+
/*
 * Parse a domain-literal: "[" body "]".
 *
 * The body runs up to the first "]" and must satisfy
 * parse_domain_literal_body(). Returns the number of bytes consumed
 * (brackets included), or 0 on failure.
 */
static long parse_domain_literal(const char *s, long len, bool allow_utf8) {
  if(len < 2 || s[0] != '[') return 0;

  const char *body = s + 1;
  const char *close = memchr(body, ']', (size_t)(len - 1));

  if(close == NULL) return 0;

  const long body_len = (long)(close - body);

  if(!parse_domain_literal_body(body, body_len, allow_utf8)) return 0;

  /* Body plus the opening and closing brackets. */
  return body_len + 2;
}
|
|
307
|
+
|
|
308
|
+
/*
 * Parse the domain of an addr-spec: a domain-literal when the input starts
 * with "[", otherwise a dot-atom-text in which one trailing dot is tolerated.
 *
 * Returns the number of bytes consumed, or 0 on failure or empty input.
 */
static long parse_domain(const char *s, long len, bool allow_utf8) {
  if(len == 0) return 0;

  return (s[0] == '[')
    ? parse_domain_literal(s, len, allow_utf8)
    : parse_dot_atom(s, len, allow_utf8, true);
}
|
|
319
|
+
|
|
320
|
+
bool parse_addr_spec(const char *s, long len, bool allow_utf8) {
|
|
321
|
+
if(len == 0) return false;
|
|
322
|
+
|
|
323
|
+
long local_len = parse_local_part(s, len, allow_utf8);
|
|
324
|
+
|
|
325
|
+
if(local_len == 0) return false;
|
|
326
|
+
if(local_len > MAX_LOCAL_PART) return false;
|
|
327
|
+
if(local_len >= len) return false;
|
|
328
|
+
if(s[local_len] != '@') return false;
|
|
329
|
+
|
|
330
|
+
long domain_off = local_len + 1;
|
|
331
|
+
long domain_len = len - domain_off;
|
|
332
|
+
|
|
333
|
+
if(domain_len <= 0) return false;
|
|
334
|
+
if(domain_len > MAX_DOMAIN) return false;
|
|
335
|
+
|
|
336
|
+
long consumed = parse_domain(s + domain_off, domain_len, allow_utf8);
|
|
337
|
+
|
|
338
|
+
if(consumed == 0) return false;
|
|
339
|
+
if(consumed != domain_len) return false;
|
|
340
|
+
|
|
341
|
+
return true;
|
|
342
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#ifndef FAST_JSON_FORMATS_UTILS_ADDR_SPEC_PARSER_H
|
|
2
|
+
#define FAST_JSON_FORMATS_UTILS_ADDR_SPEC_PARSER_H
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* Strict RFC 5322 addr-spec parser (with RFC 5321 address-literals).
|
|
8
|
+
* When allow_utf8 is true, RFC 6531 UTF8-non-ASCII extensions are accepted in
|
|
9
|
+
* atext, qtext and General-address-literal dcontent productions.
|
|
10
|
+
*
|
|
11
|
+
* Returns true if the entire input is a valid addr-spec, false otherwise.
|
|
12
|
+
* No CFWS, no obs-* productions, no leading/trailing whitespace.
|
|
13
|
+
*/
|
|
14
|
+
bool parse_addr_spec(const char *s, long len, bool allow_utf8);
|
|
15
|
+
|
|
16
|
+
#endif
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#include "formats/utils/hostname_parser.h"
|
|
2
|
+
#include "formats/utils/utf8.h"
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
#define MAX_HOSTNAME_LEN 253
|
|
7
|
+
#define MAX_LABEL_LEN 63
|
|
8
|
+
|
|
9
|
+
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
|
|
10
|
+
#define IS_ALPHA(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
|
|
11
|
+
|
|
12
|
+
/*
 * let-dig per RFC 1123 section 2.1: ALPHA / DIGIT.
 */
static bool is_let_dig(unsigned char c) {
  if(c >= '0' && c <= '9') return true;
  if(c >= 'a' && c <= 'z') return true;

  return c >= 'A' && c <= 'Z';
}
|
|
18
|
+
|
|
19
|
+
/*
 * let-dig-hyp per RFC 1123 section 2.1: ALPHA / DIGIT / "-".
 */
static bool is_let_dig_hyp(unsigned char c) {
  return (c == '-') ? true : is_let_dig(c);
}
|
|
25
|
+
|
|
26
|
+
/*
 * Consume one label character at the start of s.
 *
 * ASCII bytes must be let-dig, or let-dig-hyp when allow_hyphen is true, and
 * count as one byte. A byte >= 0x80 is only considered when allow_utf8 is
 * true, in which case the result of utf8_seq_len() for the remaining input is
 * returned, so a multi-byte sequence counts as a single label character
 * (pragmatic idn-hostname).
 *
 * Returns the number of bytes consumed, or 0 when s does not start with a
 * valid label character. The caller must guarantee len >= 1, because s[0] is
 * read unconditionally.
 */
static long consume_label_char(const char *s, long len, bool allow_utf8, bool allow_hyphen) {
  const unsigned char lead = (unsigned char)s[0];

  if(lead >= 0x80) {
    if(!allow_utf8) return 0;

    return utf8_seq_len((const unsigned char *)s, len);
  }

  const bool accepted = allow_hyphen ? is_let_dig_hyp(lead) : is_let_dig(lead);

  return accepted ? 1 : 0;
}
|
|
47
|
+
|
|
48
|
+
/*
|
|
49
|
+
* Parse a single label: let-dig [ *(let-dig-hyp) let-dig ].
|
|
50
|
+
* The label may not start with a hyphen and may not end with a hyphen.
|
|
51
|
+
* Returns the number of bytes consumed, or 0 on failure.
|
|
52
|
+
*/
|
|
53
|
+
static long parse_label(const char *s, long len, bool allow_utf8) {
|
|
54
|
+
if(len == 0) return 0;
|
|
55
|
+
|
|
56
|
+
long first = consume_label_char(s, len, allow_utf8, false);
|
|
57
|
+
|
|
58
|
+
if(first == 0) return 0;
|
|
59
|
+
|
|
60
|
+
long pos = first;
|
|
61
|
+
long last_char_start = 0;
|
|
62
|
+
|
|
63
|
+
while(pos < len && s[pos] != '.') {
|
|
64
|
+
long n = consume_label_char(s + pos, len - pos, allow_utf8, true);
|
|
65
|
+
|
|
66
|
+
if(n == 0) return 0;
|
|
67
|
+
|
|
68
|
+
last_char_start = pos;
|
|
69
|
+
pos += n;
|
|
70
|
+
|
|
71
|
+
if(pos > MAX_LABEL_LEN) return 0;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/*
|
|
75
|
+
* Label must not end with a hyphen. The last consumed character begins at
|
|
76
|
+
* either `last_char_start` (if more than one character) or at offset 0.
|
|
77
|
+
*/
|
|
78
|
+
long last_start = last_char_start ? last_char_start : 0;
|
|
79
|
+
|
|
80
|
+
if(pos > first && (unsigned char)s[last_start] == '-') return 0;
|
|
81
|
+
|
|
82
|
+
return pos;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
bool parse_hostname(const char *s, long len, bool allow_utf8) {
|
|
86
|
+
if(len == 0) return false;
|
|
87
|
+
if(len > MAX_HOSTNAME_LEN) return false;
|
|
88
|
+
|
|
89
|
+
long pos = 0;
|
|
90
|
+
|
|
91
|
+
while(pos < len) {
|
|
92
|
+
long label_len = parse_label(s + pos, len - pos, allow_utf8);
|
|
93
|
+
|
|
94
|
+
if(label_len == 0) return false;
|
|
95
|
+
|
|
96
|
+
pos += label_len;
|
|
97
|
+
|
|
98
|
+
if(pos == len) break;
|
|
99
|
+
|
|
100
|
+
if(s[pos] != '.') return false;
|
|
101
|
+
|
|
102
|
+
pos++; // consume '.'
|
|
103
|
+
|
|
104
|
+
/*
|
|
105
|
+
* A single trailing dot is accepted (RFC 1034 section 3.1 absolute form).
|
|
106
|
+
* Any further bytes after the trailing dot, or two consecutive dots,
|
|
107
|
+
* will be rejected because parse_label returns 0 on empty input.
|
|
108
|
+
*/
|
|
109
|
+
if(pos == len) return true;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
return true;
|
|
113
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#ifndef FAST_JSON_FORMATS_UTILS_HOSTNAME_PARSER_H
|
|
2
|
+
#define FAST_JSON_FORMATS_UTILS_HOSTNAME_PARSER_H
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* Hostname parser per RFC 1123 section 2.1 + RFC 1035 section 2.3.4 length limits.
|
|
8
|
+
*
|
|
9
|
+
* When allow_utf8 is true, label characters may also be valid UTF-8
|
|
10
|
+
* multi-byte sequences (pragmatic idn-hostname; not full IDNA2008).
|
|
11
|
+
*
|
|
12
|
+
* Returns true if the entire input is a syntactically valid hostname,
|
|
13
|
+
* false otherwise. A single trailing dot is allowed (RFC 1034 section 3.1).
|
|
14
|
+
*/
|
|
15
|
+
bool parse_hostname(const char *s, long len, bool allow_utf8);
|
|
16
|
+
|
|
17
|
+
#endif
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#include "formats/utils/ip_parser.h"
|
|
2
|
+
|
|
3
|
+
#include <stdbool.h>
|
|
4
|
+
|
|
5
|
+
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
#define IS_HEX(c) (IS_DIGIT(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))

/*
 * Validate a dotted-decimal IPv4 address: four dec-octets separated by ".".
 * Each octet is 1-3 digits with a value of 0..255 and no leading zeros.
 *
 * s is a byte buffer of length len (no NUL terminator is required).
 * Returns true only when the whole input is a valid address.
 */
bool parse_ipv4(const char *s, long len) {
  long pos = 0;

  for(int i = 0; i < 4; i++) {
    if(pos >= len) return false;
    if(!IS_DIGIT(s[pos])) return false;

    /*
     * Reject leading zeros: "0" is allowed alone, but "01", "001" are not.
     * Per RFC 3986 section 3.2.2 dec-octet, leading zeros are forbidden to
     * avoid historical octal-vs-decimal ambiguity.
     */
    if(s[pos] == '0' && pos + 1 < len && IS_DIGIT(s[pos + 1])) return false;

    int value = 0;
    int digits = 0;

    /* At most three digits; a fourth digit then fails the "." check below. */
    while(pos < len && IS_DIGIT(s[pos]) && digits < 3) {
      value = value * 10 + (s[pos] - '0');
      pos++;
      digits++;
    }

    if(value > 255) return false;

    if(i < 3) {
      if(pos >= len || s[pos] != '.') return false;

      pos++;
    }
  }

  /* No trailing bytes allowed after the fourth octet. */
  return pos == len;
}

/*
 * Parse an IPv6 hex group: 1-4 hex digits.
 * Returns the number of digits consumed, or 0 if none.
 */
static int parse_ipv6_group(const char *s, long len) {
  int digits = 0;

  while(digits < 4 && digits < len && IS_HEX((unsigned char)s[digits])) digits++;

  return digits;
}

/*
 * Validate an IPv6 address in RFC 4291 section 2.2 text form:
 *   - groups of 1-4 hex digits separated by ":"
 *   - at most one "::" compression, standing for one or more zero groups
 *   - optionally a trailing dotted IPv4 address, counted as two groups
 *
 * Without "::" exactly 8 groups are required; with "::" at most 7 explicit
 * groups may be present.
 *
 * s is a byte buffer of length len (no NUL terminator is required).
 * Returns true only when the whole input is a valid address.
 */
bool parse_ipv6(const char *s, long len) {
  long pos = 0;
  int groups = 0;
  bool seen_compression = false;
  bool last_was_group = false;

  if(len >= 2 && s[0] == ':' && s[1] == ':') {
    seen_compression = true;
    pos = 2;
  } else if(len >= 1 && s[0] == ':') {
    /* A single leading ":" is never valid. */
    return false;
  }

  while(pos < len) {
    /* Try IPv4 tail first when we are at the start of a group and there are dots ahead. */
    if(!last_was_group) {
      bool has_dot = false;

      for(long k = pos; k < len; k++) {
        if(s[k] == '.') { has_dot = true; break; }
        if(s[k] == ':') break;
      }

      if(has_dot) {
        /* The IPv4 form must cover everything that is left. */
        if(!parse_ipv4(s + pos, len - pos)) return false;

        groups += 2;
        pos = len;
        last_was_group = true;
        break;
      }
    }

    int g = parse_ipv6_group(s + pos, len - pos);

    if(g == 0) return false;

    pos += g;
    groups++;
    last_was_group = true;

    /* More than 8 groups can never be valid; stop early on long inputs. */
    if(groups > 8) return false;

    if(pos == len) break;

    if(s[pos] != ':') return false;

    /* Possible "::" compression. */
    if(pos + 1 < len && s[pos + 1] == ':') {
      if(seen_compression) return false;

      seen_compression = true;
      pos += 2;
      last_was_group = false;

      if(pos == len) break; /* trailing "::" */

      continue;
    }

    pos++;

    /*
     * A single ":" must be followed by another group. Rejecting here also
     * covers inputs such as "::1:" or "1::2:", where an earlier "::" would
     * otherwise hide the dangling separator from the final checks.
     */
    if(pos == len) return false;

    last_was_group = false;
  }

  if(pos != len) return false;

  /* Empty input: nothing was parsed at all. */
  if(!last_was_group && !seen_compression) return false;

  if(seen_compression) {
    /* "::" stands for at least one group, leaving room for at most 7. */
    return groups <= 7;
  }

  return groups == 8;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#ifndef FAST_JSON_FORMATS_UTILS_IP_PARSER_H
|
|
2
|
+
#define FAST_JSON_FORMATS_UTILS_IP_PARSER_H
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* IPv4 dotted-decimal address per RFC 2673 section 3.2 / RFC 3986 section 3.2.2:
|
|
8
|
+
* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
|
|
9
|
+
* dec-octet = 1*3DIGIT, value 0..255, no leading zeros
|
|
10
|
+
*
|
|
11
|
+
* Returns true if the entire input is a valid IPv4 address, false otherwise.
|
|
12
|
+
*/
|
|
13
|
+
bool parse_ipv4(const char *s, long len);
|
|
14
|
+
|
|
15
|
+
/*
|
|
16
|
+
* IPv6 address per RFC 4291 section 2.2:
|
|
17
|
+
* - Up to 8 groups of 1-4 hex digits separated by ":"
|
|
18
|
+
* - At most one "::" compression standing for one or more zero groups
|
|
19
|
+
* - Optional trailing IPv4 dotted form (counts as 2 groups)
|
|
20
|
+
*
|
|
21
|
+
* Returns true if the entire input is a valid IPv6 address, false otherwise.
|
|
22
|
+
*/
|
|
23
|
+
bool parse_ipv6(const char *s, long len);
|
|
24
|
+
|
|
25
|
+
#endif
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#include "formats/utils/json_pointer_parser.h"
|
|
2
|
+
#include "formats/utils/utf8.h"
|
|
3
|
+
|
|
4
|
+
/*
 * Validate a JSON Pointer: *( "/" reference-token ).
 *
 * A non-empty pointer must start with "/". Inside a token, "~" must be
 * followed by "0" or "1"; any other ASCII byte is accepted as is, and a byte
 * >= 0x80 is accepted when utf8_seq_len() reports a non-zero length for it.
 * The empty input is valid.
 *
 * s is a byte buffer of length len (no NUL terminator is required).
 */
bool parse_json_pointer(const char *s, long len) {
  /* Anything before the first "/" would be a token without a separator. */
  if(len > 0 && s[0] != '/') return false;

  long i = 0;

  while(i < len) {
    const unsigned char ch = (unsigned char)s[i];

    if(ch == '/') {
      /* Separator: starts the next (possibly empty) reference token. */
      i++;
    } else if(ch == '~') {
      /* Escape sequence: "~" ( "0" / "1" ) */
      if(i + 1 >= len) return false;
      if(s[i + 1] != '0' && s[i + 1] != '1') return false;

      i += 2;
    } else if(ch < 0x80) {
      i++;
    } else {
      /* UTF-8 multi-byte sequence */
      const long seq = utf8_seq_len((const unsigned char *)(s + i), len - i);

      if(seq == 0) return false;

      i += seq;
    }
  }

  return true;
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#ifndef FAST_JSON_FORMATS_UTILS_JSON_POINTER_PARSER_H
|
|
2
|
+
#define FAST_JSON_FORMATS_UTILS_JSON_POINTER_PARSER_H
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* RFC 6901 section 3 JSON Pointer parser.
|
|
8
|
+
*
|
|
9
|
+
* json-pointer = *( "/" reference-token )
|
|
10
|
+
* reference-token = *( unescaped / escaped )
|
|
11
|
+
* unescaped = %x00-2E / %x30-7D / %x7F-10FFFF
|
|
12
|
+
* escaped = "~" ( "0" / "1" )
|
|
13
|
+
*
|
|
14
|
+
* The empty string is a valid JSON Pointer (whole-document reference).
|
|
15
|
+
* UTF-8 multi-byte sequences are accepted in token content per the
|
|
16
|
+
* unescaped %x80-10FFFF range. NUL bytes are accepted per %x00-2E.
|
|
17
|
+
*/
|
|
18
|
+
bool parse_json_pointer(const char *s, long len);
|
|
19
|
+
|
|
20
|
+
#endif
|