fast_json-schema 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Dockerfile +17 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +68 -0
- data/LICENSE.txt +21 -0
- data/README.md +156 -0
- data/Rakefile +60 -0
- data/build-deps +3 -0
- data/data/invalid.json +31 -0
- data/data/schema.json +150 -0
- data/data/valid.json +49 -0
- data/ext/fast_json/schema/all_of.c +23 -0
- data/ext/fast_json/schema/all_of.h +4 -0
- data/ext/fast_json/schema/any_of.c +22 -0
- data/ext/fast_json/schema/any_of.h +4 -0
- data/ext/fast_json/schema/compiled_schema.c +503 -0
- data/ext/fast_json/schema/compiled_schema.h +10 -0
- data/ext/fast_json/schema/context.c +78 -0
- data/ext/fast_json/schema/error.c +26 -0
- data/ext/fast_json/schema/error.h +5 -0
- data/ext/fast_json/schema/extconf.rb +7 -0
- data/ext/fast_json/schema/formats/custom_format.c +63 -0
- data/ext/fast_json/schema/formats/custom_format.h +4 -0
- data/ext/fast_json/schema/formats/date.c +48 -0
- data/ext/fast_json/schema/formats/date.h +5 -0
- data/ext/fast_json/schema/formats/date_time.c +22 -0
- data/ext/fast_json/schema/formats/date_time.h +4 -0
- data/ext/fast_json/schema/formats/email.c +8 -0
- data/ext/fast_json/schema/formats/email.h +4 -0
- data/ext/fast_json/schema/formats/format.c +68 -0
- data/ext/fast_json/schema/formats/format.h +4 -0
- data/ext/fast_json/schema/formats/hostname.c +8 -0
- data/ext/fast_json/schema/formats/hostname.h +4 -0
- data/ext/fast_json/schema/formats/idn_email.c +8 -0
- data/ext/fast_json/schema/formats/idn_email.h +4 -0
- data/ext/fast_json/schema/formats/idn_hostname.c +8 -0
- data/ext/fast_json/schema/formats/idn_hostname.h +4 -0
- data/ext/fast_json/schema/formats/ipv4.c +8 -0
- data/ext/fast_json/schema/formats/ipv4.h +4 -0
- data/ext/fast_json/schema/formats/ipv6.c +8 -0
- data/ext/fast_json/schema/formats/ipv6.h +4 -0
- data/ext/fast_json/schema/formats/iri.c +8 -0
- data/ext/fast_json/schema/formats/iri.h +4 -0
- data/ext/fast_json/schema/formats/iri_reference.c +8 -0
- data/ext/fast_json/schema/formats/iri_reference.h +4 -0
- data/ext/fast_json/schema/formats/json_pointer.c +8 -0
- data/ext/fast_json/schema/formats/json_pointer.h +4 -0
- data/ext/fast_json/schema/formats/regex.c +27 -0
- data/ext/fast_json/schema/formats/regex.h +4 -0
- data/ext/fast_json/schema/formats/relative_json_pointer.c +57 -0
- data/ext/fast_json/schema/formats/relative_json_pointer.h +4 -0
- data/ext/fast_json/schema/formats/time.c +65 -0
- data/ext/fast_json/schema/formats/time.h +5 -0
- data/ext/fast_json/schema/formats/uri.c +8 -0
- data/ext/fast_json/schema/formats/uri.h +4 -0
- data/ext/fast_json/schema/formats/uri_reference.c +8 -0
- data/ext/fast_json/schema/formats/uri_reference.h +4 -0
- data/ext/fast_json/schema/formats/uri_template.c +8 -0
- data/ext/fast_json/schema/formats/uri_template.h +4 -0
- data/ext/fast_json/schema/formats/utils/addr_spec_parser.c +342 -0
- data/ext/fast_json/schema/formats/utils/addr_spec_parser.h +16 -0
- data/ext/fast_json/schema/formats/utils/hostname_parser.c +113 -0
- data/ext/fast_json/schema/formats/utils/hostname_parser.h +17 -0
- data/ext/fast_json/schema/formats/utils/ip_parser.c +126 -0
- data/ext/fast_json/schema/formats/utils/ip_parser.h +25 -0
- data/ext/fast_json/schema/formats/utils/json_pointer_parser.c +45 -0
- data/ext/fast_json/schema/formats/utils/json_pointer_parser.h +20 -0
- data/ext/fast_json/schema/formats/utils/uri_parser.c +605 -0
- data/ext/fast_json/schema/formats/utils/uri_parser.h +20 -0
- data/ext/fast_json/schema/formats/utils/uri_template_parser.c +235 -0
- data/ext/fast_json/schema/formats/utils/uri_template_parser.h +18 -0
- data/ext/fast_json/schema/formats/utils/utf8.c +73 -0
- data/ext/fast_json/schema/formats/utils/utf8.h +17 -0
- data/ext/fast_json/schema/if.c +31 -0
- data/ext/fast_json/schema/if.h +4 -0
- data/ext/fast_json/schema/is_valid.c +124 -0
- data/ext/fast_json/schema/is_valid.h +6 -0
- data/ext/fast_json/schema/keywords.c +220 -0
- data/ext/fast_json/schema/keywords.h +60 -0
- data/ext/fast_json/schema/nested_schemas.c +68 -0
- data/ext/fast_json/schema/nested_schemas.h +4 -0
- data/ext/fast_json/schema/not.c +11 -0
- data/ext/fast_json/schema/not.h +4 -0
- data/ext/fast_json/schema/one_of.c +23 -0
- data/ext/fast_json/schema/one_of.h +4 -0
- data/ext/fast_json/schema/path.c +44 -0
- data/ext/fast_json/schema/path.h +5 -0
- data/ext/fast_json/schema/properties_val.c +103 -0
- data/ext/fast_json/schema/properties_val.h +6 -0
- data/ext/fast_json/schema/ref.c +7 -0
- data/ext/fast_json/schema/ref.h +4 -0
- data/ext/fast_json/schema/ref_resolver.c +85 -0
- data/ext/fast_json/schema/ref_resolver.h +5 -0
- data/ext/fast_json/schema/schema.c +68 -0
- data/ext/fast_json/schema/schema_collection.c +29 -0
- data/ext/fast_json/schema/schema_collection.h +3 -0
- data/ext/fast_json/schema/types/compiled_schema.h +96 -0
- data/ext/fast_json/schema/types/context.h +27 -0
- data/ext/fast_json/schema/validate.c +63 -0
- data/ext/fast_json/schema/validate.h +19 -0
- data/ext/fast_json/schema/validate_array.c +130 -0
- data/ext/fast_json/schema/validate_array.h +4 -0
- data/ext/fast_json/schema/validate_bool.c +7 -0
- data/ext/fast_json/schema/validate_bool.h +4 -0
- data/ext/fast_json/schema/validate_integer.c +52 -0
- data/ext/fast_json/schema/validate_integer.h +4 -0
- data/ext/fast_json/schema/validate_null.c +7 -0
- data/ext/fast_json/schema/validate_null.h +4 -0
- data/ext/fast_json/schema/validate_number.c +62 -0
- data/ext/fast_json/schema/validate_number.h +4 -0
- data/ext/fast_json/schema/validate_object.c +159 -0
- data/ext/fast_json/schema/validate_object.h +4 -0
- data/ext/fast_json/schema/validate_string.c +32 -0
- data/ext/fast_json/schema/validate_string.h +4 -0
- data/ext/fast_json/schema/value_pointer_caster.h +9 -0
- data/fast_json-schema.gemspec +31 -0
- data/lib/fast_json/schema/error.rb +16 -0
- data/lib/fast_json/schema/version.rb +7 -0
- data/lib/fast_json/schema.rb +50 -0
- data/makefile +10 -0
- metadata +164 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
#include "formats/utils/uri_template_parser.h"
|
|
2
|
+
#include "formats/utils/utf8.h"
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
/* ASCII character-class helpers. Each argument is evaluated more than once,
 * so pass side-effect-free expressions only. */
#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHA(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
#define IS_HEX(c) (IS_DIGIT(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))

/*
 * ASCII alternatives of "varchar" (RFC 6570 section 2.3):
 *   ALPHA / DIGIT / "_"
 * The pct-encoded alternative is three bytes long and is handled separately.
 *
 * Returns true when `c` is one of those single-byte varchars.
 */
static bool is_varchar_ascii(unsigned char c) {
  if(c == '_') return true;
  if(IS_DIGIT(c)) return true;
  return IS_ALPHA(c);
}
|
|
18
|
+
|
|
19
|
+
/*
 * Expression operators (RFC 6570 section 2.2):
 *   op-level2  = "+" / "#"
 *   op-level3  = "." / "/" / ";" / "?" / "&"
 *   op-reserve = "=" / "," / "!" / "@" / "|"
 *
 * Returns true when `c` is any of the twelve operator characters above.
 */
static bool is_operator(unsigned char c) {
  static const unsigned char operators[] = {
    '+', '#',                /* op-level2  */
    '.', '/', ';', '?', '&', /* op-level3  */
    '=', ',', '!', '@', '|'  /* op-reserve */
  };

  for(unsigned long i = 0; i < sizeof operators / sizeof operators[0]; i++) {
    if(operators[i] == c) return true;
  }

  return false;
}
|
|
34
|
+
|
|
35
|
+
/*
 * ASCII part of "literals" (RFC 6570 section 2.1):
 *   %x21 / %x23-24 / %x26 / %x28-3B / %x3D / %x3F-5B / %x5D / %x5F /
 *   %x61-7A / %x7E
 *
 * Expressed here as its complement: every byte in 0x21..0x7E is a literal
 * except  "  %  '  <  >  \  ^  `  {  |  }.
 * ('%' is rejected because it is only legal as the start of a pct-encoded
 * triple, which is recognised elsewhere.)
 */
static bool is_literal_ascii(unsigned char c) {
  /* Space, control characters, DEL and non-ASCII bytes are never literals. */
  if(c < 0x21 || c > 0x7E) return false;

  switch(c) {
    case '"': case '%': case '\'':
    case '<': case '>':
    case '\\': case '^': case '`':
    case '{': case '|': case '}':
      return false;
    default:
      return true;
  }
}
|
|
56
|
+
|
|
57
|
+
/*
 * pct-encoded (RFC 3986 section 2.1): "%" HEXDIG HEXDIG.
 *
 * s   - start of the candidate triple
 * len - number of readable bytes at `s`
 *
 * Returns 3 when `s` begins with a complete triple, 0 otherwise. The length
 * test comes first so s[1] and s[2] are only read when they exist.
 */
static long consume_pct_encoded(const char *s, long len) {
  bool well_formed = len >= 3
    && s[0] == '%'
    && IS_HEX((unsigned char)s[1])
    && IS_HEX((unsigned char)s[2]);

  return well_formed ? 3 : 0;
}
|
|
66
|
+
|
|
67
|
+
/*
 * Consume a single literal unit at `s`: one literal ASCII byte, one
 * pct-encoded triple, or one well-formed multi-byte UTF-8 sequence.
 *
 * s[0] is read unconditionally, so the caller must pass len >= 1.
 *
 * Returns the number of bytes the unit occupies (1, 3, or 2..4), or 0 when
 * the bytes at `s` do not form a valid unit.
 */
static long consume_literal_byte(const char *s, long len) {
  unsigned char lead = (unsigned char)s[0];

  if(lead == '%') return consume_pct_encoded(s, len);
  if(lead < 0x80) return is_literal_ascii(lead) ? 1 : 0;

  /* Non-ASCII lead byte: any well-formed multi-byte sequence is accepted. */
  long seq_len = utf8_seq_len((const unsigned char *)s, len);
  if(seq_len < 2) return 0;

  return seq_len;
}
|
|
82
|
+
|
|
83
|
+
/*
 * Consume consecutive literal units starting at `s`, stopping at the first
 * "{" or at the end of the input.
 *
 * Returns the number of bytes consumed. Returns 0 when an invalid unit is
 * met anywhere in the run, and also when nothing could be consumed (empty
 * input or input starting with "{"), so callers treat 0 as failure.
 */
static long parse_literals_run(const char *s, long len) {
  long consumed = 0;

  for(;;) {
    if(consumed >= len || s[consumed] == '{') break;

    long step = consume_literal_byte(s + consumed, len - consumed);
    if(step == 0) return 0;

    consumed += step;
  }

  return consumed;
}
|
|
96
|
+
|
|
97
|
+
/*
 * Consume one varchar (RFC 6570 section 2.3):
 *   ALPHA / DIGIT / "_" / pct-encoded
 *
 * Returns 1 for a single-byte varchar, 3 for a pct-encoded triple, and 0
 * when the input is empty or does not start with a varchar.
 */
static long parse_varchar(const char *s, long len) {
  if(len == 0) return 0;

  unsigned char c = (unsigned char)s[0];

  return c == '%'
    ? consume_pct_encoded(s, len)
    : (is_varchar_ascii(c) ? 1 : 0);
}
|
|
109
|
+
|
|
110
|
+
/*
 * Consume a varname (RFC 6570 section 2.3):
 *   varchar *( ["."] varchar )
 *
 * A "." is only accepted between two varchars: a leading dot, a trailing dot
 * and two dots in a row all make the whole name invalid.
 *
 * Returns the number of bytes in the name, or 0 on failure.
 */
static long parse_varname(const char *s, long len) {
  long pos = parse_varchar(s, len);
  if(pos == 0) return 0;

  while(pos < len) {
    bool dotted = (s[pos] == '.');
    long start = dotted ? pos + 1 : pos;
    long n = parse_varchar(s + start, len - start);

    if(n == 0) {
      if(dotted) return 0; /* a dot must be followed by a varchar */
      break;               /* first byte that is not part of the name */
    }

    pos = start + n;
  }

  return pos;
}
|
|
136
|
+
|
|
137
|
+
/*
 * Consume a modifier-level4 (RFC 6570 section 2.4):
 *   explode = "*"
 *   prefix  = ":" max-length     ; max-length = %x31-39 0*3DIGIT (1..9999)
 *
 * Intended to be called with s[0] being '*' or ':'; any other first byte
 * yields 0.
 *
 * Returns the number of bytes consumed, or 0 on failure (empty input, ":"
 * not followed by 1-9, or more than four digits).
 */
static long parse_modifier(const char *s, long len) {
  if(len == 0) return 0;
  if(s[0] == '*') return 1;
  if(s[0] != ':') return 0;
  if(len < 2) return 0;

  unsigned char leading_digit = (unsigned char)s[1];
  if(leading_digit < '1' || leading_digit > '9') return 0;

  /* Swallow every following digit so that over-long lengths are rejected
   * rather than split. */
  long end;
  for(end = 2; end < len && IS_DIGIT((unsigned char)s[end]); end++) {
  }

  long digit_count = end - 1; /* bytes after the ":" */
  if(digit_count < 1 || digit_count > 4) return 0;

  return end;
}
|
|
164
|
+
|
|
165
|
+
/*
 * Consume a varspec (RFC 6570 section 2.3):
 *   varname [ modifier-level4 ]
 *
 * Returns the number of bytes consumed, or 0 when the varname is invalid or
 * a ':' / '*' follows it but does not form a valid modifier.
 */
static long parse_varspec(const char *s, long len) {
  long name_len = parse_varname(s, len);
  if(name_len == 0) return 0;

  /* No modifier present: the varspec is just the name. */
  if(name_len >= len) return name_len;
  if(s[name_len] != ':' && s[name_len] != '*') return name_len;

  long modifier_len = parse_modifier(s + name_len, len - name_len);
  if(modifier_len == 0) return 0;

  return name_len + modifier_len;
}
|
|
181
|
+
|
|
182
|
+
/*
 * Consume a variable-list (RFC 6570 section 2.3):
 *   varspec *( "," varspec )
 *
 * Returns the number of bytes consumed, or 0 when the first varspec is
 * invalid or a "," is not followed by a valid varspec.
 */
static long parse_variable_list(const char *s, long len) {
  long pos = 0;

  for(;;) {
    long spec_len = parse_varspec(s + pos, len - pos);
    if(spec_len == 0) return 0;
    pos += spec_len;

    if(pos >= len || s[pos] != ',') return pos;
    pos++; /* step over the "," and expect another varspec */
  }
}
|
|
198
|
+
|
|
199
|
+
/*
 * Consume an expression (RFC 6570 section 2.2):
 *   "{" [ operator ] variable-list "}"
 *
 * Returns the number of bytes consumed, braces included, or 0 on failure.
 */
static long parse_expression(const char *s, long len) {
  if(len < 2) return 0;
  if(s[0] != '{') return 0;

  /* len >= 2, so s[1] is readable; it is either the operator or the first
   * byte of the variable list. */
  long list_start = is_operator((unsigned char)s[1]) ? 2 : 1;

  long list_len = parse_variable_list(s + list_start, len - list_start);
  if(list_len == 0) return 0;

  long closing = list_start + list_len;
  if(closing >= len) return 0;

  return s[closing] == '}' ? closing + 1 : 0;
}
|
|
218
|
+
|
|
219
|
+
/*
 * Validate `s[0..len)` as an RFC 6570 URI Template, i.e. a sequence of
 * literal runs and "{...}" expressions.
 *
 * Returns true when the whole input parses; an input with len <= 0 is
 * accepted because the loop never runs. Returns false at the first
 * expression or literal run that fails to parse.
 */
bool parse_uri_template(const char *s, long len) {
  for(long pos = 0; pos < len; ) {
    long step = (s[pos] == '{')
      ? parse_expression(s + pos, len - pos)
      : parse_literals_run(s + pos, len - pos);

    if(step == 0) return false;
    pos += step;
  }

  return true;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#ifndef FAST_JSON_FORMATS_UTILS_URI_TEMPLATE_PARSER_H
|
|
2
|
+
#define FAST_JSON_FORMATS_UTILS_URI_TEMPLATE_PARSER_H
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* RFC 6570 URI Template parser.
|
|
8
|
+
*
|
|
9
|
+
* A URI Template is *( literals / expression ). Literals accept any valid
|
|
10
|
+
* UTF-8 (ucschar / iprivate) plus pct-encoded; expressions are ASCII-only:
|
|
11
|
+
* "{" [ operator ] variable-list "}"
|
|
12
|
+
*
|
|
13
|
+
* Returns true if the entire input is a well-formed URI Template. The empty
|
|
14
|
+
* string is valid (zero repetitions of literals/expression).
|
|
15
|
+
*/
|
|
16
|
+
bool parse_uri_template(const char *s, long len);
|
|
17
|
+
|
|
18
|
+
#endif
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#include "formats/utils/utf8.h"
|
|
2
|
+
|
|
3
|
+
/* Byte-range constants for well-formed UTF-8 (RFC 3629). */
enum {
  /* Continuation-byte range. */
  UTF8_CONT_MIN = 0x80, /* 1000 0000 */
  UTF8_CONT_MAX = 0xBF, /* 1011 1111 */

  /* ASCII upper bound. */
  UTF8_ASCII_MAX = 0x7F, /* 0111 1111 */

  /* Valid lead-byte ranges per sequence length. */
  UTF8_LEAD2_MIN = 0xC2, /* 1100 0010 */
  UTF8_LEAD2_MAX = 0xDF, /* 1101 1111 */
  UTF8_LEAD3_MIN = 0xE0, /* 1110 0000 */
  UTF8_LEAD3_MAX = 0xEF, /* 1110 1111 */
  UTF8_LEAD4_MIN = 0xF0, /* 1111 0000 */
  UTF8_LEAD4_MAX = 0xF4, /* 1111 0100 */

  /* Lead byte whose second byte is capped to exclude U+D800..U+DFFF. */
  UTF8_LEAD3_SURROGATE = 0xED, /* 1110 1101 */

  /* Tightened second-byte bounds for the irregular lead bytes. */
  UTF8_E0_BYTE2_MIN = 0xA0, /* 1010 0000 */
  UTF8_ED_BYTE2_MAX = 0x9F, /* 1001 1111 */
  UTF8_F0_BYTE2_MIN = 0x90, /* 1001 0000 */
  UTF8_F4_BYTE2_MAX = 0x8F  /* 1000 1111 */
};

/*
 * Length of the UTF-8 sequence that starts at s[0].
 *
 * s         - pointer to the candidate lead byte
 * remaining - number of readable bytes at `s`
 *
 * Returns 1 for an ASCII byte and 2..4 for a well-formed multi-byte
 * sequence. Returns 0 when `remaining` is less than 1, the lead byte is not
 * a valid lead, the sequence is truncated, a continuation byte is out of
 * range, or the second byte falls outside the tightened bounds that exclude
 * overlong forms, surrogates and code points above U+10FFFF.
 */
long utf8_seq_len(const unsigned char *s, long remaining) {
  if(remaining < 1) return 0;

  unsigned char lead = s[0];
  if(lead <= UTF8_ASCII_MAX) return 1;

  /* Classify the lead byte into the sequence length it announces. */
  long expected;
  if(lead < UTF8_LEAD2_MIN) return 0; /* 0x80..0xC1 cannot start a sequence */
  else if(lead <= UTF8_LEAD2_MAX) expected = 2;
  else if(lead <= UTF8_LEAD3_MAX) expected = 3;
  else if(lead <= UTF8_LEAD4_MAX) expected = 4;
  else return 0;                      /* 0xF5..0xFF */

  if(remaining < expected) return 0;

  /* Only four lead bytes narrow the range allowed for the second byte. */
  unsigned char byte2_min = UTF8_CONT_MIN;
  unsigned char byte2_max = UTF8_CONT_MAX;

  switch(lead) {
    case UTF8_LEAD3_MIN:       byte2_min = UTF8_E0_BYTE2_MIN; break; /* anti-overlong */
    case UTF8_LEAD3_SURROGATE: byte2_max = UTF8_ED_BYTE2_MAX; break; /* anti-surrogate */
    case UTF8_LEAD4_MIN:       byte2_min = UTF8_F0_BYTE2_MIN; break; /* anti-overlong */
    case UTF8_LEAD4_MAX:       byte2_max = UTF8_F4_BYTE2_MAX; break; /* <= U+10FFFF */
    default: break;
  }

  if(s[1] < byte2_min || s[1] > byte2_max) return 0;

  for(long i = 2; i < expected; i++) {
    if(s[i] < UTF8_CONT_MIN || s[i] > UTF8_CONT_MAX) return 0;
  }

  return expected;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#ifndef FAST_JSON_FORMATS_UTILS_UTF8_H
|
|
2
|
+
#define FAST_JSON_FORMATS_UTILS_UTF8_H
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
|
|
6
|
+
/*
|
|
7
|
+
* Strict UTF-8 sequence validator. Given a pointer to the leading byte of a
|
|
8
|
+
* UTF-8 character, returns the byte length (1..4) of a valid UTF-8 sequence
|
|
9
|
+
* starting at that position, or 0 if the bytes do not form a valid sequence.
|
|
10
|
+
*
|
|
11
|
+
* Returns 1 for any ASCII byte (< 0x80). Returns 2..4 for valid multi-byte
|
|
12
|
+
* sequences. Rejects overlong encodings, UTF-16 surrogates (U+D800..U+DFFF)
|
|
13
|
+
* and codepoints above U+10FFFF.
|
|
14
|
+
*/
|
|
15
|
+
long utf8_seq_len(const unsigned char *s, long remaining);
|
|
16
|
+
|
|
17
|
+
#endif
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#include "if.h"
|
|
2
|
+
#include "error.h"
|
|
3
|
+
|
|
4
|
+
extern bool is_valid(VALUE, CompiledSchema *, VALUE, Context *);
|
|
5
|
+
|
|
6
|
+
/*
 * Check `data` against the `then` sub-schema passed as `compiled_schema`.
 * Reports an "if/then" error through yield_error when it does not validate.
 */
static void run_then(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
  if(is_valid(schema, compiled_schema, data, context)) return;

  yield_error(compiled_schema, data, context, "if/then");
}
|
|
12
|
+
|
|
13
|
+
/*
 * Check `data` against the `else` sub-schema passed as `compiled_schema`.
 * Reports an "if/else" error through yield_error when it does not validate.
 */
static void run_else(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
  if(is_valid(schema, compiled_schema, data, context)) return;

  yield_error(compiled_schema, data, context, "if/else");
}
|
|
19
|
+
|
|
20
|
+
/*
 * Apply the `if` / `then` / `else` keywords of `compiled_schema` to `data`.
 *
 * `data` is first tested against `if_schema` via is_valid, which reports
 * pass/fail only. When it passes and a `then` sub-schema exists, `data` must
 * also satisfy `then`; when it fails and an `else` sub-schema exists, `data`
 * must satisfy `else`. A failing branch is reported through yield_error by
 * run_then / run_else.
 *
 * Nothing is done when neither branch is present, or when there is no `if`
 * sub-schema.
 */
void validate_if(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
  CompiledSchema *if_schema = compiled_schema->if_schema;
  CompiledSchema *then_schema = compiled_schema->then_schema;
  CompiledSchema *else_schema = compiled_schema->else_schema;

  /* Without `if`, `then` and `else` have no effect in JSON Schema. The guard
   * also keeps a NULL schema away from is_valid, which dereferences it.
   * NOTE(review): whether if_schema can actually be NULL here depends on the
   * caller / schema compiler - confirm. */
  if(if_schema == NULL) return;

  if(then_schema == NULL && else_schema == NULL) return;

  bool valid = is_valid(schema, if_schema, data, context);

  if(valid && then_schema != NULL) {
    run_then(schema, then_schema, data, context);
  } else if(!valid && else_schema != NULL) {
    run_else(schema, else_schema, data, context);
  }
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
#include "is_valid.h"
|
|
2
|
+
|
|
3
|
+
VALUE short_circuit_tag;
|
|
4
|
+
|
|
5
|
+
struct is_valid_args_memo_S {
|
|
6
|
+
VALUE schema;
|
|
7
|
+
CompiledSchema *compiled_schema;
|
|
8
|
+
VALUE data;
|
|
9
|
+
Context *context;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
struct ensure_args_memo_S {
|
|
13
|
+
Context *context;
|
|
14
|
+
bool prev_short_circuit;
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
/*
 * Block body passed to rb_catch_obj. `arg` carries a pointer to a
 * struct is_valid_args_memo_S; the catch tag argument is unused.
 *
 * Invokes the compiled schema's validation function and returns Qtrue once
 * that call returns normally.
 */
static VALUE is_valid_body(RB_BLOCK_CALL_FUNC_ARGLIST(_tag, arg)) {
  struct is_valid_args_memo_S *memo = (struct is_valid_args_memo_S *)arg;
  CompiledSchema *compiled = memo->compiled_schema;

  compiled->validation_function(memo->schema, compiled, memo->data, memo->context);

  return Qtrue;
}
|
|
26
|
+
|
|
27
|
+
/*
 * Protected section handed to rb_ensure: runs is_valid_body inside a catch
 * for short_circuit_tag and hands back whatever rb_catch_obj returns.
 */
static VALUE is_valid_catch(VALUE arg) {
  VALUE outcome = rb_catch_obj(short_circuit_tag, is_valid_body, arg);

  return outcome;
}
|
|
30
|
+
|
|
31
|
+
/*
 * Ensure callback for is_valid. `arg` carries a pointer to a
 * struct ensure_args_memo_S; the saved flag value is written back into the
 * context. Always returns Qnil.
 */
static VALUE is_valid_ensure(VALUE arg) {
  struct ensure_args_memo_S *saved = (struct ensure_args_memo_S *)arg;
  Context *context = saved->context;

  context->short_circuit_on_error = saved->prev_short_circuit;

  return Qnil;
}
|
|
38
|
+
|
|
39
|
+
/*
|
|
40
|
+
* `is_valid` runs the given compiled schema against the given data and
|
|
41
|
+
* reports whether the data validates, without yielding any error to the
|
|
42
|
+
* user. It is the entry point used by every combinator that needs a
|
|
43
|
+
* pure pass/fail answer about a sub-schema: `anyOf`, `oneOf`, `allOf`,
|
|
44
|
+
* `not`, `if`, and the array `contains` keyword.
|
|
45
|
+
*
|
|
46
|
+
* Why short-circuiting matters
|
|
47
|
+
* ----------------------------
|
|
48
|
+
* Validation is normally driven by side effects: when a constraint is
|
|
49
|
+
* violated, `yield_error` builds an `Error` and yields it to the user
|
|
50
|
+
* block. Combinators don't want that. As soon as `anyOf` (for example)
|
|
51
|
+
* sees the *first* failure inside a branch, it knows that branch is
|
|
52
|
+
* invalid and wants to abandon it immediately, no matter how deeply
|
|
53
|
+
* nested the failing constraint is. Letting validation run to completion
|
|
54
|
+
* to discover the failure would also leak the branch's internal errors
|
|
55
|
+
* to the user, which is wrong.
|
|
56
|
+
*
|
|
57
|
+
* How the short-circuit works
|
|
58
|
+
* ---------------------------
|
|
59
|
+
* Two pieces of state cooperate:
|
|
60
|
+
*
|
|
61
|
+
* 1. `context->short_circuit_on_error` (a `bool`) tells `yield_error`
|
|
62
|
+
* whether it should yield (the default, top-level behavior) or
|
|
63
|
+
* short-circuit (when an `is_valid` frame is active). The flag is
|
|
64
|
+
* set to `true` here on entry and restored to its prior value on
|
|
65
|
+
* exit, regardless of how the body unwinds.
|
|
66
|
+
*
|
|
67
|
+
* 2. `short_circuit_tag` (a unique frozen `Object.new` allocated in
|
|
68
|
+
* `Init_is_valid`) is the tag we use with Ruby's `throw`/`catch`
|
|
69
|
+
* mechanism. When `yield_error` is reached with the flag set, it
|
|
70
|
+
* calls `rb_throw_obj(short_circuit_tag, Qfalse)`, which Ruby's
|
|
71
|
+
* VM unwinds up to the matching `rb_catch_obj` registered below.
|
|
72
|
+
*
|
|
73
|
+
* Why throw/catch instead of setjmp/longjmp
|
|
74
|
+
* -----------------------------------------
|
|
75
|
+
* An earlier implementation used `RUBY_SETJMP`/`RUBY_LONGJMP` to escape
|
|
76
|
+
* out of nested validation. That worked on Ruby 2.x, but in Ruby 3.x it
|
|
77
|
+
* corrupts VM state when the jump crosses Ruby callbacks like
|
|
78
|
+
* `rb_funcall`, `rb_reg_match`, `rb_yield`, or `rb_hash_foreach`: those
|
|
79
|
+
* functions register frame and iteration bookkeeping that `longjmp`
|
|
80
|
+
* skips over, leaving the VM and GC in an inconsistent state and
|
|
81
|
+
* eventually segfaulting. `rb_throw_obj` performs the same logical
|
|
82
|
+
* non-local exit but uses the VM's own unwind machinery, which closes
|
|
83
|
+
* frames cleanly, releases hash iteration state, and runs intervening
|
|
84
|
+
* `ensure` blocks.
|
|
85
|
+
*
|
|
86
|
+
* Why we still need `rb_ensure`
|
|
87
|
+
* -----------------------------
|
|
88
|
+
* `rb_catch_obj` only intercepts throws matching its tag. A real Ruby
|
|
89
|
+
* exception -- e.g. the `RuntimeError` raised by `INCR_CONTEXT` when
|
|
90
|
+
* the document exceeds `MAX_CONTEXT_DEPTH`, or a user `raise` from
|
|
91
|
+
* inside the validate block -- is not a throw; it propagates straight
|
|
92
|
+
* through. We wrap the catch in `rb_ensure` so that even when an
|
|
93
|
+
* exception escapes, the ensure callback restores
|
|
94
|
+
* `short_circuit_on_error` to its prior value before unwinding
|
|
95
|
+
* continues. Without that, an active `is_valid` frame interrupted by
|
|
96
|
+
* a real exception would leave the flag stuck at `true`, silently
|
|
97
|
+
* breaking subsequent validations that share the same context.
|
|
98
|
+
*
|
|
99
|
+
* Nesting
|
|
100
|
+
* -------
|
|
101
|
+
* Combinators routinely nest (e.g. `anyOf` containing `oneOf`
|
|
102
|
+
* containing `allOf`). Saving and restoring the flag around each
|
|
103
|
+
* `is_valid` call, combined with `rb_catch_obj`'s LIFO semantics
|
|
104
|
+
* (each throw is caught by the nearest matching catch), gives correct
|
|
105
|
+
* behavior at any depth: an inner short-circuit unwinds only to the
|
|
106
|
+
* inner catch, leaving the outer combinator free to continue
|
|
107
|
+
* iterating its remaining branches with the flag still set.
|
|
108
|
+
*/
|
|
109
|
+
bool is_valid(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
  /* Capture the current flag before overwriting it so the ensure callback
   * can put it back however the body exits. */
  struct ensure_args_memo_S restore = {
    .context = context,
    .prev_short_circuit = context->short_circuit_on_error,
  };
  struct is_valid_args_memo_S call = {
    .schema = schema,
    .compiled_schema = compiled_schema,
    .data = data,
    .context = context,
  };

  context->short_circuit_on_error = true;

  /* Anything other than Qtrue coming back from the catch counts as invalid. */
  return rb_ensure(is_valid_catch, (VALUE)&call, is_valid_ensure, (VALUE)&restore) == Qtrue;
}
|
|
119
|
+
|
|
120
|
+
void Init_is_valid(void) {
|
|
121
|
+
short_circuit_tag = rb_obj_freeze(rb_class_new_instance(0, NULL, rb_cObject));
|
|
122
|
+
|
|
123
|
+
rb_gc_register_address(&short_circuit_tag);
|
|
124
|
+
}
|