fast_json-schema 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/CODE_OF_CONDUCT.md +84 -0
  4. data/Dockerfile +17 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +68 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +156 -0
  9. data/Rakefile +60 -0
  10. data/build-deps +3 -0
  11. data/data/invalid.json +31 -0
  12. data/data/schema.json +150 -0
  13. data/data/valid.json +49 -0
  14. data/ext/fast_json/schema/all_of.c +23 -0
  15. data/ext/fast_json/schema/all_of.h +4 -0
  16. data/ext/fast_json/schema/any_of.c +22 -0
  17. data/ext/fast_json/schema/any_of.h +4 -0
  18. data/ext/fast_json/schema/compiled_schema.c +503 -0
  19. data/ext/fast_json/schema/compiled_schema.h +10 -0
  20. data/ext/fast_json/schema/context.c +78 -0
  21. data/ext/fast_json/schema/error.c +26 -0
  22. data/ext/fast_json/schema/error.h +5 -0
  23. data/ext/fast_json/schema/extconf.rb +7 -0
  24. data/ext/fast_json/schema/formats/custom_format.c +63 -0
  25. data/ext/fast_json/schema/formats/custom_format.h +4 -0
  26. data/ext/fast_json/schema/formats/date.c +48 -0
  27. data/ext/fast_json/schema/formats/date.h +5 -0
  28. data/ext/fast_json/schema/formats/date_time.c +22 -0
  29. data/ext/fast_json/schema/formats/date_time.h +4 -0
  30. data/ext/fast_json/schema/formats/email.c +8 -0
  31. data/ext/fast_json/schema/formats/email.h +4 -0
  32. data/ext/fast_json/schema/formats/format.c +68 -0
  33. data/ext/fast_json/schema/formats/format.h +4 -0
  34. data/ext/fast_json/schema/formats/hostname.c +8 -0
  35. data/ext/fast_json/schema/formats/hostname.h +4 -0
  36. data/ext/fast_json/schema/formats/idn_email.c +8 -0
  37. data/ext/fast_json/schema/formats/idn_email.h +4 -0
  38. data/ext/fast_json/schema/formats/idn_hostname.c +8 -0
  39. data/ext/fast_json/schema/formats/idn_hostname.h +4 -0
  40. data/ext/fast_json/schema/formats/ipv4.c +8 -0
  41. data/ext/fast_json/schema/formats/ipv4.h +4 -0
  42. data/ext/fast_json/schema/formats/ipv6.c +8 -0
  43. data/ext/fast_json/schema/formats/ipv6.h +4 -0
  44. data/ext/fast_json/schema/formats/iri.c +8 -0
  45. data/ext/fast_json/schema/formats/iri.h +4 -0
  46. data/ext/fast_json/schema/formats/iri_reference.c +8 -0
  47. data/ext/fast_json/schema/formats/iri_reference.h +4 -0
  48. data/ext/fast_json/schema/formats/json_pointer.c +8 -0
  49. data/ext/fast_json/schema/formats/json_pointer.h +4 -0
  50. data/ext/fast_json/schema/formats/regex.c +27 -0
  51. data/ext/fast_json/schema/formats/regex.h +4 -0
  52. data/ext/fast_json/schema/formats/relative_json_pointer.c +57 -0
  53. data/ext/fast_json/schema/formats/relative_json_pointer.h +4 -0
  54. data/ext/fast_json/schema/formats/time.c +65 -0
  55. data/ext/fast_json/schema/formats/time.h +5 -0
  56. data/ext/fast_json/schema/formats/uri.c +8 -0
  57. data/ext/fast_json/schema/formats/uri.h +4 -0
  58. data/ext/fast_json/schema/formats/uri_reference.c +8 -0
  59. data/ext/fast_json/schema/formats/uri_reference.h +4 -0
  60. data/ext/fast_json/schema/formats/uri_template.c +8 -0
  61. data/ext/fast_json/schema/formats/uri_template.h +4 -0
  62. data/ext/fast_json/schema/formats/utils/addr_spec_parser.c +342 -0
  63. data/ext/fast_json/schema/formats/utils/addr_spec_parser.h +16 -0
  64. data/ext/fast_json/schema/formats/utils/hostname_parser.c +113 -0
  65. data/ext/fast_json/schema/formats/utils/hostname_parser.h +17 -0
  66. data/ext/fast_json/schema/formats/utils/ip_parser.c +126 -0
  67. data/ext/fast_json/schema/formats/utils/ip_parser.h +25 -0
  68. data/ext/fast_json/schema/formats/utils/json_pointer_parser.c +45 -0
  69. data/ext/fast_json/schema/formats/utils/json_pointer_parser.h +20 -0
  70. data/ext/fast_json/schema/formats/utils/uri_parser.c +605 -0
  71. data/ext/fast_json/schema/formats/utils/uri_parser.h +20 -0
  72. data/ext/fast_json/schema/formats/utils/uri_template_parser.c +235 -0
  73. data/ext/fast_json/schema/formats/utils/uri_template_parser.h +18 -0
  74. data/ext/fast_json/schema/formats/utils/utf8.c +73 -0
  75. data/ext/fast_json/schema/formats/utils/utf8.h +17 -0
  76. data/ext/fast_json/schema/if.c +31 -0
  77. data/ext/fast_json/schema/if.h +4 -0
  78. data/ext/fast_json/schema/is_valid.c +124 -0
  79. data/ext/fast_json/schema/is_valid.h +6 -0
  80. data/ext/fast_json/schema/keywords.c +220 -0
  81. data/ext/fast_json/schema/keywords.h +60 -0
  82. data/ext/fast_json/schema/nested_schemas.c +68 -0
  83. data/ext/fast_json/schema/nested_schemas.h +4 -0
  84. data/ext/fast_json/schema/not.c +11 -0
  85. data/ext/fast_json/schema/not.h +4 -0
  86. data/ext/fast_json/schema/one_of.c +23 -0
  87. data/ext/fast_json/schema/one_of.h +4 -0
  88. data/ext/fast_json/schema/path.c +44 -0
  89. data/ext/fast_json/schema/path.h +5 -0
  90. data/ext/fast_json/schema/properties_val.c +103 -0
  91. data/ext/fast_json/schema/properties_val.h +6 -0
  92. data/ext/fast_json/schema/ref.c +7 -0
  93. data/ext/fast_json/schema/ref.h +4 -0
  94. data/ext/fast_json/schema/ref_resolver.c +85 -0
  95. data/ext/fast_json/schema/ref_resolver.h +5 -0
  96. data/ext/fast_json/schema/schema.c +68 -0
  97. data/ext/fast_json/schema/schema_collection.c +29 -0
  98. data/ext/fast_json/schema/schema_collection.h +3 -0
  99. data/ext/fast_json/schema/types/compiled_schema.h +96 -0
  100. data/ext/fast_json/schema/types/context.h +27 -0
  101. data/ext/fast_json/schema/validate.c +63 -0
  102. data/ext/fast_json/schema/validate.h +19 -0
  103. data/ext/fast_json/schema/validate_array.c +130 -0
  104. data/ext/fast_json/schema/validate_array.h +4 -0
  105. data/ext/fast_json/schema/validate_bool.c +7 -0
  106. data/ext/fast_json/schema/validate_bool.h +4 -0
  107. data/ext/fast_json/schema/validate_integer.c +52 -0
  108. data/ext/fast_json/schema/validate_integer.h +4 -0
  109. data/ext/fast_json/schema/validate_null.c +7 -0
  110. data/ext/fast_json/schema/validate_null.h +4 -0
  111. data/ext/fast_json/schema/validate_number.c +62 -0
  112. data/ext/fast_json/schema/validate_number.h +4 -0
  113. data/ext/fast_json/schema/validate_object.c +159 -0
  114. data/ext/fast_json/schema/validate_object.h +4 -0
  115. data/ext/fast_json/schema/validate_string.c +32 -0
  116. data/ext/fast_json/schema/validate_string.h +4 -0
  117. data/ext/fast_json/schema/value_pointer_caster.h +9 -0
  118. data/fast_json-schema.gemspec +31 -0
  119. data/lib/fast_json/schema/error.rb +16 -0
  120. data/lib/fast_json/schema/version.rb +7 -0
  121. data/lib/fast_json/schema.rb +50 -0
  122. data/makefile +10 -0
  123. metadata +164 -0
@@ -0,0 +1,235 @@
1
+ #include "formats/utils/uri_template_parser.h"
2
+ #include "formats/utils/utf8.h"
3
+
4
+ #include <stdbool.h>
5
+
6
/*
 * ASCII character-class predicates.
 * Each argument is expanded more than once, so only pass expressions that
 * have no side effects.
 */
#define IS_DIGIT(c) ('0' <= (c) && (c) <= '9')
#define IS_ALPHA(c) \
  (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define IS_HEX(c) \
  (IS_DIGIT(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
9
+
10
/*
 * Tells whether `c` is a single-byte varchar as listed in RFC 6570
 * section 2.3: ALPHA / DIGIT / "_".
 * The pct-encoded alternative of varchar is not covered by this predicate.
 */
static bool is_varchar_ascii(unsigned char c) {
  if(c == '_') return true;
  if(IS_DIGIT(c)) return true;
  return IS_ALPHA(c);
}
18
+
19
/*
 * Tells whether `c` is an expression operator from RFC 6570 section 2.2:
 *   op-level2  = "+" / "#"
 *   op-level3  = "." / "/" / ";" / "?" / "&"
 *   op-reserve = "=" / "," / "!" / "@" / "|"
 */
static bool is_operator(unsigned char c) {
  static const unsigned char operators[] = {
    '+', '#',                     /* op-level2  */
    '.', '/', ';', '?', '&',      /* op-level3  */
    '=', ',', '!', '@', '|'       /* op-reserve */
  };

  for(unsigned long i = 0; i < sizeof(operators); i++) {
    if(operators[i] == c) return true;
  }
  return false;
}
34
+
35
/*
 * Tells whether `c` belongs to the single-byte part of the `literals` rule
 * of RFC 6570 section 2.1:
 *   %x21 / %x23-24 / %x26 / %x28-3B / %x3D / %x3F-5B / %x5D / %x5F /
 *   %x61-7A / %x7E
 *
 * Everything else is rejected: space, control characters, DEL, bytes
 * >= 0x80, and  " % ' < > \ ^ ` { | } .
 * ('%' is only legal as the start of a pct-encoded triple, which is not
 * decided here.)
 */
static bool is_literal_ascii(unsigned char c) {
  static const struct { unsigned char lo, hi; } allowed[] = {
    { 0x21, 0x21 },  /* "!"        */
    { 0x23, 0x24 },  /* "#" "$"    */
    { 0x26, 0x26 },  /* "&"        */
    { 0x28, 0x3B },  /* "(" .. ";" */
    { 0x3D, 0x3D },  /* "="        */
    { 0x3F, 0x5B },  /* "?" .. "[" */
    { 0x5D, 0x5D },  /* "]"        */
    { 0x5F, 0x5F },  /* "_"        */
    { 0x61, 0x7A },  /* "a" .. "z" */
    { 0x7E, 0x7E }   /* "~"        */
  };

  for(unsigned long i = 0; i < sizeof(allowed) / sizeof(allowed[0]); i++) {
    if(c >= allowed[i].lo && c <= allowed[i].hi) return true;
  }
  return false;
}
56
+
57
/*
 * Matches one pct-encoded triple (RFC 3986 section 2.1: "%" HEXDIG HEXDIG)
 * at the start of `s`, looking at no more than `len` bytes.
 * Returns 3 when the triple is present, 0 otherwise.
 */
static long consume_pct_encoded(const char *s, long len) {
  if(len >= 3 && s[0] == '%') {
    const unsigned char hi = (unsigned char)s[1];
    const unsigned char lo = (unsigned char)s[2];

    if(IS_HEX(hi) && IS_HEX(lo)) return 3;
  }
  return 0;
}
66
+
67
/*
 * Consume one literal unit at the start of `s`: a literal ASCII byte, a
 * pct-encoded triple, or a valid multi-byte UTF-8 sequence.
 *
 * `len` is the number of readable bytes at `s`.
 * Returns the number of bytes consumed (1, 3, or 2..4), or 0 when the input
 * is empty or does not start with a valid literal unit.
 */
static long consume_literal_byte(const char *s, long len) {
  /* Never read s[0] from an empty buffer (mirrors the guard in parse_varchar). */
  if(len < 1) return 0;

  unsigned char c = (unsigned char)s[0];

  if(c == '%') return consume_pct_encoded(s, len);
  if(c < 0x80) return is_literal_ascii(c) ? 1 : 0;

  /* Non-ASCII lead byte: only a complete multi-byte sequence is accepted. */
  long n = utf8_seq_len((const unsigned char *)s, len);
  return n >= 2 ? n : 0;
}
82
+
83
/*
 * Consume consecutive literal units from `s` until either `len` bytes have
 * been read or a "{" is reached.
 * Returns the number of bytes consumed. The result is 0 when an invalid
 * literal is met, and also when nothing precedes the "{" (or `len` is 0),
 * so callers should only invoke it on a position that is not "{".
 */
static long parse_literals_run(const char *s, long len) {
  long consumed = 0;

  while(consumed < len) {
    if(s[consumed] == '{') break;

    const long step = consume_literal_byte(s + consumed, len - consumed);
    if(step == 0) return 0;

    consumed += step;
  }

  return consumed;
}
96
+
97
/*
 * Consume a single varchar (RFC 6570 section 2.3):
 *   ALPHA / DIGIT / "_" / pct-encoded
 * Returns 1 for an ASCII varchar, 3 for a pct-encoded triple, and 0 when
 * `len` is 0 or the input does not start with a varchar.
 */
static long parse_varchar(const char *s, long len) {
  if(len == 0) return 0;

  const unsigned char head = (unsigned char)s[0];

  if(head == '%') return consume_pct_encoded(s, len);

  return is_varchar_ascii(head) ? 1 : 0;
}
109
+
110
/*
 * Consume a varname (RFC 6570 section 2.3):
 *   varchar *( ["."] varchar )
 * A "." is only accepted when a varchar follows it, which rules out leading
 * dots, trailing dots and consecutive dots.
 * Returns the number of bytes consumed, or 0 on failure.
 */
static long parse_varname(const char *s, long len) {
  long pos = parse_varchar(s, len);
  if(pos == 0) return 0;

  while(pos < len) {
    /* 1 when an optional "." separator precedes the next varchar. */
    const long dot = (s[pos] == '.') ? 1 : 0;
    const long n = parse_varchar(s + pos + dot, len - pos - dot);

    if(n == 0) {
      if(dot) return 0; /* a dot must be followed by a varchar */
      break;            /* plain end of the varname */
    }

    pos += dot + n;
  }

  return pos;
}
136
+
137
/*
 * Consume a modifier-level4 (RFC 6570 section 2.4):
 *   explode = "*"
 *   prefix  = ":" max-length     ; max-length = %x31-39 0*3DIGIT (1..9999)
 * Intended to be called with s[0] being ':' or '*'; any other first byte
 * yields 0.
 * Returns the number of bytes consumed, or 0 on failure.
 */
static long parse_modifier(const char *s, long len) {
  if(len == 0) return 0;

  switch(s[0]) {
    case '*':
      return 1;
    case ':':
      break;
    default:
      return 0;
  }

  /* Prefix modifier: the first digit must be 1..9 (no leading zero). */
  if(len < 2) return 0;

  const unsigned char lead_digit = (unsigned char)s[1];
  if(lead_digit < '1' || lead_digit > '9') return 0;

  long end = 2;
  while(end < len && IS_DIGIT((unsigned char)s[end])) end++;

  /* Everything after the ':' is digits; at most four are allowed. */
  const long digit_count = end - 1;
  return (digit_count >= 1 && digit_count <= 4) ? end : 0;
}
164
+
165
/*
 * Consume a varspec (RFC 6570 section 2.3):
 *   varname [ modifier-level4 ]
 * Returns the number of bytes consumed, or 0 when the varname is missing or
 * a ':' / '*' after it does not form a valid modifier.
 */
static long parse_varspec(const char *s, long len) {
  const long name_len = parse_varname(s, len);
  if(name_len == 0) return 0;

  /* No modifier follows: the varspec is just the varname. */
  if(name_len >= len) return name_len;
  if(s[name_len] != ':' && s[name_len] != '*') return name_len;

  const long mod_len = parse_modifier(s + name_len, len - name_len);
  return mod_len == 0 ? 0 : name_len + mod_len;
}
181
+
182
/*
 * Consume a variable-list (RFC 6570 section 2.3):
 *   varspec *( "," varspec )
 * Returns the number of bytes consumed, or 0 when the first varspec is
 * missing or a "," is not followed by a varspec.
 */
static long parse_variable_list(const char *s, long len) {
  long pos = parse_varspec(s, len);
  if(pos == 0) return 0;

  while(pos < len && s[pos] == ',') {
    const long after_comma = pos + 1;
    const long next = parse_varspec(s + after_comma, len - after_comma);
    if(next == 0) return 0;

    pos = after_comma + next;
  }

  return pos;
}
198
+
199
/*
 * Consume an expression (RFC 6570 section 2.2):
 *   "{" [ operator ] variable-list "}"
 * Returns the number of bytes consumed, braces included, or 0 on failure.
 */
static long parse_expression(const char *s, long len) {
  if(len < 2) return 0;
  if(s[0] != '{') return 0;

  /* len >= 2 guarantees s[1] is readable; skip the optional operator. */
  long cursor = is_operator((unsigned char)s[1]) ? 2 : 1;

  const long list_len = parse_variable_list(s + cursor, len - cursor);
  if(list_len == 0) return 0;
  cursor += list_len;

  if(cursor < len && s[cursor] == '}') return cursor + 1;
  return 0;
}
218
+
219
/*
 * Checks that the `len` bytes at `s` form a sequence of literal runs and
 * "{...}" expressions with nothing left over.
 * Returns true when the whole input is consumed (including when `len` is 0),
 * false as soon as an expression or literal run fails to parse.
 */
bool parse_uri_template(const char *s, long len) {
  for(long offset = 0; offset < len; ) {
    const char *rest = s + offset;
    const long left = len - offset;

    const long step = (*rest == '{')
      ? parse_expression(rest, left)
      : parse_literals_run(rest, left);

    if(step == 0) return false;
    offset += step;
  }

  return true;
}
@@ -0,0 +1,18 @@
1
+ #ifndef FAST_JSON_FORMATS_UTILS_URI_TEMPLATE_PARSER_H
2
+ #define FAST_JSON_FORMATS_UTILS_URI_TEMPLATE_PARSER_H
3
+
4
+ #include <stdbool.h>
5
+
6
+ /*
7
+ * RFC 6570 URI Template parser.
8
+ *
9
+ * A URI Template is *( literals / expression ). Literals accept the RFC's
10
+ * ASCII literal set, pct-encoded triples, and any well-formed multi-byte UTF-8 (a superset of ucschar / iprivate); expressions are ASCII-only:
11
+ * "{" [ operator ] variable-list "}"
12
+ *
13
+ * Returns true if the entire input is a well-formed URI Template. The empty
14
+ * string is valid (zero repetitions of literals/expression).
15
+ */
16
+ bool parse_uri_template(const char *s, long len);
17
+
18
+ #endif
@@ -0,0 +1,73 @@
1
+ #include "formats/utils/utf8.h"
2
+
3
/* Byte-range constants for UTF-8 validation (RFC 3629). */
enum {
  /* ASCII upper bound. */
  UTF8_ASCII_MAX = 0x7F,       /* 0111 1111 */

  /* Continuation-byte range. */
  UTF8_CONT_MIN = 0x80,        /* 1000 0000 */
  UTF8_CONT_MAX = 0xBF,        /* 1011 1111 */

  /* Valid lead-byte ranges per sequence length. */
  UTF8_LEAD2_MIN = 0xC2,       /* 1100 0010 */
  UTF8_LEAD2_MAX = 0xDF,       /* 1101 1111 */
  UTF8_LEAD3_MIN = 0xE0,       /* 1110 0000 */
  UTF8_LEAD3_MAX = 0xEF,       /* 1110 1111 */
  UTF8_LEAD4_MIN = 0xF0,       /* 1111 0000 */
  UTF8_LEAD4_MAX = 0xF4,       /* 1111 0100 */

  /* Lead byte whose second byte is tightened to exclude U+D800..U+DFFF. */
  UTF8_LEAD3_SURROGATE = 0xED, /* 1110 1101 */

  /* Tightened second-byte bounds for the irregular lead bytes. */
  UTF8_E0_BYTE2_MIN = 0xA0,    /* 1010 0000 */
  UTF8_ED_BYTE2_MAX = 0x9F,    /* 1001 1111 */
  UTF8_F0_BYTE2_MIN = 0x90,    /* 1001 0000 */
  UTF8_F4_BYTE2_MAX = 0x8F     /* 1000 1111 */
};

/*
 * Returns the length in bytes (1..4) of the UTF-8 sequence starting at `s`,
 * reading at most `remaining` bytes, or 0 when `remaining` is below 1, the
 * sequence is truncated, or the bytes are not a valid sequence.
 */
long utf8_seq_len(const unsigned char *s, long remaining) {
  if(remaining < 1) return 0;

  const unsigned char lead = s[0];

  if(lead <= UTF8_ASCII_MAX) return 1;

  /* Stray continuation bytes, 0xC0/0xC1 and anything above 0xF4. */
  if(lead < UTF8_LEAD2_MIN || lead > UTF8_LEAD4_MAX) return 0;

  long width;
  unsigned char second_min = UTF8_CONT_MIN;
  unsigned char second_max = UTF8_CONT_MAX;

  if(lead <= UTF8_LEAD2_MAX) {
    width = 2;
  } else if(lead <= UTF8_LEAD3_MAX) {
    width = 3;
    if(lead == UTF8_LEAD3_MIN) second_min = UTF8_E0_BYTE2_MIN;       /* anti-overlong */
    if(lead == UTF8_LEAD3_SURROGATE) second_max = UTF8_ED_BYTE2_MAX; /* anti-surrogate */
  } else {
    width = 4;
    if(lead == UTF8_LEAD4_MIN) second_min = UTF8_F0_BYTE2_MIN;       /* anti-overlong */
    if(lead == UTF8_LEAD4_MAX) second_max = UTF8_F4_BYTE2_MAX;       /* anti-codepoint > U+10FFFF */
  }

  if(remaining < width) return 0;

  if(s[1] < second_min || s[1] > second_max) return 0;

  /* Bytes after the second one are plain continuation bytes. */
  for(long i = 2; i < width; i++) {
    if(s[i] < UTF8_CONT_MIN || s[i] > UTF8_CONT_MAX) return 0;
  }

  return width;
}
@@ -0,0 +1,17 @@
1
+ #ifndef FAST_JSON_FORMATS_UTILS_UTF8_H
2
+ #define FAST_JSON_FORMATS_UTILS_UTF8_H
3
+
4
+ #include <stdbool.h>
5
+
6
+ /*
7
+ * Strict UTF-8 sequence validator. Given a pointer to the leading byte of a
8
+ * UTF-8 character, returns the byte length (1..4) of a valid UTF-8 sequence
9
+ * starting at that position, or 0 if the bytes do not form a valid sequence.
10
+ *
11
+ * Returns 1 for any ASCII byte (< 0x80). Returns 2..4 for valid multi-byte
12
+ * sequences. Rejects overlong encodings, UTF-16 surrogates (U+D800..U+DFFF)
13
+ * and codepoints above U+10FFFF.
14
+ */
15
+ long utf8_seq_len(const unsigned char *s, long remaining);
16
+
17
+ #endif
@@ -0,0 +1,31 @@
1
+ #include "if.h"
2
+ #include "error.h"
3
+
4
+ extern bool is_valid(VALUE, CompiledSchema *, VALUE, Context *);
5
+
6
+ static void run_then(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
7
+ bool valid = is_valid(schema, compiled_schema, data, context);
8
+
9
+ if(!valid)
10
+ yield_error(compiled_schema, data, context, "if/then");
11
+ }
12
+
13
+ static void run_else(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
14
+ bool valid = is_valid(schema, compiled_schema, data, context);
15
+
16
+ if(!valid)
17
+ yield_error(compiled_schema, data, context, "if/else");
18
+ }
19
+
20
+ void validate_if(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
21
+ if(compiled_schema->then_schema == NULL && compiled_schema->else_schema == NULL)
22
+ return;
23
+
24
+ bool valid = is_valid(schema, compiled_schema->if_schema, data, context);
25
+
26
+ if(valid && compiled_schema->then_schema != NULL) {
27
+ run_then(schema, compiled_schema->then_schema, data, context);
28
+ } else if(!valid && compiled_schema->else_schema != NULL) {
29
+ run_else(schema, compiled_schema->else_schema, data, context);
30
+ }
31
+ }
@@ -0,0 +1,4 @@
1
+ #include <ruby.h>
2
+ #include "compiled_schema.h"
3
+
4
+ void validate_if(VALUE, CompiledSchema *, VALUE, Context *);
@@ -0,0 +1,124 @@
1
+ #include "is_valid.h"
2
+
3
+ VALUE short_circuit_tag;
4
+
5
+ struct is_valid_args_memo_S {
6
+ VALUE schema;
7
+ CompiledSchema *compiled_schema;
8
+ VALUE data;
9
+ Context *context;
10
+ };
11
+
12
+ struct ensure_args_memo_S {
13
+ Context *context;
14
+ bool prev_short_circuit;
15
+ };
16
+
17
+ static VALUE is_valid_body(RB_BLOCK_CALL_FUNC_ARGLIST(_tag, arg)) {
18
+ struct is_valid_args_memo_S *args = (struct is_valid_args_memo_S *)arg;
19
+
20
+ args->compiled_schema->validation_function(
21
+ args->schema, args->compiled_schema, args->data, args->context
22
+ );
23
+
24
+ return Qtrue;
25
+ }
26
+
27
+ static VALUE is_valid_catch(VALUE arg) {
28
+ return rb_catch_obj(short_circuit_tag, is_valid_body, arg);
29
+ }
30
+
31
+ static VALUE is_valid_ensure(VALUE arg) {
32
+ struct ensure_args_memo_S *ensure_args = (struct ensure_args_memo_S *)arg;
33
+
34
+ ensure_args->context->short_circuit_on_error = ensure_args->prev_short_circuit;
35
+
36
+ return Qnil;
37
+ }
38
+
39
+ /*
40
+ * `is_valid` runs the given compiled schema against the given data and
41
+ * reports whether the data validates, without yielding any error to the
42
+ * user. It is the entry point used by every combinator that needs a
43
+ * pure pass/fail answer about a sub-schema: `anyOf`, `oneOf`, `allOf`,
44
+ * `not`, `if`, and the array `contains` keyword.
45
+ *
46
+ * Why short-circuiting matters
47
+ * ----------------------------
48
+ * Validation is normally driven by side effects: when a constraint is
49
+ * violated, `yield_error` builds an `Error` and yields it to the user
50
+ * block. Combinators don't want that. As soon as `anyOf` (for example)
51
+ * sees the *first* failure inside a branch, it knows that branch is
52
+ * invalid and wants to abandon it immediately, no matter how deeply
53
+ * nested the failing constraint is. Letting validation run to completion
54
+ * to discover the failure would also leak the branch's internal errors
55
+ * to the user, which is wrong.
56
+ *
57
+ * How the short-circuit works
58
+ * ---------------------------
59
+ * Two pieces of state cooperate:
60
+ *
61
+ * 1. `context->short_circuit_on_error` (a `bool`) tells `yield_error`
62
+ * whether it should yield (the default, top-level behavior) or
63
+ * short-circuit (when an `is_valid` frame is active). The flag is
64
+ * set to `true` here on entry and restored to its prior value on
65
+ * exit, regardless of how the body unwinds.
66
+ *
67
+ * 2. `short_circuit_tag` (a unique frozen `Object.new` allocated in
68
+ * `Init_is_valid`) is the tag we use with Ruby's `throw`/`catch`
69
+ * mechanism. When `yield_error` is reached with the flag set, it
70
+ * calls `rb_throw_obj(short_circuit_tag, Qfalse)`, which Ruby's
71
+ * VM unwinds up to the matching `rb_catch_obj` registered below.
72
+ *
73
+ * Why throw/catch instead of setjmp/longjmp
74
+ * -----------------------------------------
75
+ * An earlier implementation used `RUBY_SETJMP`/`RUBY_LONGJMP` to escape
76
+ * out of nested validation. That worked on Ruby 2.x, but in Ruby 3.x it
77
+ * corrupts VM state when the jump crosses Ruby callbacks like
78
+ * `rb_funcall`, `rb_reg_match`, `rb_yield`, or `rb_hash_foreach`: those
79
+ * functions register frame and iteration bookkeeping that `longjmp`
80
+ * skips over, leaving the VM and GC in an inconsistent state and
81
+ * eventually segfaulting. `rb_throw_obj` performs the same logical
82
+ * non-local exit but uses the VM's own unwind machinery, which closes
83
+ * frames cleanly, releases hash iteration state, and runs intervening
84
+ * `ensure` blocks.
85
+ *
86
+ * Why we still need `rb_ensure`
87
+ * -----------------------------
88
+ * `rb_catch_obj` only intercepts throws matching its tag. A real Ruby
89
+ * exception -- e.g. the `RuntimeError` raised by `INCR_CONTEXT` when
90
+ * the document exceeds `MAX_CONTEXT_DEPTH`, or a user `raise` from
91
+ * inside the validate block -- is not a throw; it propagates straight
92
+ * through. We wrap the catch in `rb_ensure` so that even when an
93
+ * exception escapes, the ensure callback restores
94
+ * `short_circuit_on_error` to its prior value before unwinding
95
+ * continues. Without that, an active `is_valid` frame interrupted by
96
+ * a real exception would leave the flag stuck at `true`, silently
97
+ * breaking subsequent validations that share the same context.
98
+ *
99
+ * Nesting
100
+ * -------
101
+ * Combinators routinely nest (e.g. `anyOf` containing `oneOf`
102
+ * containing `allOf`). Saving and restoring the flag around each
103
+ * `is_valid` call, combined with `rb_catch_obj`'s LIFO semantics
104
+ * (each throw is caught by the nearest matching catch), gives correct
105
+ * behavior at any depth: an inner short-circuit unwinds only to the
106
+ * inner catch, leaving the outer combinator free to continue
107
+ * iterating its remaining branches with the flag still set.
108
+ */
109
+ bool is_valid(VALUE schema, CompiledSchema *compiled_schema, VALUE data, Context *context) {
110
+ struct is_valid_args_memo_S is_valid_args = { schema, compiled_schema, data, context };
111
+ struct ensure_args_memo_S ensure_args = { context, context->short_circuit_on_error };
112
+
113
+ context->short_circuit_on_error = true;
114
+
115
+ VALUE result = rb_ensure(is_valid_catch, (VALUE)&is_valid_args, is_valid_ensure, (VALUE)&ensure_args);
116
+
117
+ return result == Qtrue;
118
+ }
119
+
120
+ void Init_is_valid(void) {
121
+ short_circuit_tag = rb_obj_freeze(rb_class_new_instance(0, NULL, rb_cObject));
122
+
123
+ rb_gc_register_address(&short_circuit_tag);
124
+ }
@@ -0,0 +1,6 @@
1
+ #include <ruby.h>
2
+ #include "compiled_schema.h"
3
+ #include "types/context.h"
4
+
5
+ extern VALUE short_circuit_tag;
6
+ void Init_is_valid(void);