ata-validator 0.4.9 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding/ata_napi.cpp +26 -33
- package/package.json +1 -1
- package/src/ata.cpp +329 -202
package/binding/ata_napi.cpp
CHANGED
|
@@ -28,7 +28,7 @@ using schema_node_ptr = std::shared_ptr<schema_node>;
|
|
|
28
28
|
|
|
29
29
|
// MUST match layout in src/ata.cpp exactly (reinterpret_cast)
|
|
30
30
|
struct schema_node {
|
|
31
|
-
|
|
31
|
+
uint8_t type_mask = 0;
|
|
32
32
|
|
|
33
33
|
std::optional<double> minimum;
|
|
34
34
|
std::optional<double> maximum;
|
|
@@ -67,11 +67,11 @@ struct schema_node {
|
|
|
67
67
|
};
|
|
68
68
|
std::vector<pattern_prop> pattern_properties;
|
|
69
69
|
|
|
70
|
-
std::optional<std::string> enum_values_raw;
|
|
71
70
|
std::vector<std::string> enum_values_minified;
|
|
72
71
|
std::optional<std::string> const_value_raw;
|
|
73
72
|
|
|
74
73
|
std::optional<std::string> format;
|
|
74
|
+
uint8_t format_id = 255;
|
|
75
75
|
|
|
76
76
|
std::vector<schema_node_ptr> all_of;
|
|
77
77
|
std::vector<schema_node_ptr> any_of;
|
|
@@ -413,46 +413,39 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
413
413
|
|
|
414
414
|
auto actual_type = napi_type_of(value);
|
|
415
415
|
|
|
416
|
-
// type
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
if (
|
|
416
|
+
// type — uses bitmask matching ata.cpp json_type enum order:
|
|
417
|
+
// 0=string, 1=number, 2=integer, 3=boolean, 4=null_value, 5=object, 6=array
|
|
418
|
+
if (node->type_mask) {
|
|
419
|
+
uint8_t val_bits = 0;
|
|
420
|
+
if (actual_type == "string") val_bits = 1u << 0;
|
|
421
|
+
else if (actual_type == "number") val_bits = 1u << 1;
|
|
422
|
+
else if (actual_type == "integer") val_bits = (1u << 2) | (1u << 1); // integer matches number
|
|
423
|
+
else if (actual_type == "boolean") val_bits = 1u << 3;
|
|
424
|
+
else if (actual_type == "null") val_bits = 1u << 4;
|
|
425
|
+
else if (actual_type == "object") val_bits = 1u << 5;
|
|
426
|
+
else if (actual_type == "array") val_bits = 1u << 6;
|
|
427
|
+
if (!(val_bits & node->type_mask)) {
|
|
428
|
+
static const char* type_names[] = {"string","number","integer","boolean","null","object","array"};
|
|
426
429
|
std::string expected;
|
|
427
|
-
for (
|
|
428
|
-
if (
|
|
429
|
-
|
|
430
|
+
for (int b = 0; b < 7; ++b) {
|
|
431
|
+
if (node->type_mask & (1u << b)) {
|
|
432
|
+
if (!expected.empty()) expected += ", ";
|
|
433
|
+
expected += type_names[b];
|
|
434
|
+
}
|
|
430
435
|
}
|
|
431
436
|
errors.push_back({ata::error_code::type_mismatch, path,
|
|
432
437
|
"expected type " + expected + ", got " + actual_type});
|
|
433
438
|
}
|
|
434
439
|
}
|
|
435
440
|
|
|
436
|
-
// enum
|
|
437
|
-
if (node->
|
|
441
|
+
// enum — compare against pre-minified canonical values
|
|
442
|
+
if (!node->enum_values_minified.empty()) {
|
|
438
443
|
std::string val_json = napi_to_json(env, value);
|
|
439
|
-
// Parse enum from raw and compare
|
|
440
444
|
bool found = false;
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
auto parse_fn = json_obj.Get("parse").As<Napi::Function>();
|
|
446
|
-
auto enum_arr = parse_fn.Call(json_obj,
|
|
447
|
-
{Napi::String::New(env, node->enum_values_raw.value())});
|
|
448
|
-
if (enum_arr.IsArray()) {
|
|
449
|
-
auto arr = enum_arr.As<Napi::Array>();
|
|
450
|
-
for (uint32_t i = 0; i < arr.Length(); ++i) {
|
|
451
|
-
std::string elem_json = napi_to_json(env, arr.Get(i));
|
|
452
|
-
if (elem_json == val_json) {
|
|
453
|
-
found = true;
|
|
454
|
-
break;
|
|
455
|
-
}
|
|
445
|
+
for (const auto& ev : node->enum_values_minified) {
|
|
446
|
+
if (ev == val_json) {
|
|
447
|
+
found = true;
|
|
448
|
+
break;
|
|
456
449
|
}
|
|
457
450
|
}
|
|
458
451
|
if (!found) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ata-validator",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.10",
|
|
4
4
|
"description": "Ultra-fast JSON Schema validator. Beats ajv on every valid-path benchmark: 1.1x–2.7x faster validate(obj), 151x faster compilation, 5.9x faster parallel batch. Speculative validation with V8-optimized JS codegen, simdjson, multi-core. Standard Schema V1 compatible.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
package/src/ata.cpp
CHANGED
|
@@ -128,17 +128,20 @@ static bool fast_check_hostname(std::string_view s) {
|
|
|
128
128
|
return label_len > 0;
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
131
|
+
// Check format by pre-resolved numeric ID — no string comparisons.
|
|
132
|
+
static bool check_format_by_id(std::string_view sv, uint8_t fid) {
|
|
133
|
+
switch (fid) {
|
|
134
|
+
case 0: return fast_check_email(sv);
|
|
135
|
+
case 1: return fast_check_date(sv);
|
|
136
|
+
case 2: return fast_check_datetime(sv);
|
|
137
|
+
case 3: return fast_check_time(sv);
|
|
138
|
+
case 4: return fast_check_ipv4(sv);
|
|
139
|
+
case 5: return sv.find(':') != std::string_view::npos;
|
|
140
|
+
case 6: return fast_check_uri(sv);
|
|
141
|
+
case 7: return fast_check_uuid(sv);
|
|
142
|
+
case 8: return fast_check_hostname(sv);
|
|
143
|
+
default: return true; // unknown formats pass
|
|
144
|
+
}
|
|
142
145
|
}
|
|
143
146
|
|
|
144
147
|
namespace ata {
|
|
@@ -182,14 +185,74 @@ static std::string canonical_json(dom::element el) {
|
|
|
182
185
|
}
|
|
183
186
|
}
|
|
184
187
|
|
|
188
|
+
// JSON Schema primitive types as a compact enum, so the validation hot path
// compares small integers / bitmasks instead of strings.
//
// The numeric order is significant: bit N of a schema's type_mask stands for
// the enumerator whose value is N, and the N-API binding hard-codes the same
// order (0=string, 1=number, 2=integer, 3=boolean, 4=null, 5=object, 6=array).
enum class json_type : uint8_t {
    string = 0,
    number = 1,
    integer = 2,
    boolean = 3,
    null_value = 4,
    object = 5,
    array = 6
};

// Number of json_type enumerators (array is the last one).
static constexpr uint8_t json_type_count =
    static_cast<uint8_t>(json_type::array) + 1;

// Every enumerator needs its own bit in a uint8_t type mask.
static_assert(json_type_count <= 8, "json_type must fit in an 8-bit mask");

// Return the JSON Schema spelling of a type ("string", "null", ...).
// Values outside the enum yield "unknown".
static constexpr const char* json_type_name(json_type t) noexcept {
    switch (t) {
        case json_type::string:     return "string";
        case json_type::number:     return "number";
        case json_type::integer:    return "integer";
        case json_type::boolean:    return "boolean";
        case json_type::null_value: return "null";
        case json_type::object:     return "object";
        case json_type::array:      return "array";
    }
    return "unknown";
}

// Parse a JSON Schema type name into a json_type.
//
// The lookup reuses json_type_name(), so the spelling of each type lives in
// exactly one place. Names that are not recognised fall back to
// json_type::string (unchanged historical behavior).
// NOTE(review): the fallback means a schema with a misspelled "type" silently
// behaves like "type": "string" — confirm that this is intended.
static constexpr json_type json_type_from_sv(std::string_view s) noexcept {
    for (uint8_t i = 0; i < json_type_count; ++i) {
        const json_type candidate = static_cast<json_type>(i);
        if (s == std::string_view(json_type_name(candidate))) return candidate;
    }
    return json_type::string;  // fallback
}

// Bitmask for O(1) type checking: one bit per json_type value.
// The cast makes the unsigned -> uint8_t narrowing explicit; it is lossless
// because json_type_count <= 8 is asserted above.
static constexpr uint8_t json_type_bit(json_type t) noexcept {
    return static_cast<uint8_t>(1u << static_cast<unsigned>(t));
}
|
|
219
|
+
|
|
220
|
+
// Map dom::element_type to a json_type bitmask (number matches integer too).
|
|
221
|
+
static uint8_t element_type_mask(dom::element_type t) {
|
|
222
|
+
switch (t) {
|
|
223
|
+
case dom::element_type::STRING: return json_type_bit(json_type::string);
|
|
224
|
+
case dom::element_type::INT64:
|
|
225
|
+
case dom::element_type::UINT64: return json_type_bit(json_type::integer) | json_type_bit(json_type::number);
|
|
226
|
+
case dom::element_type::DOUBLE: return json_type_bit(json_type::number);
|
|
227
|
+
case dom::element_type::BOOL: return json_type_bit(json_type::boolean);
|
|
228
|
+
case dom::element_type::NULL_VALUE: return json_type_bit(json_type::null_value);
|
|
229
|
+
case dom::element_type::ARRAY: return json_type_bit(json_type::array);
|
|
230
|
+
case dom::element_type::OBJECT: return json_type_bit(json_type::object);
|
|
231
|
+
}
|
|
232
|
+
return 0;
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// Resolve a "format" keyword value to its numeric ID once, when the schema is
// compiled, so validation can dispatch on a small integer.
//
// f - the format name exactly as written in the schema (case-sensitive).
//
// Returns 0..8 for the recognised formats ("uri" and "uri-reference" share
// ID 6) and 255 for anything else.
static uint8_t format_id_from_string(const std::string& f) {
    struct format_entry {
        const char* name;
        uint8_t id;
    };
    static const format_entry known_formats[] = {
        {"email", 0},
        {"date", 1},
        {"date-time", 2},
        {"time", 3},
        {"ipv4", 4},
        {"ipv6", 5},
        {"uri", 6},
        {"uri-reference", 6},
        {"uuid", 7},
        {"hostname", 8},
    };

    for (const format_entry& entry : known_formats) {
        if (f == entry.name) return entry.id;
    }
    return 255;  // not a recognised format
}
|
|
248
|
+
|
|
185
249
|
// Forward declarations
|
|
186
250
|
struct schema_node;
|
|
187
251
|
using schema_node_ptr = std::shared_ptr<schema_node>;
|
|
188
252
|
|
|
189
253
|
struct schema_node {
|
|
190
|
-
// type constraint
|
|
191
|
-
//
|
|
192
|
-
std::vector<std::string> types;
|
|
254
|
+
// type constraint — bitmask for O(1) type checking
|
|
255
|
+
uint8_t type_mask = 0; // bit per json_type value
|
|
193
256
|
|
|
194
257
|
// numeric
|
|
195
258
|
std::optional<double> minimum;
|
|
@@ -234,12 +297,12 @@ struct schema_node {
|
|
|
234
297
|
std::vector<pattern_prop> pattern_properties;
|
|
235
298
|
|
|
236
299
|
// enum / const
|
|
237
|
-
std::optional<std::string> enum_values_raw; // raw JSON array string
|
|
238
300
|
std::vector<std::string> enum_values_minified; // pre-minified enum values
|
|
239
301
|
std::optional<std::string> const_value_raw; // raw JSON value string
|
|
240
302
|
|
|
241
303
|
// format
|
|
242
304
|
std::optional<std::string> format;
|
|
305
|
+
uint8_t format_id = 255; // pre-resolved format ID (255 = unknown/pass)
|
|
243
306
|
|
|
244
307
|
// composition
|
|
245
308
|
std::vector<schema_node_ptr> all_of;
|
|
@@ -281,7 +344,7 @@ struct plan {
|
|
|
281
344
|
std::vector<std::string> strings;
|
|
282
345
|
std::vector<std::shared_ptr<re2::RE2>> regexes;
|
|
283
346
|
std::vector<std::vector<std::string>> enum_sets;
|
|
284
|
-
std::vector<
|
|
347
|
+
std::vector<uint8_t> type_masks;
|
|
285
348
|
std::vector<uint8_t> format_ids;
|
|
286
349
|
std::vector<std::vector<ins>> subs;
|
|
287
350
|
};
|
|
@@ -302,6 +365,10 @@ static dom::parser& tl_dom_parser() {
|
|
|
302
365
|
thread_local dom::parser p;
|
|
303
366
|
return p;
|
|
304
367
|
}
|
|
368
|
+
// Per-thread DOM parser used for parsing object keys during propertyNames
// validation. It is a different instance from the one tl_dom_parser() returns.
// NOTE(review): presumably kept separate so that parsing a key does not
// invalidate the document that is currently being validated — confirm against
// the simdjson parser lifetime rules.
static dom::parser& tl_dom_key_parser() {
    thread_local dom::parser key_parser;
    return key_parser;
}
|
|
305
372
|
static simdjson::ondemand::parser& tl_od_parser() {
|
|
306
373
|
thread_local simdjson::ondemand::parser p;
|
|
307
374
|
return p;
|
|
@@ -346,12 +413,12 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
346
413
|
if (type_el.is<std::string_view>()) {
|
|
347
414
|
std::string_view sv;
|
|
348
415
|
type_el.get(sv);
|
|
349
|
-
node->
|
|
416
|
+
node->type_mask |= json_type_bit(json_type_from_sv(sv));
|
|
350
417
|
} else if (type_el.is<dom::array>()) {
|
|
351
418
|
dom::array type_arr; type_el.get(type_arr); for (auto t : type_arr) {
|
|
352
419
|
std::string_view sv;
|
|
353
420
|
if (t.get(sv) == SUCCESS) {
|
|
354
|
-
node->
|
|
421
|
+
node->type_mask |= json_type_bit(json_type_from_sv(sv));
|
|
355
422
|
}
|
|
356
423
|
}
|
|
357
424
|
}
|
|
@@ -531,7 +598,10 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
531
598
|
dom::element fmt_el;
|
|
532
599
|
if (obj["format"].get(fmt_el) == SUCCESS) {
|
|
533
600
|
std::string_view sv;
|
|
534
|
-
if (fmt_el.get(sv) == SUCCESS)
|
|
601
|
+
if (fmt_el.get(sv) == SUCCESS) {
|
|
602
|
+
node->format = std::string(sv);
|
|
603
|
+
node->format_id = format_id_from_string(node->format.value());
|
|
604
|
+
}
|
|
535
605
|
}
|
|
536
606
|
|
|
537
607
|
// $id (register in defs for potential resolution)
|
|
@@ -546,7 +616,6 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
546
616
|
// enum — pre-minify each value at compile time
|
|
547
617
|
dom::element enum_el;
|
|
548
618
|
if (obj["enum"].get(enum_el) == SUCCESS) {
|
|
549
|
-
node->enum_values_raw = canonical_json(enum_el);
|
|
550
619
|
if (enum_el.is<dom::array>()) {
|
|
551
620
|
dom::array enum_arr; enum_el.get(enum_arr); for (auto e : enum_arr) {
|
|
552
621
|
node->enum_values_minified.push_back(canonical_json(e));
|
|
@@ -640,41 +709,37 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
640
709
|
// Macro for early termination
|
|
641
710
|
#define ATA_CHECK_EARLY() if (!all_errors && !errors.empty()) return
|
|
642
711
|
|
|
712
|
+
using et = dom::element_type;
|
|
713
|
+
|
|
714
|
+
|
|
643
715
|
// Use string_view to avoid allocations in hot path
|
|
644
716
|
static std::string_view type_of_sv(dom::element el) {
|
|
645
717
|
switch (el.type()) {
|
|
646
|
-
case
|
|
647
|
-
case
|
|
648
|
-
case
|
|
649
|
-
case
|
|
650
|
-
case
|
|
651
|
-
case
|
|
652
|
-
case
|
|
653
|
-
case
|
|
718
|
+
case et::STRING: return "string";
|
|
719
|
+
case et::INT64:
|
|
720
|
+
case et::UINT64: return "integer";
|
|
721
|
+
case et::DOUBLE: return "number";
|
|
722
|
+
case et::BOOL: return "boolean";
|
|
723
|
+
case et::NULL_VALUE:return "null";
|
|
724
|
+
case et::ARRAY: return "array";
|
|
725
|
+
case et::OBJECT: return "object";
|
|
654
726
|
}
|
|
655
727
|
return "unknown";
|
|
656
728
|
}
|
|
657
729
|
|
|
658
|
-
static std::string type_of(dom::element el) {
|
|
659
|
-
return std::string(type_of_sv(el));
|
|
660
|
-
}
|
|
661
730
|
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
if (type == "number" && (actual == "integer" || actual == "number"))
|
|
666
|
-
return true;
|
|
667
|
-
return false;
|
|
731
|
+
// O(1) type check: test element's type bits against the schema's type_mask.
|
|
732
|
+
static bool type_matches_mask(dom::element el, uint8_t type_mask) {
|
|
733
|
+
return (element_type_mask(el.type()) & type_mask) != 0;
|
|
668
734
|
}
|
|
669
735
|
|
|
670
736
|
// Convert a numeric DOM element to double for range / multipleOf style checks.
//
// Returns the element's value for DOUBLE, INT64 and UINT64 elements, and 0 for
// every other element type. If simdjson reports an error while reading the
// value, 0 is returned as well; the locals are initialized so that an
// indeterminate value can never be returned.
//
// Integers with a magnitude above 2^53 cannot be represented exactly as a
// double, so distinct large integers may map to the same result.
static double to_double(dom::element el) {
    switch (el.type()) {
        case et::DOUBLE: {
            double v = 0.0;
            return el.get(v) == simdjson::SUCCESS ? v : 0.0;
        }
        case et::INT64: {
            int64_t v = 0;
            return el.get(v) == simdjson::SUCCESS ? static_cast<double>(v) : 0.0;
        }
        case et::UINT64: {
            uint64_t v = 0;
            return el.get(v) == simdjson::SUCCESS ? static_cast<double>(v) : 0.0;
        }
        default:
            return 0;
    }
}
|
|
679
744
|
|
|
680
745
|
// Count UTF-8 codepoints — branchless: count non-continuation bytes
|
|
@@ -843,22 +908,17 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
843
908
|
}
|
|
844
909
|
|
|
845
910
|
// type
|
|
846
|
-
if (
|
|
847
|
-
|
|
848
|
-
for (const auto& t : node->types) {
|
|
849
|
-
if (type_matches(value, t)) {
|
|
850
|
-
match = true;
|
|
851
|
-
break;
|
|
852
|
-
}
|
|
853
|
-
}
|
|
854
|
-
if (!match) {
|
|
911
|
+
if (node->type_mask) {
|
|
912
|
+
if (!type_matches_mask(value, node->type_mask)) {
|
|
855
913
|
std::string expected;
|
|
856
|
-
for (
|
|
857
|
-
if (
|
|
858
|
-
|
|
914
|
+
for (int b = 0; b < 7; ++b) {
|
|
915
|
+
if (node->type_mask & (1u << b)) {
|
|
916
|
+
if (!expected.empty()) expected += ", ";
|
|
917
|
+
expected += json_type_name(static_cast<json_type>(b));
|
|
918
|
+
}
|
|
859
919
|
}
|
|
860
920
|
errors.push_back({error_code::type_mismatch, path,
|
|
861
|
-
"expected type " + expected + ", got " +
|
|
921
|
+
"expected type " + expected + ", got " + std::string(type_of_sv(value))});
|
|
862
922
|
ATA_CHECK_EARLY();
|
|
863
923
|
}
|
|
864
924
|
}
|
|
@@ -891,8 +951,8 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
891
951
|
|
|
892
952
|
ATA_CHECK_EARLY();
|
|
893
953
|
// Numeric validations
|
|
894
|
-
auto
|
|
895
|
-
if (
|
|
954
|
+
auto vtype = value.type();
|
|
955
|
+
if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
|
|
896
956
|
double v = to_double(value);
|
|
897
957
|
if (node->minimum.has_value() && v < node->minimum.value()) {
|
|
898
958
|
errors.push_back({error_code::minimum_violation, path,
|
|
@@ -929,7 +989,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
929
989
|
}
|
|
930
990
|
|
|
931
991
|
// String validations
|
|
932
|
-
if (
|
|
992
|
+
if (vtype == et::STRING) {
|
|
933
993
|
std::string_view sv;
|
|
934
994
|
value.get(sv);
|
|
935
995
|
uint64_t len = utf8_length(sv);
|
|
@@ -955,7 +1015,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
955
1015
|
}
|
|
956
1016
|
|
|
957
1017
|
if (node->format.has_value()) {
|
|
958
|
-
if (!
|
|
1018
|
+
if (!check_format_by_id(sv, node->format_id)) {
|
|
959
1019
|
errors.push_back({error_code::format_mismatch, path,
|
|
960
1020
|
"string does not match format: " +
|
|
961
1021
|
node->format.value()});
|
|
@@ -964,10 +1024,14 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
964
1024
|
}
|
|
965
1025
|
|
|
966
1026
|
// Array validations
|
|
967
|
-
if (
|
|
1027
|
+
if (vtype == et::ARRAY) {
|
|
968
1028
|
dom::array arr; value.get(arr);
|
|
969
|
-
uint64_t arr_size =
|
|
970
|
-
|
|
1029
|
+
uint64_t arr_size = arr.size();
|
|
1030
|
+
if(arr_size == 0xFFFFFF) [[unlikely]] {
|
|
1031
|
+
// Fallback for large arrays where size() saturates — count manually to avoid overflow
|
|
1032
|
+
arr_size = 0;
|
|
1033
|
+
for ([[maybe_unused]] auto _ : arr) ++arr_size;
|
|
1034
|
+
}
|
|
971
1035
|
|
|
972
1036
|
if (node->min_items.has_value() && arr_size < node->min_items.value()) {
|
|
973
1037
|
errors.push_back({error_code::min_items_violation, path,
|
|
@@ -983,13 +1047,29 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
983
1047
|
}
|
|
984
1048
|
|
|
985
1049
|
if (node->unique_items) {
|
|
986
|
-
std::set<std::string> seen;
|
|
987
1050
|
bool has_dup = false;
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
1051
|
+
// Fast path: check if all items are the same simple type
|
|
1052
|
+
auto first_it = arr.begin();
|
|
1053
|
+
if (first_it != arr.end()) {
|
|
1054
|
+
auto first_type = (*first_it).type();
|
|
1055
|
+
bool all_same = true;
|
|
1056
|
+
for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
|
|
1057
|
+
if (all_same && first_type == et::STRING) {
|
|
1058
|
+
std::set<std::string_view> seen;
|
|
1059
|
+
for (auto item : arr) {
|
|
1060
|
+
std::string_view sv; item.get(sv);
|
|
1061
|
+
if (!seen.insert(sv).second) { has_dup = true; break; }
|
|
1062
|
+
}
|
|
1063
|
+
} else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
|
|
1064
|
+
std::set<double> seen;
|
|
1065
|
+
for (auto item : arr) {
|
|
1066
|
+
if (!seen.insert(to_double(item)).second) { has_dup = true; break; }
|
|
1067
|
+
}
|
|
1068
|
+
} else {
|
|
1069
|
+
std::set<std::string> seen;
|
|
1070
|
+
for (auto item : arr) {
|
|
1071
|
+
if (!seen.insert(canonical_json(item)).second) { has_dup = true; break; }
|
|
1072
|
+
}
|
|
993
1073
|
}
|
|
994
1074
|
}
|
|
995
1075
|
if (has_dup) {
|
|
@@ -1017,9 +1097,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1017
1097
|
if (node->contains_schema) {
|
|
1018
1098
|
uint64_t match_count = 0;
|
|
1019
1099
|
for (auto item : arr) {
|
|
1020
|
-
|
|
1021
|
-
validate_node(node->contains_schema, item, path, ctx, tmp, false);
|
|
1022
|
-
if (tmp.empty()) ++match_count;
|
|
1100
|
+
if (validate_fast(node->contains_schema, item, ctx)) ++match_count;
|
|
1023
1101
|
}
|
|
1024
1102
|
uint64_t min_c = node->min_contains.value_or(1);
|
|
1025
1103
|
uint64_t max_c = node->max_contains.value_or(arr_size);
|
|
@@ -1037,24 +1115,26 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1037
1115
|
}
|
|
1038
1116
|
|
|
1039
1117
|
// Object validations
|
|
1040
|
-
if (
|
|
1118
|
+
if (vtype == et::OBJECT) {
|
|
1041
1119
|
dom::object obj; value.get(obj);
|
|
1042
|
-
uint64_t prop_count = 0;
|
|
1043
|
-
for ([[maybe_unused]] auto _ : obj) ++prop_count;
|
|
1044
1120
|
|
|
1045
|
-
if (node->min_properties.has_value()
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1121
|
+
if (node->min_properties.has_value() || node->max_properties.has_value()) {
|
|
1122
|
+
uint64_t prop_count = 0;
|
|
1123
|
+
for ([[maybe_unused]] auto _ : obj) ++prop_count;
|
|
1124
|
+
if (node->min_properties.has_value() &&
|
|
1125
|
+
prop_count < node->min_properties.value()) {
|
|
1126
|
+
errors.push_back({error_code::min_properties_violation, path,
|
|
1127
|
+
"object has " + std::to_string(prop_count) +
|
|
1128
|
+
" properties, minimum " +
|
|
1129
|
+
std::to_string(node->min_properties.value())});
|
|
1130
|
+
}
|
|
1131
|
+
if (node->max_properties.has_value() &&
|
|
1132
|
+
prop_count > node->max_properties.value()) {
|
|
1133
|
+
errors.push_back({error_code::max_properties_violation, path,
|
|
1134
|
+
"object has " + std::to_string(prop_count) +
|
|
1135
|
+
" properties, maximum " +
|
|
1136
|
+
std::to_string(node->max_properties.value())});
|
|
1137
|
+
}
|
|
1058
1138
|
}
|
|
1059
1139
|
|
|
1060
1140
|
// required
|
|
@@ -1099,17 +1179,50 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1099
1179
|
}
|
|
1100
1180
|
}
|
|
1101
1181
|
}
|
|
1102
|
-
|
|
1103
|
-
// propertyNames
|
|
1182
|
+
// propertyNames — validate key as string directly when possible
|
|
1104
1183
|
if (node->property_names_schema) {
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1184
|
+
auto pn = node->property_names_schema;
|
|
1185
|
+
bool string_only = pn->ref.empty() && pn->all_of.empty() &&
|
|
1186
|
+
pn->any_of.empty() && pn->one_of.empty() && !pn->not_schema &&
|
|
1187
|
+
!pn->if_schema && pn->enum_values_minified.empty() &&
|
|
1188
|
+
!pn->const_value_raw.has_value();
|
|
1189
|
+
if (string_only) {
|
|
1190
|
+
// Fast path: validate string constraints on key directly
|
|
1191
|
+
for (auto [key, val] : obj) {
|
|
1192
|
+
std::string_view key_sv(key);
|
|
1193
|
+
if (pn->type_mask && !(pn->type_mask & json_type_bit(json_type::string))) {
|
|
1194
|
+
errors.push_back({error_code::type_mismatch, path,
|
|
1195
|
+
"propertyNames: key is string but schema requires different type"});
|
|
1196
|
+
continue;
|
|
1197
|
+
}
|
|
1198
|
+
uint64_t len = utf8_length(key_sv);
|
|
1199
|
+
if (pn->min_length.has_value() && len < pn->min_length.value()) {
|
|
1200
|
+
errors.push_back({error_code::min_length_violation, path,
|
|
1201
|
+
"propertyNames: key too short: " + std::string(key_sv)});
|
|
1202
|
+
}
|
|
1203
|
+
if (pn->max_length.has_value() && len > pn->max_length.value()) {
|
|
1204
|
+
errors.push_back({error_code::max_length_violation, path,
|
|
1205
|
+
"propertyNames: key too long: " + std::string(key_sv)});
|
|
1206
|
+
}
|
|
1207
|
+
if (pn->compiled_pattern) {
|
|
1208
|
+
if (!re2::RE2::PartialMatch(re2::StringPiece(key_sv.data(), key_sv.size()), *pn->compiled_pattern)) {
|
|
1209
|
+
errors.push_back({error_code::pattern_mismatch, path,
|
|
1210
|
+
"propertyNames: key does not match pattern: " + std::string(key_sv)});
|
|
1211
|
+
}
|
|
1212
|
+
}
|
|
1213
|
+
if (pn->format.has_value() && !check_format_by_id(key_sv, pn->format_id)) {
|
|
1214
|
+
errors.push_back({error_code::format_mismatch, path,
|
|
1215
|
+
"propertyNames: key does not match format: " + std::string(key_sv)});
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
} else {
|
|
1219
|
+
// Fallback: parse key as JSON string element
|
|
1220
|
+
for (auto [key, val] : obj) {
|
|
1221
|
+
std::string key_json = "\"" + std::string(key) + "\"";
|
|
1222
|
+
auto key_result = tl_dom_key_parser().parse(key_json);
|
|
1223
|
+
if (!key_result.error()) {
|
|
1224
|
+
validate_node(pn, key_result.value(), path, ctx, errors, all_errors);
|
|
1225
|
+
}
|
|
1113
1226
|
}
|
|
1114
1227
|
}
|
|
1115
1228
|
}
|
|
@@ -1235,12 +1348,8 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1235
1348
|
}
|
|
1236
1349
|
|
|
1237
1350
|
// type
|
|
1238
|
-
if (
|
|
1239
|
-
|
|
1240
|
-
for (const auto& t : node->types) {
|
|
1241
|
-
if (type_matches(value, t)) { match = true; break; }
|
|
1242
|
-
}
|
|
1243
|
-
if (!match) [[unlikely]] return false;
|
|
1351
|
+
if (node->type_mask) {
|
|
1352
|
+
if (!type_matches_mask(value, node->type_mask)) [[unlikely]] return false;
|
|
1244
1353
|
}
|
|
1245
1354
|
|
|
1246
1355
|
// enum
|
|
@@ -1258,10 +1367,10 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1258
1367
|
if (canonical_json(value) != node->const_value_raw.value()) [[unlikely]] return false;
|
|
1259
1368
|
}
|
|
1260
1369
|
|
|
1261
|
-
auto
|
|
1370
|
+
auto vtype = value.type();
|
|
1262
1371
|
|
|
1263
1372
|
// Numeric
|
|
1264
|
-
if (
|
|
1373
|
+
if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
|
|
1265
1374
|
double v = to_double(value);
|
|
1266
1375
|
if (node->minimum.has_value() && v < node->minimum.value()) return false;
|
|
1267
1376
|
if (node->maximum.has_value() && v > node->maximum.value()) return false;
|
|
@@ -1274,7 +1383,7 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1274
1383
|
}
|
|
1275
1384
|
|
|
1276
1385
|
// String
|
|
1277
|
-
if (
|
|
1386
|
+
if (vtype == et::STRING) {
|
|
1278
1387
|
std::string_view sv;
|
|
1279
1388
|
value.get(sv);
|
|
1280
1389
|
uint64_t len = utf8_length(sv);
|
|
@@ -1284,22 +1393,38 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1284
1393
|
if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern))
|
|
1285
1394
|
return false;
|
|
1286
1395
|
}
|
|
1287
|
-
if (node->format.has_value() && !
|
|
1396
|
+
if (node->format.has_value() && !check_format_by_id(sv, node->format_id)) return false;
|
|
1288
1397
|
}
|
|
1289
1398
|
|
|
1290
1399
|
// Array
|
|
1291
|
-
if (
|
|
1400
|
+
if (vtype == et::ARRAY) {
|
|
1292
1401
|
dom::array arr; value.get(arr);
|
|
1293
|
-
uint64_t arr_size =
|
|
1294
|
-
|
|
1402
|
+
uint64_t arr_size = arr.size();
|
|
1403
|
+
if(arr_size == 0xFFFFFF) [[unlikely]] {
|
|
1404
|
+
// Fallback for large arrays where size() saturates — count manually to avoid overflow
|
|
1405
|
+
arr_size = 0;
|
|
1406
|
+
for ([[maybe_unused]] auto _ : arr) ++arr_size;
|
|
1407
|
+
}
|
|
1295
1408
|
|
|
1296
1409
|
if (node->min_items.has_value() && arr_size < node->min_items.value()) return false;
|
|
1297
1410
|
if (node->max_items.has_value() && arr_size > node->max_items.value()) return false;
|
|
1298
1411
|
|
|
1299
1412
|
if (node->unique_items) {
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1413
|
+
auto first_it = arr.begin();
|
|
1414
|
+
if (first_it != arr.end()) {
|
|
1415
|
+
auto first_type = (*first_it).type();
|
|
1416
|
+
bool all_same = true;
|
|
1417
|
+
for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
|
|
1418
|
+
if (all_same && first_type == et::STRING) {
|
|
1419
|
+
std::set<std::string_view> seen;
|
|
1420
|
+
for (auto item : arr) { std::string_view sv; item.get(sv); if (!seen.insert(sv).second) return false; }
|
|
1421
|
+
} else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
|
|
1422
|
+
std::set<double> seen;
|
|
1423
|
+
for (auto item : arr) { if (!seen.insert(to_double(item)).second) return false; }
|
|
1424
|
+
} else {
|
|
1425
|
+
std::set<std::string> seen;
|
|
1426
|
+
for (auto item : arr) { if (!seen.insert(canonical_json(item)).second) return false; }
|
|
1427
|
+
}
|
|
1303
1428
|
}
|
|
1304
1429
|
}
|
|
1305
1430
|
|
|
@@ -1326,7 +1451,7 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1326
1451
|
}
|
|
1327
1452
|
|
|
1328
1453
|
// Object
|
|
1329
|
-
if (
|
|
1454
|
+
if (vtype == et::OBJECT) {
|
|
1330
1455
|
dom::object obj; value.get(obj);
|
|
1331
1456
|
|
|
1332
1457
|
if (node->min_properties.has_value() || node->max_properties.has_value()) {
|
|
@@ -1443,19 +1568,27 @@ static void cg_compile(const schema_node* n, cg::plan& p,
|
|
|
1443
1568
|
return;
|
|
1444
1569
|
}
|
|
1445
1570
|
// Type
|
|
1446
|
-
if (
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1571
|
+
if (n->type_mask) {
|
|
1572
|
+
int popcount = __builtin_popcount(n->type_mask);
|
|
1573
|
+
if (popcount == 1) {
|
|
1574
|
+
// Single type — emit specific opcode
|
|
1575
|
+
for (int b = 0; b < 7; ++b) {
|
|
1576
|
+
if (n->type_mask & (1u << b)) {
|
|
1577
|
+
switch (static_cast<json_type>(b)) {
|
|
1578
|
+
case json_type::object: out.push_back({cg::op::EXPECT_OBJECT}); break;
|
|
1579
|
+
case json_type::array: out.push_back({cg::op::EXPECT_ARRAY}); break;
|
|
1580
|
+
case json_type::string: out.push_back({cg::op::EXPECT_STRING}); break;
|
|
1581
|
+
case json_type::number: out.push_back({cg::op::EXPECT_NUMBER}); break;
|
|
1582
|
+
case json_type::integer: out.push_back({cg::op::EXPECT_INTEGER}); break;
|
|
1583
|
+
case json_type::boolean: out.push_back({cg::op::EXPECT_BOOLEAN}); break;
|
|
1584
|
+
case json_type::null_value: out.push_back({cg::op::EXPECT_NULL}); break;
|
|
1585
|
+
}
|
|
1586
|
+
break;
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1456
1589
|
} else {
|
|
1457
|
-
uint32_t i = (uint32_t)p.
|
|
1458
|
-
p.
|
|
1590
|
+
uint32_t i = (uint32_t)p.type_masks.size();
|
|
1591
|
+
p.type_masks.push_back(n->type_mask);
|
|
1459
1592
|
out.push_back({cg::op::EXPECT_TYPE_MULTI, i});
|
|
1460
1593
|
}
|
|
1461
1594
|
}
|
|
@@ -1485,13 +1618,7 @@ static void cg_compile(const schema_node* n, cg::plan& p,
|
|
|
1485
1618
|
if (n->compiled_pattern) { uint32_t i=(uint32_t)p.regexes.size(); p.regexes.push_back(n->compiled_pattern); out.push_back({cg::op::CHECK_PATTERN,i}); }
|
|
1486
1619
|
if (n->format.has_value()) {
|
|
1487
1620
|
uint32_t i=(uint32_t)p.format_ids.size();
|
|
1488
|
-
|
|
1489
|
-
auto& f=*n->format;
|
|
1490
|
-
if(f=="email")fid=0;else if(f=="date")fid=1;else if(f=="date-time")fid=2;
|
|
1491
|
-
else if(f=="time")fid=3;else if(f=="ipv4")fid=4;else if(f=="ipv6")fid=5;
|
|
1492
|
-
else if(f=="uri"||f=="uri-reference")fid=6;else if(f=="uuid")fid=7;
|
|
1493
|
-
else if(f=="hostname")fid=8;
|
|
1494
|
-
p.format_ids.push_back(fid);
|
|
1621
|
+
p.format_ids.push_back(n->format_id);
|
|
1495
1622
|
out.push_back({cg::op::CHECK_FORMAT,i});
|
|
1496
1623
|
}
|
|
1497
1624
|
// Array
|
|
@@ -1535,44 +1662,43 @@ static void cg_compile(const schema_node* n, cg::plan& p,
|
|
|
1535
1662
|
}
|
|
1536
1663
|
|
|
1537
1664
|
// --- Codegen executor ---
|
|
1538
|
-
static const char* fmt_names[]={"email","date","date-time","time","ipv4","ipv6","uri","uuid","hostname"};
|
|
1539
1665
|
|
|
1540
1666
|
static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
1541
1667
|
dom::element value) {
|
|
1542
|
-
auto t =
|
|
1668
|
+
auto t = value.type();
|
|
1669
|
+
bool t_numeric = (t == et::INT64 || t == et::UINT64 || t == et::DOUBLE);
|
|
1670
|
+
double t_dval = t_numeric ? to_double(value) : 0.0;
|
|
1543
1671
|
for (size_t i=0; i<code.size(); ++i) {
|
|
1544
1672
|
auto& c = code[i];
|
|
1545
1673
|
switch(c.o) {
|
|
1546
1674
|
case cg::op::END: return true;
|
|
1547
|
-
case cg::op::EXPECT_OBJECT: if(t!=
|
|
1548
|
-
case cg::op::EXPECT_ARRAY: if(t!=
|
|
1549
|
-
case cg::op::EXPECT_STRING: if(t!=
|
|
1550
|
-
case cg::op::EXPECT_NUMBER: if(
|
|
1551
|
-
case cg::op::EXPECT_INTEGER: if(t!=
|
|
1552
|
-
case cg::op::EXPECT_BOOLEAN: if(t!=
|
|
1553
|
-
case cg::op::EXPECT_NULL: if(t!=
|
|
1675
|
+
case cg::op::EXPECT_OBJECT: if(t!=et::OBJECT) return false; break;
|
|
1676
|
+
case cg::op::EXPECT_ARRAY: if(t!=et::ARRAY) return false; break;
|
|
1677
|
+
case cg::op::EXPECT_STRING: if(t!=et::STRING) return false; break;
|
|
1678
|
+
case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
|
|
1679
|
+
case cg::op::EXPECT_INTEGER: if(t!=et::INT64&&t!=et::UINT64) return false; break;
|
|
1680
|
+
case cg::op::EXPECT_BOOLEAN: if(t!=et::BOOL) return false; break;
|
|
1681
|
+
case cg::op::EXPECT_NULL: if(t!=et::NULL_VALUE) return false; break;
|
|
1554
1682
|
case cg::op::EXPECT_TYPE_MULTI: {
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
case cg::op::
|
|
1560
|
-
case cg::op::
|
|
1561
|
-
case cg::op::
|
|
1562
|
-
case cg::op::
|
|
1563
|
-
case cg::op::
|
|
1564
|
-
case cg::op::
|
|
1565
|
-
case cg::op::
|
|
1566
|
-
case cg::op::
|
|
1567
|
-
case cg::op::
|
|
1568
|
-
case cg::op::
|
|
1569
|
-
case cg::op::
|
|
1570
|
-
case cg::op::
|
|
1571
|
-
case cg::op::
|
|
1572
|
-
case cg::op::
|
|
1573
|
-
case cg::op::
|
|
1574
|
-
case cg::op::CHECK_MAX_PROPS: if(t=="object"){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
|
|
1575
|
-
case cg::op::OBJ_PROPS_START: if(t=="object"){
|
|
1683
|
+
if(!(element_type_mask(t) & p.type_masks[c.a])) return false; break;
|
|
1684
|
+
}
|
|
1685
|
+
case cg::op::CHECK_MINIMUM: if(t_numeric&&t_dval<p.doubles[c.a])return false; break;
|
|
1686
|
+
case cg::op::CHECK_MAXIMUM: if(t_numeric&&t_dval>p.doubles[c.a])return false; break;
|
|
1687
|
+
case cg::op::CHECK_EX_MINIMUM: if(t_numeric&&t_dval<=p.doubles[c.a])return false; break;
|
|
1688
|
+
case cg::op::CHECK_EX_MAXIMUM: if(t_numeric&&t_dval>=p.doubles[c.a])return false; break;
|
|
1689
|
+
case cg::op::CHECK_MULTIPLE_OF: if(t_numeric){double d=p.doubles[c.a],r=std::fmod(t_dval,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break;
|
|
1690
|
+
case cg::op::CHECK_MIN_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)<c.a)return false;} break;
|
|
1691
|
+
case cg::op::CHECK_MAX_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)>c.a)return false;} break;
|
|
1692
|
+
case cg::op::CHECK_PATTERN: if(t==et::STRING){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
|
|
1693
|
+
case cg::op::CHECK_FORMAT: if(t==et::STRING){std::string_view sv;value.get(sv);if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
|
|
1694
|
+
case cg::op::CHECK_MIN_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s<c.a)return false;} break;
|
|
1695
|
+
case cg::op::CHECK_MAX_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s>c.a)return false;} break;
|
|
1696
|
+
case cg::op::CHECK_UNIQUE_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);std::set<std::string> seen;for(auto x:a)if(!seen.insert(canonical_json(x)).second)return false;} break;
|
|
1697
|
+
case cg::op::ARRAY_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);for(auto x:a)if(!cg_exec(p,p.subs[c.a],x))return false;} break;
|
|
1698
|
+
case cg::op::CHECK_REQUIRED: if(t==et::OBJECT){dom::object o;value.get(o);dom::element d;if(o[p.strings[c.a]].get(d)!=SUCCESS)return false;} break;
|
|
1699
|
+
case cg::op::CHECK_MIN_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n<c.a)return false;} break;
|
|
1700
|
+
case cg::op::CHECK_MAX_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
|
|
1701
|
+
case cg::op::OBJ_PROPS_START: if(t==et::OBJECT){
|
|
1576
1702
|
dom::object o; value.get(o);
|
|
1577
1703
|
// collect prop defs
|
|
1578
1704
|
struct pd{std::string_view nm;uint32_t si;};
|
|
@@ -1592,13 +1718,13 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
|
1592
1718
|
case cg::op::OBJ_PROP: case cg::op::OBJ_PROPS_END: case cg::op::CHECK_NO_ADDITIONAL: break;
|
|
1593
1719
|
case cg::op::CHECK_ENUM_STR: {
|
|
1594
1720
|
auto& es=p.enum_sets[c.a]; bool f=false;
|
|
1595
|
-
if(t==
|
|
1721
|
+
if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
|
|
1596
1722
|
if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
|
|
1597
1723
|
if(!f)return false; break;
|
|
1598
1724
|
}
|
|
1599
1725
|
case cg::op::CHECK_ENUM: {
|
|
1600
1726
|
auto& es=p.enum_sets[c.a]; bool f=false;
|
|
1601
|
-
if(t==
|
|
1727
|
+
if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
|
|
1602
1728
|
if(!f&&value.is<int64_t>()){int64_t v;value.get(v);auto s=std::to_string(v);for(auto& e:es)if(e==s){f=true;break;}}
|
|
1603
1729
|
if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
|
|
1604
1730
|
if(!f)return false; break;
|
|
@@ -1614,51 +1740,53 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
|
1614
1740
|
// Uses simdjson On Demand API to avoid materializing the full DOM tree.
|
|
1615
1741
|
// Returns: true = valid, false = invalid OR unsupported (fallback to DOM).
|
|
1616
1742
|
|
|
1617
|
-
static
|
|
1743
|
+
static json_type od_type(simdjson::ondemand::value& v) {
|
|
1618
1744
|
switch (v.type()) {
|
|
1619
|
-
case simdjson::ondemand::json_type::object: return
|
|
1620
|
-
case simdjson::ondemand::json_type::array: return
|
|
1621
|
-
case simdjson::ondemand::json_type::string: return
|
|
1622
|
-
case simdjson::ondemand::json_type::boolean: return
|
|
1623
|
-
case simdjson::ondemand::json_type::null: return
|
|
1745
|
+
case simdjson::ondemand::json_type::object: return json_type::object;
|
|
1746
|
+
case simdjson::ondemand::json_type::array: return json_type::array;
|
|
1747
|
+
case simdjson::ondemand::json_type::string: return json_type::string;
|
|
1748
|
+
case simdjson::ondemand::json_type::boolean: return json_type::boolean;
|
|
1749
|
+
case simdjson::ondemand::json_type::null: return json_type::null_value;
|
|
1624
1750
|
case simdjson::ondemand::json_type::number: {
|
|
1625
1751
|
simdjson::ondemand::number_type nt;
|
|
1626
1752
|
if (v.get_number_type().get(nt) == SUCCESS &&
|
|
1627
1753
|
nt == simdjson::ondemand::number_type::floating_point_number)
|
|
1628
|
-
return
|
|
1629
|
-
return
|
|
1754
|
+
return json_type::number;
|
|
1755
|
+
return json_type::integer;
|
|
1630
1756
|
}
|
|
1631
1757
|
}
|
|
1632
|
-
return
|
|
1758
|
+
return json_type::string;
|
|
1633
1759
|
}
|
|
1634
1760
|
|
|
1635
1761
|
static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
1636
1762
|
simdjson::ondemand::value value) {
|
|
1637
1763
|
auto t = od_type(value);
|
|
1764
|
+
bool t_numeric = (t == json_type::integer || t == json_type::number);
|
|
1638
1765
|
for (size_t i = 0; i < code.size(); ++i) {
|
|
1639
1766
|
auto& c = code[i];
|
|
1640
1767
|
switch (c.o) {
|
|
1641
1768
|
case cg::op::END: return true;
|
|
1642
|
-
case cg::op::EXPECT_OBJECT: if(t!=
|
|
1643
|
-
case cg::op::EXPECT_ARRAY: if(t!=
|
|
1644
|
-
case cg::op::EXPECT_STRING: if(t!=
|
|
1645
|
-
case cg::op::EXPECT_NUMBER: if(
|
|
1646
|
-
case cg::op::EXPECT_INTEGER: if(t!=
|
|
1647
|
-
case cg::op::EXPECT_BOOLEAN: if(t!=
|
|
1648
|
-
case cg::op::EXPECT_NULL: if(t!=
|
|
1769
|
+
case cg::op::EXPECT_OBJECT: if(t!=json_type::object) return false; break;
|
|
1770
|
+
case cg::op::EXPECT_ARRAY: if(t!=json_type::array) return false; break;
|
|
1771
|
+
case cg::op::EXPECT_STRING: if(t!=json_type::string) return false; break;
|
|
1772
|
+
case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
|
|
1773
|
+
case cg::op::EXPECT_INTEGER: if(t!=json_type::integer) return false; break;
|
|
1774
|
+
case cg::op::EXPECT_BOOLEAN: if(t!=json_type::boolean) return false; break;
|
|
1775
|
+
case cg::op::EXPECT_NULL: if(t!=json_type::null_value) return false; break;
|
|
1649
1776
|
case cg::op::EXPECT_TYPE_MULTI: {
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
if(
|
|
1777
|
+
// integer matches both "integer" and "number" type constraints
|
|
1778
|
+
uint8_t tbits = json_type_bit(t);
|
|
1779
|
+
if (t == json_type::integer) tbits |= json_type_bit(json_type::number);
|
|
1780
|
+
if(!(tbits & p.type_masks[c.a])) return false; break;
|
|
1653
1781
|
}
|
|
1654
1782
|
case cg::op::CHECK_MINIMUM:
|
|
1655
1783
|
case cg::op::CHECK_MAXIMUM:
|
|
1656
1784
|
case cg::op::CHECK_EX_MINIMUM:
|
|
1657
1785
|
case cg::op::CHECK_EX_MAXIMUM:
|
|
1658
1786
|
case cg::op::CHECK_MULTIPLE_OF: {
|
|
1659
|
-
if (
|
|
1787
|
+
if (t_numeric) {
|
|
1660
1788
|
double v;
|
|
1661
|
-
if (t==
|
|
1789
|
+
if (t==json_type::integer) { int64_t iv; if(value.get(iv)!=SUCCESS) return false; v=(double)iv; }
|
|
1662
1790
|
else { if(value.get(v)!=SUCCESS) return false; }
|
|
1663
1791
|
double d=p.doubles[c.a];
|
|
1664
1792
|
if(c.o==cg::op::CHECK_MINIMUM && v<d) return false;
|
|
@@ -1669,39 +1797,39 @@ static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
|
1669
1797
|
}
|
|
1670
1798
|
break;
|
|
1671
1799
|
}
|
|
1672
|
-
case cg::op::CHECK_MIN_LENGTH: if(t==
|
|
1673
|
-
case cg::op::CHECK_MAX_LENGTH: if(t==
|
|
1674
|
-
case cg::op::CHECK_PATTERN: if(t==
|
|
1675
|
-
case cg::op::CHECK_FORMAT: if(t==
|
|
1676
|
-
case cg::op::CHECK_MIN_ITEMS: if(t==
|
|
1800
|
+
case cg::op::CHECK_MIN_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)<c.a) return false;} break;
|
|
1801
|
+
case cg::op::CHECK_MAX_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)>c.a) return false;} break;
|
|
1802
|
+
case cg::op::CHECK_PATTERN: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
|
|
1803
|
+
case cg::op::CHECK_FORMAT: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
|
|
1804
|
+
case cg::op::CHECK_MIN_ITEMS: if(t==json_type::array){
|
|
1677
1805
|
simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
|
|
1678
1806
|
uint64_t s=0; for(auto x:a){(void)x;++s;} if(s<c.a) return false;
|
|
1679
1807
|
} break;
|
|
1680
|
-
case cg::op::CHECK_MAX_ITEMS: if(t==
|
|
1808
|
+
case cg::op::CHECK_MAX_ITEMS: if(t==json_type::array){
|
|
1681
1809
|
simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
|
|
1682
1810
|
uint64_t s=0; for(auto x:a){(void)x;++s;} if(s>c.a) return false;
|
|
1683
1811
|
} break;
|
|
1684
|
-
case cg::op::ARRAY_ITEMS: if(t==
|
|
1812
|
+
case cg::op::ARRAY_ITEMS: if(t==json_type::array){
|
|
1685
1813
|
simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
|
|
1686
1814
|
for(auto elem:a){
|
|
1687
1815
|
simdjson::ondemand::value v; if(elem.get(v)!=SUCCESS) return false;
|
|
1688
1816
|
if(!od_exec(p,p.subs[c.a],v)) return false;
|
|
1689
1817
|
}
|
|
1690
1818
|
} break;
|
|
1691
|
-
case cg::op::CHECK_REQUIRED: if(t==
|
|
1819
|
+
case cg::op::CHECK_REQUIRED: if(t==json_type::object){
|
|
1692
1820
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1693
1821
|
auto f = o.find_field_unordered(p.strings[c.a]);
|
|
1694
1822
|
if(f.error()) return false;
|
|
1695
1823
|
} break;
|
|
1696
|
-
case cg::op::CHECK_MIN_PROPS: if(t==
|
|
1824
|
+
case cg::op::CHECK_MIN_PROPS: if(t==json_type::object){
|
|
1697
1825
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1698
1826
|
uint64_t n=0; for(auto f:o){(void)f;++n;} if(n<c.a) return false;
|
|
1699
1827
|
} break;
|
|
1700
|
-
case cg::op::CHECK_MAX_PROPS: if(t==
|
|
1828
|
+
case cg::op::CHECK_MAX_PROPS: if(t==json_type::object){
|
|
1701
1829
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1702
1830
|
uint64_t n=0; for(auto f:o){(void)f;++n;} if(n>c.a) return false;
|
|
1703
1831
|
} break;
|
|
1704
|
-
case cg::op::OBJ_PROPS_START: if(t==
|
|
1832
|
+
case cg::op::OBJ_PROPS_START: if(t==json_type::object){
|
|
1705
1833
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1706
1834
|
struct pd{std::string_view nm;uint32_t si;};
|
|
1707
1835
|
std::vector<pd> props; bool no_add=false;
|
|
@@ -1854,10 +1982,9 @@ validation_result validate(const schema_ref& schema, std::string_view json,
|
|
|
1854
1982
|
// Codegen said invalid OR hit COMPOSITION — fall through to tree walker
|
|
1855
1983
|
}
|
|
1856
1984
|
|
|
1857
|
-
// Slow path:
|
|
1858
|
-
auto result2 = dom_p.parse(psv);
|
|
1985
|
+
// Slow path: tree walker with error details (reuse already-parsed DOM)
|
|
1859
1986
|
std::vector<validation_error> errors;
|
|
1860
|
-
validate_node(schema.impl->root,
|
|
1987
|
+
validate_node(schema.impl->root, result.value(), "", *schema.impl, errors,
|
|
1861
1988
|
opts.all_errors);
|
|
1862
1989
|
|
|
1863
1990
|
return {errors.empty(), std::move(errors)};
|