ata-validator 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ata.cpp CHANGED
@@ -128,17 +128,20 @@ static bool fast_check_hostname(std::string_view s) {
128
128
  return label_len > 0;
129
129
  }
130
130
 
131
- static bool check_format(std::string_view sv, const std::string& fmt) {
132
- if (fmt == "email") return fast_check_email(sv);
133
- if (fmt == "date") return fast_check_date(sv);
134
- if (fmt == "date-time") return fast_check_datetime(sv);
135
- if (fmt == "time") return fast_check_time(sv);
136
- if (fmt == "ipv4") return fast_check_ipv4(sv);
137
- if (fmt == "ipv6") return sv.find(':') != std::string_view::npos;
138
- if (fmt == "uri" || fmt == "uri-reference") return fast_check_uri(sv);
139
- if (fmt == "uuid") return fast_check_uuid(sv);
140
- if (fmt == "hostname") return fast_check_hostname(sv);
141
- return true; // unknown formats pass
131
+ // Check format by pre-resolved numeric ID — no string comparisons.
132
+ static bool check_format_by_id(std::string_view sv, uint8_t fid) {
133
+ switch (fid) {
134
+ case 0: return fast_check_email(sv);
135
+ case 1: return fast_check_date(sv);
136
+ case 2: return fast_check_datetime(sv);
137
+ case 3: return fast_check_time(sv);
138
+ case 4: return fast_check_ipv4(sv);
139
+ case 5: return sv.find(':') != std::string_view::npos;
140
+ case 6: return fast_check_uri(sv);
141
+ case 7: return fast_check_uuid(sv);
142
+ case 8: return fast_check_hostname(sv);
143
+ default: return true; // unknown formats pass
144
+ }
142
145
  }
143
146
 
144
147
  namespace ata {
@@ -182,14 +185,74 @@ static std::string canonical_json(dom::element el) {
182
185
  }
183
186
  }
184
187
 
188
+ // JSON Schema type enum — avoids string comparisons on the hot path.
189
+ enum class json_type : uint8_t {
190
+ string, number, integer, boolean, null_value, object, array
191
+ };
192
+
193
+ static json_type json_type_from_sv(std::string_view s) {
194
+ if (s == "string") return json_type::string;
195
+ if (s == "number") return json_type::number;
196
+ if (s == "integer") return json_type::integer;
197
+ if (s == "boolean") return json_type::boolean;
198
+ if (s == "null") return json_type::null_value;
199
+ if (s == "object") return json_type::object;
200
+ if (s == "array") return json_type::array;
201
+ return json_type::string; // fallback
202
+ }
203
+
204
+ static const char* json_type_name(json_type t) {
205
+ switch (t) {
206
+ case json_type::string: return "string";
207
+ case json_type::number: return "number";
208
+ case json_type::integer: return "integer";
209
+ case json_type::boolean: return "boolean";
210
+ case json_type::null_value: return "null";
211
+ case json_type::object: return "object";
212
+ case json_type::array: return "array";
213
+ }
214
+ return "unknown";
215
+ }
216
+
217
+ // Bitmask for O(1) type checking: one bit per json_type value.
218
+ static uint8_t json_type_bit(json_type t) { return 1u << static_cast<uint8_t>(t); }
219
+
220
+ // Map dom::element_type to a json_type bitmask. INT64/UINT64 set both the integer and number bits, so integer values also satisfy a "number" schema.
221
+ static uint8_t element_type_mask(dom::element_type t) {
222
+ switch (t) {
223
+ case dom::element_type::STRING: return json_type_bit(json_type::string);
224
+ case dom::element_type::INT64:
225
+ case dom::element_type::UINT64: return json_type_bit(json_type::integer) | json_type_bit(json_type::number);
226
+ case dom::element_type::DOUBLE: return json_type_bit(json_type::number);
227
+ case dom::element_type::BOOL: return json_type_bit(json_type::boolean);
228
+ case dom::element_type::NULL_VALUE: return json_type_bit(json_type::null_value);
229
+ case dom::element_type::ARRAY: return json_type_bit(json_type::array);
230
+ case dom::element_type::OBJECT: return json_type_bit(json_type::object);
231
+ }
232
+ return 0;
233
+ }
234
+
235
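+ // Resolve a format string to its numeric ID once, while the schema is being compiled (not at C++ compile time); 255 means unknown format, which always passes.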
236
+ static uint8_t format_id_from_string(const std::string& f) {
237
+ if (f == "email") return 0;
238
+ if (f == "date") return 1;
239
+ if (f == "date-time") return 2;
240
+ if (f == "time") return 3;
241
+ if (f == "ipv4") return 4;
242
+ if (f == "ipv6") return 5;
243
+ if (f == "uri" || f == "uri-reference") return 6;
244
+ if (f == "uuid") return 7;
245
+ if (f == "hostname") return 8;
246
+ return 255;
247
+ }
248
+
185
249
  // Forward declarations
186
250
  struct schema_node;
187
251
  using schema_node_ptr = std::shared_ptr<schema_node>;
188
252
 
189
253
  struct schema_node {
190
- // type constraint: "string", "number", "integer", "boolean", "null",
191
- // "object", "array"
192
- std::vector<std::string> types;
254
+ // type constraint bitmask for O(1) type checking
255
+ uint8_t type_mask = 0; // bit per json_type value
193
256
 
194
257
  // numeric
195
258
  std::optional<double> minimum;
@@ -234,12 +297,12 @@ struct schema_node {
234
297
  std::vector<pattern_prop> pattern_properties;
235
298
 
236
299
  // enum / const
237
- std::optional<std::string> enum_values_raw; // raw JSON array string
238
300
  std::vector<std::string> enum_values_minified; // pre-minified enum values
239
301
  std::optional<std::string> const_value_raw; // raw JSON value string
240
302
 
241
303
  // format
242
304
  std::optional<std::string> format;
305
+ uint8_t format_id = 255; // pre-resolved format ID (255 = unknown/pass)
243
306
 
244
307
  // composition
245
308
  std::vector<schema_node_ptr> all_of;
@@ -281,7 +344,7 @@ struct plan {
281
344
  std::vector<std::string> strings;
282
345
  std::vector<std::shared_ptr<re2::RE2>> regexes;
283
346
  std::vector<std::vector<std::string>> enum_sets;
284
- std::vector<std::vector<std::string>> type_sets;
347
+ std::vector<uint8_t> type_masks;
285
348
  std::vector<uint8_t> format_ids;
286
349
  std::vector<std::vector<ins>> subs;
287
350
  };
@@ -302,6 +365,10 @@ static dom::parser& tl_dom_parser() {
302
365
  thread_local dom::parser p;
303
366
  return p;
304
367
  }
368
+ static dom::parser& tl_dom_key_parser() {
369
+ thread_local dom::parser p;
370
+ return p;
371
+ }
305
372
  static simdjson::ondemand::parser& tl_od_parser() {
306
373
  thread_local simdjson::ondemand::parser p;
307
374
  return p;
@@ -346,12 +413,12 @@ static schema_node_ptr compile_node(dom::element el,
346
413
  if (type_el.is<std::string_view>()) {
347
414
  std::string_view sv;
348
415
  type_el.get(sv);
349
- node->types.emplace_back(sv);
416
+ node->type_mask |= json_type_bit(json_type_from_sv(sv));
350
417
  } else if (type_el.is<dom::array>()) {
351
418
  dom::array type_arr; type_el.get(type_arr); for (auto t : type_arr) {
352
419
  std::string_view sv;
353
420
  if (t.get(sv) == SUCCESS) {
354
- node->types.emplace_back(sv);
421
+ node->type_mask |= json_type_bit(json_type_from_sv(sv));
355
422
  }
356
423
  }
357
424
  }
@@ -531,7 +598,10 @@ static schema_node_ptr compile_node(dom::element el,
531
598
  dom::element fmt_el;
532
599
  if (obj["format"].get(fmt_el) == SUCCESS) {
533
600
  std::string_view sv;
534
- if (fmt_el.get(sv) == SUCCESS) node->format = std::string(sv);
601
+ if (fmt_el.get(sv) == SUCCESS) {
602
+ node->format = std::string(sv);
603
+ node->format_id = format_id_from_string(node->format.value());
604
+ }
535
605
  }
536
606
 
537
607
  // $id (register in defs for potential resolution)
@@ -546,7 +616,6 @@ static schema_node_ptr compile_node(dom::element el,
546
616
  // enum — pre-minify each value at compile time
547
617
  dom::element enum_el;
548
618
  if (obj["enum"].get(enum_el) == SUCCESS) {
549
- node->enum_values_raw = canonical_json(enum_el);
550
619
  if (enum_el.is<dom::array>()) {
551
620
  dom::array enum_arr; enum_el.get(enum_arr); for (auto e : enum_arr) {
552
621
  node->enum_values_minified.push_back(canonical_json(e));
@@ -640,41 +709,37 @@ static bool validate_fast(const schema_node_ptr& node,
640
709
  // Macro for early termination
641
710
  #define ATA_CHECK_EARLY() if (!all_errors && !errors.empty()) return
642
711
 
712
+ using et = dom::element_type;
713
+
714
+
643
715
  // Use string_view to avoid allocations in hot path
644
716
  static std::string_view type_of_sv(dom::element el) {
645
717
  switch (el.type()) {
646
- case dom::element_type::STRING: return "string";
647
- case dom::element_type::INT64:
648
- case dom::element_type::UINT64: return "integer";
649
- case dom::element_type::DOUBLE: return "number";
650
- case dom::element_type::BOOL: return "boolean";
651
- case dom::element_type::NULL_VALUE:return "null";
652
- case dom::element_type::ARRAY: return "array";
653
- case dom::element_type::OBJECT: return "object";
718
+ case et::STRING: return "string";
719
+ case et::INT64:
720
+ case et::UINT64: return "integer";
721
+ case et::DOUBLE: return "number";
722
+ case et::BOOL: return "boolean";
723
+ case et::NULL_VALUE:return "null";
724
+ case et::ARRAY: return "array";
725
+ case et::OBJECT: return "object";
654
726
  }
655
727
  return "unknown";
656
728
  }
657
729
 
658
- static std::string type_of(dom::element el) {
659
- return std::string(type_of_sv(el));
660
- }
661
730
 
662
- static bool type_matches(dom::element el, const std::string& type) {
663
- auto actual = type_of_sv(el);
664
- if (actual == type) return true;
665
- if (type == "number" && (actual == "integer" || actual == "number"))
666
- return true;
667
- return false;
731
+ // O(1) type check: test element's type bits against the schema's type_mask.
732
+ static bool type_matches_mask(dom::element el, uint8_t type_mask) {
733
+ return (element_type_mask(el.type()) & type_mask) != 0;
668
734
  }
669
735
 
670
736
  static double to_double(dom::element el) {
671
- double v = 0;
672
- if (el.get(v) == SUCCESS) return v;
673
- int64_t i = 0;
674
- if (el.get(i) == SUCCESS) return static_cast<double>(i);
675
- uint64_t u = 0;
676
- if (el.get(u) == SUCCESS) return static_cast<double>(u);
677
- return 0;
737
+ switch (el.type()) {
738
+ case et::DOUBLE: { double v; el.get(v); return v; }
739
+ case et::INT64: { int64_t v; el.get(v); return static_cast<double>(v); }
740
+ case et::UINT64: { uint64_t v; el.get(v); return static_cast<double>(v); }
741
+ default: return 0;
742
+ }
678
743
  }
679
744
 
680
745
  // Count UTF-8 codepoints — branchless: count non-continuation bytes
@@ -843,22 +908,17 @@ static void validate_node(const schema_node_ptr& node,
843
908
  }
844
909
 
845
910
  // type
846
- if (!node->types.empty()) {
847
- bool match = false;
848
- for (const auto& t : node->types) {
849
- if (type_matches(value, t)) {
850
- match = true;
851
- break;
852
- }
853
- }
854
- if (!match) {
911
+ if (node->type_mask) {
912
+ if (!type_matches_mask(value, node->type_mask)) {
855
913
  std::string expected;
856
- for (size_t i = 0; i < node->types.size(); ++i) {
857
- if (i > 0) expected += ", ";
858
- expected += node->types[i];
914
+ for (int b = 0; b < 7; ++b) {
915
+ if (node->type_mask & (1u << b)) {
916
+ if (!expected.empty()) expected += ", ";
917
+ expected += json_type_name(static_cast<json_type>(b));
918
+ }
859
919
  }
860
920
  errors.push_back({error_code::type_mismatch, path,
861
- "expected type " + expected + ", got " + type_of(value)});
921
+ "expected type " + expected + ", got " + std::string(type_of_sv(value))});
862
922
  ATA_CHECK_EARLY();
863
923
  }
864
924
  }
@@ -891,8 +951,8 @@ static void validate_node(const schema_node_ptr& node,
891
951
 
892
952
  ATA_CHECK_EARLY();
893
953
  // Numeric validations
894
- auto actual_type = type_of(value);
895
- if (actual_type == "integer" || actual_type == "number") {
954
+ auto vtype = value.type();
955
+ if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
896
956
  double v = to_double(value);
897
957
  if (node->minimum.has_value() && v < node->minimum.value()) {
898
958
  errors.push_back({error_code::minimum_violation, path,
@@ -929,7 +989,7 @@ static void validate_node(const schema_node_ptr& node,
929
989
  }
930
990
 
931
991
  // String validations
932
- if (actual_type == "string") {
992
+ if (vtype == et::STRING) {
933
993
  std::string_view sv;
934
994
  value.get(sv);
935
995
  uint64_t len = utf8_length(sv);
@@ -955,7 +1015,7 @@ static void validate_node(const schema_node_ptr& node,
955
1015
  }
956
1016
 
957
1017
  if (node->format.has_value()) {
958
- if (!check_format(sv, node->format.value())) {
1018
+ if (!check_format_by_id(sv, node->format_id)) {
959
1019
  errors.push_back({error_code::format_mismatch, path,
960
1020
  "string does not match format: " +
961
1021
  node->format.value()});
@@ -964,10 +1024,14 @@ static void validate_node(const schema_node_ptr& node,
964
1024
  }
965
1025
 
966
1026
  // Array validations
967
- if (actual_type == "array" && value.is<dom::array>()) {
1027
+ if (vtype == et::ARRAY) {
968
1028
  dom::array arr; value.get(arr);
969
- uint64_t arr_size = 0;
970
- for ([[maybe_unused]] auto _ : arr) ++arr_size;
1029
+ uint64_t arr_size = arr.size();
1030
+ if(arr_size == 0xFFFFFF) [[unlikely]] {
1031
+ // Fallback for large arrays where size() saturates — count manually to avoid overflow
1032
+ arr_size = 0;
1033
+ for ([[maybe_unused]] auto _ : arr) ++arr_size;
1034
+ }
971
1035
 
972
1036
  if (node->min_items.has_value() && arr_size < node->min_items.value()) {
973
1037
  errors.push_back({error_code::min_items_violation, path,
@@ -983,13 +1047,29 @@ static void validate_node(const schema_node_ptr& node,
983
1047
  }
984
1048
 
985
1049
  if (node->unique_items) {
986
- std::set<std::string> seen;
987
1050
  bool has_dup = false;
988
- for (auto item : arr) {
989
- auto s = canonical_json(item);
990
- if (!seen.insert(s).second) {
991
- has_dup = true;
992
- break;
1051
+ // Fast path: check if all items are the same simple type
1052
+ auto first_it = arr.begin();
1053
+ if (first_it != arr.end()) {
1054
+ auto first_type = (*first_it).type();
1055
+ bool all_same = true;
1056
+ for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
1057
+ if (all_same && first_type == et::STRING) {
1058
+ std::set<std::string_view> seen;
1059
+ for (auto item : arr) {
1060
+ std::string_view sv; item.get(sv);
1061
+ if (!seen.insert(sv).second) { has_dup = true; break; }
1062
+ }
1063
+ } else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
1064
+ std::set<double> seen;
1065
+ for (auto item : arr) {
1066
+ if (!seen.insert(to_double(item)).second) { has_dup = true; break; }
1067
+ }
1068
+ } else {
1069
+ std::set<std::string> seen;
1070
+ for (auto item : arr) {
1071
+ if (!seen.insert(canonical_json(item)).second) { has_dup = true; break; }
1072
+ }
993
1073
  }
994
1074
  }
995
1075
  if (has_dup) {
@@ -1017,9 +1097,7 @@ static void validate_node(const schema_node_ptr& node,
1017
1097
  if (node->contains_schema) {
1018
1098
  uint64_t match_count = 0;
1019
1099
  for (auto item : arr) {
1020
- std::vector<validation_error> tmp;
1021
- validate_node(node->contains_schema, item, path, ctx, tmp, false);
1022
- if (tmp.empty()) ++match_count;
1100
+ if (validate_fast(node->contains_schema, item, ctx)) ++match_count;
1023
1101
  }
1024
1102
  uint64_t min_c = node->min_contains.value_or(1);
1025
1103
  uint64_t max_c = node->max_contains.value_or(arr_size);
@@ -1037,24 +1115,26 @@ static void validate_node(const schema_node_ptr& node,
1037
1115
  }
1038
1116
 
1039
1117
  // Object validations
1040
- if (actual_type == "object" && value.is<dom::object>()) {
1118
+ if (vtype == et::OBJECT) {
1041
1119
  dom::object obj; value.get(obj);
1042
- uint64_t prop_count = 0;
1043
- for ([[maybe_unused]] auto _ : obj) ++prop_count;
1044
1120
 
1045
- if (node->min_properties.has_value() &&
1046
- prop_count < node->min_properties.value()) {
1047
- errors.push_back({error_code::min_properties_violation, path,
1048
- "object has " + std::to_string(prop_count) +
1049
- " properties, minimum " +
1050
- std::to_string(node->min_properties.value())});
1051
- }
1052
- if (node->max_properties.has_value() &&
1053
- prop_count > node->max_properties.value()) {
1054
- errors.push_back({error_code::max_properties_violation, path,
1055
- "object has " + std::to_string(prop_count) +
1056
- " properties, maximum " +
1057
- std::to_string(node->max_properties.value())});
1121
+ if (node->min_properties.has_value() || node->max_properties.has_value()) {
1122
+ uint64_t prop_count = 0;
1123
+ for ([[maybe_unused]] auto _ : obj) ++prop_count;
1124
+ if (node->min_properties.has_value() &&
1125
+ prop_count < node->min_properties.value()) {
1126
+ errors.push_back({error_code::min_properties_violation, path,
1127
+ "object has " + std::to_string(prop_count) +
1128
+ " properties, minimum " +
1129
+ std::to_string(node->min_properties.value())});
1130
+ }
1131
+ if (node->max_properties.has_value() &&
1132
+ prop_count > node->max_properties.value()) {
1133
+ errors.push_back({error_code::max_properties_violation, path,
1134
+ "object has " + std::to_string(prop_count) +
1135
+ " properties, maximum " +
1136
+ std::to_string(node->max_properties.value())});
1137
+ }
1058
1138
  }
1059
1139
 
1060
1140
  // required
@@ -1099,17 +1179,50 @@ static void validate_node(const schema_node_ptr& node,
1099
1179
  }
1100
1180
  }
1101
1181
  }
1102
-
1103
- // propertyNames
1182
+ // propertyNames — validate key as string directly when possible
1104
1183
  if (node->property_names_schema) {
1105
- for (auto [key, val] : obj) {
1106
- // Create a string element to validate the key
1107
- std::string key_json = "\"" + std::string(key) + "\"";
1108
- dom::parser key_parser;
1109
- auto key_result = key_parser.parse(key_json);
1110
- if (!key_result.error()) {
1111
- validate_node(node->property_names_schema, key_result.value(),
1112
- path, ctx, errors, all_errors);
1184
+ auto pn = node->property_names_schema;
1185
+ bool string_only = pn->ref.empty() && pn->all_of.empty() &&
1186
+ pn->any_of.empty() && pn->one_of.empty() && !pn->not_schema &&
1187
+ !pn->if_schema && pn->enum_values_minified.empty() &&
1188
+ !pn->const_value_raw.has_value();
1189
+ if (string_only) {
1190
+ // Fast path: validate string constraints on key directly
1191
+ for (auto [key, val] : obj) {
1192
+ std::string_view key_sv(key);
1193
+ if (pn->type_mask && !(pn->type_mask & json_type_bit(json_type::string))) {
1194
+ errors.push_back({error_code::type_mismatch, path,
1195
+ "propertyNames: key is string but schema requires different type"});
1196
+ continue;
1197
+ }
1198
+ uint64_t len = utf8_length(key_sv);
1199
+ if (pn->min_length.has_value() && len < pn->min_length.value()) {
1200
+ errors.push_back({error_code::min_length_violation, path,
1201
+ "propertyNames: key too short: " + std::string(key_sv)});
1202
+ }
1203
+ if (pn->max_length.has_value() && len > pn->max_length.value()) {
1204
+ errors.push_back({error_code::max_length_violation, path,
1205
+ "propertyNames: key too long: " + std::string(key_sv)});
1206
+ }
1207
+ if (pn->compiled_pattern) {
1208
+ if (!re2::RE2::PartialMatch(re2::StringPiece(key_sv.data(), key_sv.size()), *pn->compiled_pattern)) {
1209
+ errors.push_back({error_code::pattern_mismatch, path,
1210
+ "propertyNames: key does not match pattern: " + std::string(key_sv)});
1211
+ }
1212
+ }
1213
+ if (pn->format.has_value() && !check_format_by_id(key_sv, pn->format_id)) {
1214
+ errors.push_back({error_code::format_mismatch, path,
1215
+ "propertyNames: key does not match format: " + std::string(key_sv)});
1216
+ }
1217
+ }
1218
+ } else {
1219
+ // Fallback: parse key as JSON string element
1220
+ for (auto [key, val] : obj) {
1221
+ std::string key_json = "\"" + std::string(key) + "\"";
1222
+ auto key_result = tl_dom_key_parser().parse(key_json);
1223
+ if (!key_result.error()) {
1224
+ validate_node(pn, key_result.value(), path, ctx, errors, all_errors);
1225
+ }
1113
1226
  }
1114
1227
  }
1115
1228
  }
@@ -1235,12 +1348,8 @@ static bool validate_fast(const schema_node_ptr& node,
1235
1348
  }
1236
1349
 
1237
1350
  // type
1238
- if (!node->types.empty()) {
1239
- bool match = false;
1240
- for (const auto& t : node->types) {
1241
- if (type_matches(value, t)) { match = true; break; }
1242
- }
1243
- if (!match) [[unlikely]] return false;
1351
+ if (node->type_mask) {
1352
+ if (!type_matches_mask(value, node->type_mask)) [[unlikely]] return false;
1244
1353
  }
1245
1354
 
1246
1355
  // enum
@@ -1258,10 +1367,10 @@ static bool validate_fast(const schema_node_ptr& node,
1258
1367
  if (canonical_json(value) != node->const_value_raw.value()) [[unlikely]] return false;
1259
1368
  }
1260
1369
 
1261
- auto actual_type = type_of_sv(value);
1370
+ auto vtype = value.type();
1262
1371
 
1263
1372
  // Numeric
1264
- if (actual_type == "integer" || actual_type == "number") {
1373
+ if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
1265
1374
  double v = to_double(value);
1266
1375
  if (node->minimum.has_value() && v < node->minimum.value()) return false;
1267
1376
  if (node->maximum.has_value() && v > node->maximum.value()) return false;
@@ -1274,7 +1383,7 @@ static bool validate_fast(const schema_node_ptr& node,
1274
1383
  }
1275
1384
 
1276
1385
  // String
1277
- if (actual_type == "string") {
1386
+ if (vtype == et::STRING) {
1278
1387
  std::string_view sv;
1279
1388
  value.get(sv);
1280
1389
  uint64_t len = utf8_length(sv);
@@ -1284,22 +1393,38 @@ static bool validate_fast(const schema_node_ptr& node,
1284
1393
  if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern))
1285
1394
  return false;
1286
1395
  }
1287
- if (node->format.has_value() && !check_format(sv, node->format.value())) return false;
1396
+ if (node->format.has_value() && !check_format_by_id(sv, node->format_id)) return false;
1288
1397
  }
1289
1398
 
1290
1399
  // Array
1291
- if (actual_type == "array" && value.is<dom::array>()) {
1400
+ if (vtype == et::ARRAY) {
1292
1401
  dom::array arr; value.get(arr);
1293
- uint64_t arr_size = 0;
1294
- for ([[maybe_unused]] auto _ : arr) ++arr_size;
1402
+ uint64_t arr_size = arr.size();
1403
+ if(arr_size == 0xFFFFFF) [[unlikely]] {
1404
+ // Fallback for large arrays: size() saturates at 0xFFFFFF, so count the elements manually to get the true length
1405
+ arr_size = 0;
1406
+ for ([[maybe_unused]] auto _ : arr) ++arr_size;
1407
+ }
1295
1408
 
1296
1409
  if (node->min_items.has_value() && arr_size < node->min_items.value()) return false;
1297
1410
  if (node->max_items.has_value() && arr_size > node->max_items.value()) return false;
1298
1411
 
1299
1412
  if (node->unique_items) {
1300
- std::set<std::string> seen;
1301
- for (auto item : arr) {
1302
- if (!seen.insert(canonical_json(item)).second) return false;
1413
+ auto first_it = arr.begin();
1414
+ if (first_it != arr.end()) {
1415
+ auto first_type = (*first_it).type();
1416
+ bool all_same = true;
1417
+ for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
1418
+ if (all_same && first_type == et::STRING) {
1419
+ std::set<std::string_view> seen;
1420
+ for (auto item : arr) { std::string_view sv; item.get(sv); if (!seen.insert(sv).second) return false; }
1421
+ } else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
1422
+ std::set<double> seen;
1423
+ for (auto item : arr) { if (!seen.insert(to_double(item)).second) return false; }
1424
+ } else {
1425
+ std::set<std::string> seen;
1426
+ for (auto item : arr) { if (!seen.insert(canonical_json(item)).second) return false; }
1427
+ }
1303
1428
  }
1304
1429
  }
1305
1430
 
@@ -1326,7 +1451,7 @@ static bool validate_fast(const schema_node_ptr& node,
1326
1451
  }
1327
1452
 
1328
1453
  // Object
1329
- if (actual_type == "object" && value.is<dom::object>()) {
1454
+ if (vtype == et::OBJECT) {
1330
1455
  dom::object obj; value.get(obj);
1331
1456
 
1332
1457
  if (node->min_properties.has_value() || node->max_properties.has_value()) {
@@ -1443,19 +1568,27 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1443
1568
  return;
1444
1569
  }
1445
1570
  // Type
1446
- if (!n->types.empty()) {
1447
- if (n->types.size() == 1) {
1448
- auto& t = n->types[0];
1449
- if (t=="object") out.push_back({cg::op::EXPECT_OBJECT});
1450
- else if (t=="array") out.push_back({cg::op::EXPECT_ARRAY});
1451
- else if (t=="string") out.push_back({cg::op::EXPECT_STRING});
1452
- else if (t=="number") out.push_back({cg::op::EXPECT_NUMBER});
1453
- else if (t=="integer") out.push_back({cg::op::EXPECT_INTEGER});
1454
- else if (t=="boolean") out.push_back({cg::op::EXPECT_BOOLEAN});
1455
- else if (t=="null") out.push_back({cg::op::EXPECT_NULL});
1571
+ if (n->type_mask) {
1572
+ int popcount = __builtin_popcount(n->type_mask);
1573
+ if (popcount == 1) {
1574
+ // Single type — emit specific opcode
1575
+ for (int b = 0; b < 7; ++b) {
1576
+ if (n->type_mask & (1u << b)) {
1577
+ switch (static_cast<json_type>(b)) {
1578
+ case json_type::object: out.push_back({cg::op::EXPECT_OBJECT}); break;
1579
+ case json_type::array: out.push_back({cg::op::EXPECT_ARRAY}); break;
1580
+ case json_type::string: out.push_back({cg::op::EXPECT_STRING}); break;
1581
+ case json_type::number: out.push_back({cg::op::EXPECT_NUMBER}); break;
1582
+ case json_type::integer: out.push_back({cg::op::EXPECT_INTEGER}); break;
1583
+ case json_type::boolean: out.push_back({cg::op::EXPECT_BOOLEAN}); break;
1584
+ case json_type::null_value: out.push_back({cg::op::EXPECT_NULL}); break;
1585
+ }
1586
+ break;
1587
+ }
1588
+ }
1456
1589
  } else {
1457
- uint32_t i = (uint32_t)p.type_sets.size();
1458
- p.type_sets.push_back(n->types);
1590
+ uint32_t i = (uint32_t)p.type_masks.size();
1591
+ p.type_masks.push_back(n->type_mask);
1459
1592
  out.push_back({cg::op::EXPECT_TYPE_MULTI, i});
1460
1593
  }
1461
1594
  }
@@ -1485,13 +1618,7 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1485
1618
  if (n->compiled_pattern) { uint32_t i=(uint32_t)p.regexes.size(); p.regexes.push_back(n->compiled_pattern); out.push_back({cg::op::CHECK_PATTERN,i}); }
1486
1619
  if (n->format.has_value()) {
1487
1620
  uint32_t i=(uint32_t)p.format_ids.size();
1488
- uint8_t fid=255;
1489
- auto& f=*n->format;
1490
- if(f=="email")fid=0;else if(f=="date")fid=1;else if(f=="date-time")fid=2;
1491
- else if(f=="time")fid=3;else if(f=="ipv4")fid=4;else if(f=="ipv6")fid=5;
1492
- else if(f=="uri"||f=="uri-reference")fid=6;else if(f=="uuid")fid=7;
1493
- else if(f=="hostname")fid=8;
1494
- p.format_ids.push_back(fid);
1621
+ p.format_ids.push_back(n->format_id);
1495
1622
  out.push_back({cg::op::CHECK_FORMAT,i});
1496
1623
  }
1497
1624
  // Array
@@ -1535,44 +1662,43 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1535
1662
  }
1536
1663
 
1537
1664
  // --- Codegen executor ---
1538
- static const char* fmt_names[]={"email","date","date-time","time","ipv4","ipv6","uri","uuid","hostname"};
1539
1665
 
1540
1666
  static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1541
1667
  dom::element value) {
1542
- auto t = type_of_sv(value);
1668
+ auto t = value.type();
1669
+ bool t_numeric = (t == et::INT64 || t == et::UINT64 || t == et::DOUBLE);
1670
+ double t_dval = t_numeric ? to_double(value) : 0.0;
1543
1671
  for (size_t i=0; i<code.size(); ++i) {
1544
1672
  auto& c = code[i];
1545
1673
  switch(c.o) {
1546
1674
  case cg::op::END: return true;
1547
- case cg::op::EXPECT_OBJECT: if(t!="object") return false; break;
1548
- case cg::op::EXPECT_ARRAY: if(t!="array") return false; break;
1549
- case cg::op::EXPECT_STRING: if(t!="string") return false; break;
1550
- case cg::op::EXPECT_NUMBER: if(t!="number"&&t!="integer") return false; break;
1551
- case cg::op::EXPECT_INTEGER: if(t!="integer") return false; break;
1552
- case cg::op::EXPECT_BOOLEAN: if(t!="boolean") return false; break;
1553
- case cg::op::EXPECT_NULL: if(t!="null") return false; break;
1675
+ case cg::op::EXPECT_OBJECT: if(t!=et::OBJECT) return false; break;
1676
+ case cg::op::EXPECT_ARRAY: if(t!=et::ARRAY) return false; break;
1677
+ case cg::op::EXPECT_STRING: if(t!=et::STRING) return false; break;
1678
+ case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
1679
+ case cg::op::EXPECT_INTEGER: if(t!=et::INT64&&t!=et::UINT64) return false; break;
1680
+ case cg::op::EXPECT_BOOLEAN: if(t!=et::BOOL) return false; break;
1681
+ case cg::op::EXPECT_NULL: if(t!=et::NULL_VALUE) return false; break;
1554
1682
  case cg::op::EXPECT_TYPE_MULTI: {
1555
- auto& ts=p.type_sets[c.a]; bool m=false;
1556
- for(auto& ty:ts){if(t==ty||(ty=="number"&&(t=="integer"||t=="number"))){m=true;break;}}
1557
- if(!m) return false; break;
1558
- }
1559
- case cg::op::CHECK_MINIMUM: if(t=="integer"||t=="number"){if(to_double(value)<p.doubles[c.a])return false;} break;
1560
- case cg::op::CHECK_MAXIMUM: if(t=="integer"||t=="number"){if(to_double(value)>p.doubles[c.a])return false;} break;
1561
- case cg::op::CHECK_EX_MINIMUM: if(t=="integer"||t=="number"){if(to_double(value)<=p.doubles[c.a])return false;} break;
1562
- case cg::op::CHECK_EX_MAXIMUM: if(t=="integer"||t=="number"){if(to_double(value)>=p.doubles[c.a])return false;} break;
1563
- case cg::op::CHECK_MULTIPLE_OF: if(t=="integer"||t=="number"){double v=to_double(value),d=p.doubles[c.a],r=std::fmod(v,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break;
1564
- case cg::op::CHECK_MIN_LENGTH: if(t=="string"){std::string_view sv;value.get(sv);if(utf8_length(sv)<c.a)return false;} break;
1565
- case cg::op::CHECK_MAX_LENGTH: if(t=="string"){std::string_view sv;value.get(sv);if(utf8_length(sv)>c.a)return false;} break;
1566
- case cg::op::CHECK_PATTERN: if(t=="string"){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1567
- case cg::op::CHECK_FORMAT: if(t=="string"){std::string_view sv;value.get(sv);uint8_t f=p.format_ids[c.a];if(f<9&&!check_format(sv,fmt_names[f]))return false;} break;
1568
- case cg::op::CHECK_MIN_ITEMS: if(t=="array"){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s<c.a)return false;} break;
1569
- case cg::op::CHECK_MAX_ITEMS: if(t=="array"){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s>c.a)return false;} break;
1570
- case cg::op::CHECK_UNIQUE_ITEMS: if(t=="array"){dom::array a;value.get(a);std::set<std::string> seen;for(auto x:a)if(!seen.insert(canonical_json(x)).second)return false;} break;
1571
- case cg::op::ARRAY_ITEMS: if(t=="array"){dom::array a;value.get(a);for(auto x:a)if(!cg_exec(p,p.subs[c.a],x))return false;} break;
1572
- case cg::op::CHECK_REQUIRED: if(t=="object"){dom::object o;value.get(o);dom::element d;if(o[p.strings[c.a]].get(d)!=SUCCESS)return false;} break;
1573
- case cg::op::CHECK_MIN_PROPS: if(t=="object"){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n<c.a)return false;} break;
1574
- case cg::op::CHECK_MAX_PROPS: if(t=="object"){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
1575
- case cg::op::OBJ_PROPS_START: if(t=="object"){
1683
+ if(!(element_type_mask(t) & p.type_masks[c.a])) return false; break;
1684
+ }
1685
+ case cg::op::CHECK_MINIMUM: if(t_numeric&&t_dval<p.doubles[c.a])return false; break;
1686
+ case cg::op::CHECK_MAXIMUM: if(t_numeric&&t_dval>p.doubles[c.a])return false; break;
1687
+ case cg::op::CHECK_EX_MINIMUM: if(t_numeric&&t_dval<=p.doubles[c.a])return false; break;
1688
+ case cg::op::CHECK_EX_MAXIMUM: if(t_numeric&&t_dval>=p.doubles[c.a])return false; break;
1689
+ case cg::op::CHECK_MULTIPLE_OF: if(t_numeric){double d=p.doubles[c.a],r=std::fmod(t_dval,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break;
1690
+ case cg::op::CHECK_MIN_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)<c.a)return false;} break;
1691
+ case cg::op::CHECK_MAX_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)>c.a)return false;} break;
1692
+ case cg::op::CHECK_PATTERN: if(t==et::STRING){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1693
+ case cg::op::CHECK_FORMAT: if(t==et::STRING){std::string_view sv;value.get(sv);if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
1694
+ case cg::op::CHECK_MIN_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s<c.a)return false;} break;
1695
+ case cg::op::CHECK_MAX_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s>c.a)return false;} break;
1696
+ case cg::op::CHECK_UNIQUE_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);std::set<std::string> seen;for(auto x:a)if(!seen.insert(canonical_json(x)).second)return false;} break;
1697
+ case cg::op::ARRAY_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);for(auto x:a)if(!cg_exec(p,p.subs[c.a],x))return false;} break;
1698
+ case cg::op::CHECK_REQUIRED: if(t==et::OBJECT){dom::object o;value.get(o);dom::element d;if(o[p.strings[c.a]].get(d)!=SUCCESS)return false;} break;
1699
+ case cg::op::CHECK_MIN_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n<c.a)return false;} break;
1700
+ case cg::op::CHECK_MAX_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
1701
+ case cg::op::OBJ_PROPS_START: if(t==et::OBJECT){
1576
1702
  dom::object o; value.get(o);
1577
1703
  // collect prop defs
1578
1704
  struct pd{std::string_view nm;uint32_t si;};
@@ -1592,13 +1718,13 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1592
1718
  case cg::op::OBJ_PROP: case cg::op::OBJ_PROPS_END: case cg::op::CHECK_NO_ADDITIONAL: break;
1593
1719
  case cg::op::CHECK_ENUM_STR: {
1594
1720
  auto& es=p.enum_sets[c.a]; bool f=false;
1595
- if(t=="string"){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1721
+ if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1596
1722
  if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
1597
1723
  if(!f)return false; break;
1598
1724
  }
1599
1725
  case cg::op::CHECK_ENUM: {
1600
1726
  auto& es=p.enum_sets[c.a]; bool f=false;
1601
- if(t=="string"){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1727
+ if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1602
1728
  if(!f&&value.is<int64_t>()){int64_t v;value.get(v);auto s=std::to_string(v);for(auto& e:es)if(e==s){f=true;break;}}
1603
1729
  if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
1604
1730
  if(!f)return false; break;
@@ -1614,51 +1740,53 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1614
1740
  // Uses simdjson On Demand API to avoid materializing the full DOM tree.
1615
1741
  // Returns: true = valid, false = invalid OR unsupported (fallback to DOM).
1616
1742
 
1617
- static std::string_view od_type(simdjson::ondemand::value& v) {
1743
+ static json_type od_type(simdjson::ondemand::value& v) {
  // Classifies an On Demand value by switching on v.type() and returning the
  // matching json_type enumerator. The removed ('-') lines returned the type
  // name as a string_view instead, which callers compared as text.
1618
1744
  switch (v.type()) {
1619
- case simdjson::ondemand::json_type::object: return "object";
1620
- case simdjson::ondemand::json_type::array: return "array";
1621
- case simdjson::ondemand::json_type::string: return "string";
1622
- case simdjson::ondemand::json_type::boolean: return "boolean";
1623
- case simdjson::ondemand::json_type::null: return "null";
1745
+ case simdjson::ondemand::json_type::object: return json_type::object;
1746
+ case simdjson::ondemand::json_type::array: return json_type::array;
1747
+ case simdjson::ondemand::json_type::string: return json_type::string;
1748
+ case simdjson::ondemand::json_type::boolean: return json_type::boolean;
1749
+ case simdjson::ondemand::json_type::null: return json_type::null_value;
1624
1750
  case simdjson::ondemand::json_type::number: {
  // Only a number whose number_type was read successfully AND is
  // floating_point_number is reported as "number"; every other outcome,
  // including a failed get_number_type(), is reported as "integer".
1625
1751
  simdjson::ondemand::number_type nt;
1626
1752
  if (v.get_number_type().get(nt) == SUCCESS &&
1627
1753
  nt == simdjson::ondemand::number_type::floating_point_number)
1628
- return "number";
1629
- return "integer";
1754
+ return json_type::number;
1755
+ return json_type::integer;
1630
1756
  }
1631
1757
  }
  // Reached only when v.type() matched none of the cases above.
  // NOTE(review): the removed line reported "unknown" here, which equalled no
  // expected type name; the added line reports json_type::string, so such a
  // value is now classified as a string -- confirm this is intended.
1632
- return "unknown";
1758
+ return json_type::string;
1633
1759
  }
1634
1760
 
1635
1761
  static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1636
1762
  simdjson::ondemand::value value) {
1637
1763
  auto t = od_type(value);
1764
+ bool t_numeric = (t == json_type::integer || t == json_type::number);
1638
1765
  for (size_t i = 0; i < code.size(); ++i) {
1639
1766
  auto& c = code[i];
1640
1767
  switch (c.o) {
1641
1768
  case cg::op::END: return true;
1642
- case cg::op::EXPECT_OBJECT: if(t!="object") return false; break;
1643
- case cg::op::EXPECT_ARRAY: if(t!="array") return false; break;
1644
- case cg::op::EXPECT_STRING: if(t!="string") return false; break;
1645
- case cg::op::EXPECT_NUMBER: if(t!="number"&&t!="integer") return false; break;
1646
- case cg::op::EXPECT_INTEGER: if(t!="integer") return false; break;
1647
- case cg::op::EXPECT_BOOLEAN: if(t!="boolean") return false; break;
1648
- case cg::op::EXPECT_NULL: if(t!="null") return false; break;
1769
+ case cg::op::EXPECT_OBJECT: if(t!=json_type::object) return false; break;
1770
+ case cg::op::EXPECT_ARRAY: if(t!=json_type::array) return false; break;
1771
+ case cg::op::EXPECT_STRING: if(t!=json_type::string) return false; break;
1772
+ case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
1773
+ case cg::op::EXPECT_INTEGER: if(t!=json_type::integer) return false; break;
1774
+ case cg::op::EXPECT_BOOLEAN: if(t!=json_type::boolean) return false; break;
1775
+ case cg::op::EXPECT_NULL: if(t!=json_type::null_value) return false; break;
1649
1776
  case cg::op::EXPECT_TYPE_MULTI: {
1650
- auto& ts=p.type_sets[c.a]; bool m=false;
1651
- for(auto& ty:ts){if(t==ty||(ty=="number"&&(t=="integer"||t=="number"))){m=true;break;}}
1652
- if(!m) return false; break;
1777
+ // integer matches both "integer" and "number" type constraints
1778
+ uint8_t tbits = json_type_bit(t);
1779
+ if (t == json_type::integer) tbits |= json_type_bit(json_type::number);
1780
+ if(!(tbits & p.type_masks[c.a])) return false; break;
1653
1781
  }
1654
1782
  case cg::op::CHECK_MINIMUM:
1655
1783
  case cg::op::CHECK_MAXIMUM:
1656
1784
  case cg::op::CHECK_EX_MINIMUM:
1657
1785
  case cg::op::CHECK_EX_MAXIMUM:
1658
1786
  case cg::op::CHECK_MULTIPLE_OF: {
1659
- if (t=="integer"||t=="number") {
1787
+ if (t_numeric) {
1660
1788
  double v;
1661
- if (t=="integer") { int64_t iv; if(value.get(iv)!=SUCCESS) return false; v=(double)iv; }
1789
+ if (t==json_type::integer) { int64_t iv; if(value.get(iv)!=SUCCESS) return false; v=(double)iv; }
1662
1790
  else { if(value.get(v)!=SUCCESS) return false; }
1663
1791
  double d=p.doubles[c.a];
1664
1792
  if(c.o==cg::op::CHECK_MINIMUM && v<d) return false;
@@ -1669,39 +1797,39 @@ static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1669
1797
  }
1670
1798
  break;
1671
1799
  }
1672
- case cg::op::CHECK_MIN_LENGTH: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)<c.a) return false;} break;
1673
- case cg::op::CHECK_MAX_LENGTH: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)>c.a) return false;} break;
1674
- case cg::op::CHECK_PATTERN: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1675
- case cg::op::CHECK_FORMAT: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; uint8_t f=p.format_ids[c.a]; if(f<9&&!check_format(sv,fmt_names[f]))return false;} break;
1676
- case cg::op::CHECK_MIN_ITEMS: if(t=="array"){
1800
+ case cg::op::CHECK_MIN_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)<c.a) return false;} break;
1801
+ case cg::op::CHECK_MAX_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)>c.a) return false;} break;
1802
+ case cg::op::CHECK_PATTERN: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1803
+ case cg::op::CHECK_FORMAT: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
1804
+ case cg::op::CHECK_MIN_ITEMS: if(t==json_type::array){
1677
1805
  simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
1678
1806
  uint64_t s=0; for(auto x:a){(void)x;++s;} if(s<c.a) return false;
1679
1807
  } break;
1680
- case cg::op::CHECK_MAX_ITEMS: if(t=="array"){
1808
+ case cg::op::CHECK_MAX_ITEMS: if(t==json_type::array){
1681
1809
  simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
1682
1810
  uint64_t s=0; for(auto x:a){(void)x;++s;} if(s>c.a) return false;
1683
1811
  } break;
1684
- case cg::op::ARRAY_ITEMS: if(t=="array"){
1812
+ case cg::op::ARRAY_ITEMS: if(t==json_type::array){
1685
1813
  simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
1686
1814
  for(auto elem:a){
1687
1815
  simdjson::ondemand::value v; if(elem.get(v)!=SUCCESS) return false;
1688
1816
  if(!od_exec(p,p.subs[c.a],v)) return false;
1689
1817
  }
1690
1818
  } break;
1691
- case cg::op::CHECK_REQUIRED: if(t=="object"){
1819
+ case cg::op::CHECK_REQUIRED: if(t==json_type::object){
1692
1820
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1693
1821
  auto f = o.find_field_unordered(p.strings[c.a]);
1694
1822
  if(f.error()) return false;
1695
1823
  } break;
1696
- case cg::op::CHECK_MIN_PROPS: if(t=="object"){
1824
+ case cg::op::CHECK_MIN_PROPS: if(t==json_type::object){
1697
1825
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1698
1826
  uint64_t n=0; for(auto f:o){(void)f;++n;} if(n<c.a) return false;
1699
1827
  } break;
1700
- case cg::op::CHECK_MAX_PROPS: if(t=="object"){
1828
+ case cg::op::CHECK_MAX_PROPS: if(t==json_type::object){
1701
1829
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1702
1830
  uint64_t n=0; for(auto f:o){(void)f;++n;} if(n>c.a) return false;
1703
1831
  } break;
1704
- case cg::op::OBJ_PROPS_START: if(t=="object"){
1832
+ case cg::op::OBJ_PROPS_START: if(t==json_type::object){
1705
1833
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1706
1834
  struct pd{std::string_view nm;uint32_t si;};
1707
1835
  std::vector<pd> props; bool no_add=false;
@@ -1854,10 +1982,9 @@ validation_result validate(const schema_ref& schema, std::string_view json,
1854
1982
  // Codegen said invalid OR hit COMPOSITION — fall through to tree walker
1855
1983
  }
1856
1984
 
1857
- // Slow path: re-parse + tree walker with error details
1858
- auto result2 = dom_p.parse(psv);
1985
+ // Slow path: tree walker with error details (reuse already-parsed DOM)
1859
1986
  std::vector<validation_error> errors;
1860
- validate_node(schema.impl->root, result2.value(), "", *schema.impl, errors,
1987
+ validate_node(schema.impl->root, result.value(), "", *schema.impl, errors,
1861
1988
  opts.all_errors);
1862
1989
 
1863
1990
  return {errors.empty(), std::move(errors)};