ata-validator 0.7.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ata.cpp CHANGED
@@ -8,7 +8,9 @@
8
8
  #include <algorithm>
9
9
  #include <cmath>
10
10
  #include <cstring>
11
+ #ifndef ATA_NO_RE2
11
12
  #include <re2/re2.h>
13
+ #endif
12
14
  #include <set>
13
15
  #include <unordered_map>
14
16
 
@@ -53,11 +55,15 @@ static bool fast_check_email(std::string_view s) {
53
55
  }
54
56
 
55
57
  static bool fast_check_date(std::string_view s) {
56
- // YYYY-MM-DD
57
- return s.size() == 10 && is_digit(s[0]) && is_digit(s[1]) &&
58
- is_digit(s[2]) && is_digit(s[3]) && s[4] == '-' &&
59
- is_digit(s[5]) && is_digit(s[6]) && s[7] == '-' &&
60
- is_digit(s[8]) && is_digit(s[9]);
58
+ // YYYY-MM-DD with range validation
59
+ if (s.size() != 10 || !is_digit(s[0]) || !is_digit(s[1]) ||
60
+ !is_digit(s[2]) || !is_digit(s[3]) || s[4] != '-' ||
61
+ !is_digit(s[5]) || !is_digit(s[6]) || s[7] != '-' ||
62
+ !is_digit(s[8]) || !is_digit(s[9]))
63
+ return false;
64
+ int month = (s[5] - '0') * 10 + (s[6] - '0');
65
+ int day = (s[8] - '0') * 10 + (s[9] - '0');
66
+ return month >= 1 && month <= 12 && day >= 1 && day <= 31;
61
67
  }
62
68
 
63
69
  static bool fast_check_time(std::string_view s) {
@@ -274,7 +280,9 @@ struct schema_node {
274
280
  std::optional<uint64_t> min_length;
275
281
  std::optional<uint64_t> max_length;
276
282
  std::optional<std::string> pattern;
283
+ #ifndef ATA_NO_RE2
277
284
  std::shared_ptr<re2::RE2> compiled_pattern; // cached compiled regex (RE2)
285
+ #endif
278
286
 
279
287
  // array
280
288
  std::optional<uint64_t> min_items;
@@ -301,7 +309,9 @@ struct schema_node {
301
309
  struct pattern_prop {
302
310
  std::string pattern;
303
311
  schema_node_ptr schema;
312
+ #ifndef ATA_NO_RE2
304
313
  std::shared_ptr<re2::RE2> compiled;
314
+ #endif
305
315
  };
306
316
  std::vector<pattern_prop> pattern_properties;
307
317
 
@@ -326,6 +336,8 @@ struct schema_node {
326
336
 
327
337
  // $ref
328
338
  std::string ref;
339
+ std::string dynamic_ref; // $dynamicRef value (e.g. "#items")
340
+ std::string id; // $id — resource boundary marker
329
341
 
330
342
  // $defs — stored on node for pointer navigation
331
343
  std::unordered_map<std::string, schema_node_ptr> defs;
@@ -351,7 +363,9 @@ struct plan {
351
363
  std::vector<ins> code;
352
364
  std::vector<double> doubles;
353
365
  std::vector<std::string> strings;
366
+ #ifndef ATA_NO_RE2
354
367
  std::vector<std::shared_ptr<re2::RE2>> regexes;
368
+ #endif
355
369
  std::vector<std::vector<std::string>> enum_sets;
356
370
  std::vector<uint8_t> type_masks;
357
371
  std::vector<uint8_t> format_ids;
@@ -374,7 +388,9 @@ struct od_plan {
374
388
 
375
389
  // String — single value.get(sv) then all checks
376
390
  std::optional<uint64_t> min_length, max_length;
391
+ #ifndef ATA_NO_RE2
377
392
  re2::RE2* pattern = nullptr; // borrowed pointer from schema_node
393
+ #endif
378
394
  uint8_t format_id = 255; // 255 = no format check
379
395
 
380
396
  // Object — single iterate with merged required+property lookup
@@ -408,10 +424,22 @@ struct compiled_schema {
408
424
  schema_node_ptr root;
409
425
  std::unordered_map<std::string, schema_node_ptr> defs;
410
426
  std::string raw_schema;
427
+ std::string compile_error; // non-empty if compilation failed
411
428
  dom::parser parser; // used only at compile time
412
429
  cg::plan gen_plan; // codegen validation plan
413
430
  bool use_ondemand = false; // true if codegen plan supports On Demand
414
431
  od_plan_ptr od; // On-Demand execution plan
432
+
433
+ // anchor resolution
434
+ std::unordered_map<std::string, schema_node_ptr> anchors;
435
+ std::unordered_map<std::string,
436
+ std::unordered_map<std::string, schema_node_ptr>> resource_dynamic_anchors;
437
+ bool has_dynamic_refs = false;
438
+ std::string current_resource_id; // compile-time only
439
+
440
+ // compile-time warnings (misplaced keywords, etc.)
441
+ std::vector<schema_warning> warnings;
442
+ std::string compile_path; // current JSON pointer during compilation
415
443
  };
416
444
 
417
445
  // Thread-local persistent parsers — reused across all validate calls on the
@@ -462,6 +490,64 @@ static schema_node_ptr compile_node(dom::element el,
462
490
  }
463
491
  }
464
492
 
493
+ // $id — must come before $anchor/$dynamicAnchor so current_resource_id is set
494
+ std::string prev_resource = ctx.current_resource_id;
495
+ {
496
+ dom::element id_el;
497
+ if (obj["$id"].get(id_el) == SUCCESS) {
498
+ std::string_view sv;
499
+ if (id_el.get(sv) == SUCCESS) {
500
+ node->id = std::string(sv);
501
+ ctx.current_resource_id = node->id;
502
+ ctx.defs[node->id] = node;
503
+ }
504
+ }
505
+ }
506
+
507
+ // $anchor — register in flat anchor map
508
+ {
509
+ dom::element anchor_el;
510
+ if (obj["$anchor"].get(anchor_el) == SUCCESS) {
511
+ std::string_view sv;
512
+ if (anchor_el.get(sv) == SUCCESS) {
513
+ ctx.anchors[std::string(sv)] = node;
514
+ }
515
+ }
516
+ }
517
+
518
+ // $dynamicAnchor — register in both flat anchors and per-resource map
519
+ {
520
+ dom::element da_el;
521
+ if (obj["$dynamicAnchor"].get(da_el) == SUCCESS) {
522
+ std::string_view sv;
523
+ if (da_el.get(sv) == SUCCESS) {
524
+ std::string name(sv);
525
+ ctx.anchors[name] = node;
526
+ ctx.resource_dynamic_anchors[ctx.current_resource_id][name] = node;
527
+ }
528
+ }
529
+ }
530
+
531
+ // $dynamicRef
532
+ {
533
+ dom::element dr_el;
534
+ if (obj["$dynamicRef"].get(dr_el) == SUCCESS) {
535
+ std::string_view sv;
536
+ if (dr_el.get(sv) == SUCCESS) {
537
+ std::string dr_val(sv);
538
+ // If the $dynamicRef starts with "#" (fragment-only) and we're inside
539
+ // a non-root resource, qualify it with the current resource ID so
540
+ // validation can resolve it correctly.
541
+ if (!dr_val.empty() && dr_val[0] == '#' &&
542
+ !ctx.current_resource_id.empty()) {
543
+ dr_val = ctx.current_resource_id + dr_val;
544
+ }
545
+ node->dynamic_ref = dr_val;
546
+ ctx.has_dynamic_refs = true;
547
+ }
548
+ }
549
+ }
550
+
465
551
  // type
466
552
  dom::element type_el;
467
553
  if (obj["type"].get(type_el) == SUCCESS) {
@@ -516,10 +602,15 @@ static schema_node_ptr compile_node(dom::element el,
516
602
  std::string_view sv;
517
603
  if (str_el.get(sv) == SUCCESS) {
518
604
  node->pattern = std::string(sv);
605
+ #ifdef ATA_NO_RE2
606
+ ctx.compile_error = "pattern keyword requires RE2 support (built with ATA_NO_RE2)";
607
+ return node;
608
+ #else
519
609
  auto re = std::make_shared<re2::RE2>(node->pattern.value());
520
610
  if (re->ok()) {
521
611
  node->compiled_pattern = std::move(re);
522
612
  }
613
+ #endif
523
614
  }
524
615
  }
525
616
 
@@ -636,6 +727,10 @@ static schema_node_ptr compile_node(dom::element el,
636
727
  dom::element pp_el;
637
728
  if (obj["patternProperties"].get(pp_el) == SUCCESS &&
638
729
  pp_el.is<dom::object>()) {
730
+ #ifdef ATA_NO_RE2
731
+ ctx.compile_error = "patternProperties keyword requires RE2 support (built with ATA_NO_RE2)";
732
+ return node;
733
+ #else
639
734
  dom::object pp_obj; pp_el.get(pp_obj);
640
735
  for (auto [key, val] : pp_obj) {
641
736
  schema_node::pattern_prop pp;
@@ -647,6 +742,7 @@ static schema_node_ptr compile_node(dom::element el,
647
742
  }
648
743
  node->pattern_properties.push_back(std::move(pp));
649
744
  }
745
+ #endif
650
746
  }
651
747
 
652
748
  // format
@@ -659,15 +755,6 @@ static schema_node_ptr compile_node(dom::element el,
659
755
  }
660
756
  }
661
757
 
662
- // $id (register in defs for potential resolution)
663
- dom::element id_el;
664
- if (obj["$id"].get(id_el) == SUCCESS) {
665
- std::string_view sv;
666
- if (id_el.get(sv) == SUCCESS) {
667
- ctx.defs[std::string(sv)] = node;
668
- }
669
- }
670
-
671
758
  // enum — pre-minify each value at compile time
672
759
  dom::element enum_el;
673
760
  if (obj["enum"].get(enum_el) == SUCCESS) {
@@ -743,17 +830,199 @@ static schema_node_ptr compile_node(dom::element el,
743
830
  }
744
831
  }
745
832
 
833
+ // Warn about keywords used at the wrong type level.
834
+ // Only check when an explicit "type" is declared (type_mask != 0).
835
+ if (node->type_mask != 0) {
836
+ const uint8_t array_bit = json_type_bit(json_type::array);
837
+ const uint8_t string_bit = json_type_bit(json_type::string);
838
+ const uint8_t number_bits = json_type_bit(json_type::number) |
839
+ json_type_bit(json_type::integer);
840
+ const uint8_t object_bit = json_type_bit(json_type::object);
841
+
842
+ auto warn = [&](const char* keyword, const char* expected_type) {
843
+ ctx.warnings.push_back({
844
+ ctx.compile_path,
845
+ std::string(keyword) + " has no effect on type \"" +
846
+ (node->type_mask & json_type_bit(json_type::string) ? "string" :
847
+ node->type_mask & json_type_bit(json_type::boolean) ? "boolean" :
848
+ node->type_mask & json_type_bit(json_type::number) ? "number" :
849
+ node->type_mask & object_bit ? "object" :
850
+ node->type_mask & array_bit ? "array" : "unknown") +
851
+ "\", only applies to " + expected_type
852
+ });
853
+ };
854
+
855
+ // Array keywords on non-array type
856
+ if (!(node->type_mask & array_bit)) {
857
+ if (node->min_items.has_value()) warn("minItems", "array");
858
+ if (node->max_items.has_value()) warn("maxItems", "array");
859
+ if (node->unique_items) warn("uniqueItems", "array");
860
+ if (!node->prefix_items.empty()) warn("prefixItems", "array");
861
+ if (node->items_schema) warn("items", "array");
862
+ if (node->contains_schema) warn("contains", "array");
863
+ }
864
+
865
+ // String keywords on non-string type
866
+ if (!(node->type_mask & string_bit)) {
867
+ if (node->min_length.has_value()) warn("minLength", "string");
868
+ if (node->max_length.has_value()) warn("maxLength", "string");
869
+ if (node->pattern.has_value()) warn("pattern", "string");
870
+ }
871
+
872
+ // Numeric keywords on non-numeric type
873
+ if (!(node->type_mask & number_bits)) {
874
+ if (node->minimum.has_value()) warn("minimum", "number");
875
+ if (node->maximum.has_value()) warn("maximum", "number");
876
+ if (node->exclusive_minimum.has_value()) warn("exclusiveMinimum", "number");
877
+ if (node->exclusive_maximum.has_value()) warn("exclusiveMaximum", "number");
878
+ if (node->multiple_of.has_value()) warn("multipleOf", "number");
879
+ }
880
+
881
+ // Object keywords on non-object type
882
+ if (!(node->type_mask & object_bit)) {
883
+ if (!node->properties.empty()) warn("properties", "object");
884
+ if (!node->required.empty()) warn("required", "object");
885
+ }
886
+ }
887
+
888
+ ctx.current_resource_id = prev_resource;
746
889
  return node;
747
890
  }
748
891
 
749
892
  // --- Validation ---
750
893
 
894
+ using dynamic_scope_t = std::vector<const std::unordered_map<std::string, schema_node_ptr>*>;
895
+
896
// Decodes one JSON Pointer reference token taken from a URI fragment.
//
// Two passes, in this order:
//   1. Percent-decoding: "%XY" with two hex digits becomes the byte 0xXY.
//      A '%' that is not followed by two hex digits is copied through as-is.
//   2. JSON Pointer unescaping: "~1" becomes '/', "~0" becomes '~'.
//      A '~' followed by anything else (or ending the token) is kept literally.
// Because pass 2 runs on the output of pass 1, "%7E1" decodes to "/".
static std::string decode_pointer_segment(const std::string& seg) {
  // Value of a single hex digit, or -1 when `c` is not a hex digit.
  const auto nibble = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
  };

  // Pass 1: percent-decoding.
  std::string unescaped;
  std::size_t pos = 0;
  while (pos < seg.size()) {
    const char c = seg[pos];
    if (c == '%' && pos + 2 < seg.size()) {
      const int hi = nibble(seg[pos + 1]);
      const int lo = nibble(seg[pos + 2]);
      if (hi >= 0 && lo >= 0) {
        unescaped.push_back(static_cast<char>(hi * 16 + lo));
        pos += 3;  // consumed '%' and both hex digits
        continue;
      }
    }
    unescaped.push_back(c);
    ++pos;
  }

  // Pass 2: "~1" -> '/', "~0" -> '~'.
  std::string decoded;
  pos = 0;
  while (pos < unescaped.size()) {
    const char c = unescaped[pos];
    const bool has_next = pos + 1 < unescaped.size();
    if (c == '~' && has_next && unescaped[pos + 1] == '1') {
      decoded.push_back('/');
      pos += 2;
    } else if (c == '~' && has_next && unescaped[pos + 1] == '0') {
      decoded.push_back('~');
      pos += 2;
    } else {
      decoded.push_back(c);
      ++pos;
    }
  }
  return decoded;
}
930
+
931
+ // Walk a JSON Pointer (without leading #) within a given schema node.
932
+ // Returns the resolved node, or nullptr if not found.
933
+ static schema_node_ptr walk_json_pointer(const schema_node_ptr& root_node,
934
+ const std::string& pointer) {
935
+ if (pointer.empty()) return root_node;
936
+
937
+ std::vector<std::string> segments;
938
+ size_t spos = 0;
939
+ // pointer starts with "/" — skip leading slash
940
+ if (!pointer.empty() && pointer[0] == '/') spos = 1;
941
+ while (spos <= pointer.size()) {
942
+ size_t snext = pointer.find('/', spos);
943
+ segments.push_back(decode_pointer_segment(
944
+ pointer.substr(spos, snext == std::string::npos ? snext : snext - spos)));
945
+ spos = (snext == std::string::npos) ? pointer.size() + 1 : snext + 1;
946
+ }
947
+
948
+ schema_node_ptr current = root_node;
949
+ for (size_t si = 0; si < segments.size() && current; ++si) {
950
+ const auto& key = segments[si];
951
+ if (key == "properties" && si + 1 < segments.size()) {
952
+ const auto& prop_name = segments[++si];
953
+ auto pit = current->properties.find(prop_name);
954
+ if (pit != current->properties.end()) { current = pit->second; }
955
+ else { return nullptr; }
956
+ } else if (key == "items" && current->items_schema) {
957
+ current = current->items_schema;
958
+ } else if (key == "$defs" || key == "definitions") {
959
+ if (si + 1 < segments.size()) {
960
+ const auto& def_name = segments[++si];
961
+ auto dit = current->defs.find(def_name);
962
+ if (dit != current->defs.end()) { current = dit->second; }
963
+ else { return nullptr; }
964
+ } else { return nullptr; }
965
+ } else if (key == "allOf" || key == "anyOf" || key == "oneOf") {
966
+ if (si + 1 < segments.size()) {
967
+ size_t idx = std::stoul(segments[++si]);
968
+ auto& vec = (key == "allOf") ? current->all_of
969
+ : (key == "anyOf") ? current->any_of
970
+ : current->one_of;
971
+ if (idx < vec.size()) { current = vec[idx]; }
972
+ else { return nullptr; }
973
+ } else { return nullptr; }
974
+ } else if (key == "not" && current->not_schema) {
975
+ current = current->not_schema;
976
+ } else if (key == "if" && current->if_schema) {
977
+ current = current->if_schema;
978
+ } else if (key == "then" && current->then_schema) {
979
+ current = current->then_schema;
980
+ } else if (key == "else" && current->else_schema) {
981
+ current = current->else_schema;
982
+ } else if (key == "additionalProperties" &&
983
+ current->additional_properties_schema) {
984
+ current = current->additional_properties_schema;
985
+ } else if (key == "prefixItems") {
986
+ if (si + 1 < segments.size()) {
987
+ size_t idx = std::stoul(segments[++si]);
988
+ if (idx < current->prefix_items.size()) { current = current->prefix_items[idx]; }
989
+ else { return nullptr; }
990
+ } else { return nullptr; }
991
+ } else if (key == "contains" && current->contains_schema) {
992
+ current = current->contains_schema;
993
+ } else if (key == "propertyNames" && current->property_names_schema) {
994
+ current = current->property_names_schema;
995
+ } else {
996
+ return nullptr;
997
+ }
998
+ }
999
+ return current;
1000
+ }
1001
+
1002
+ // Find an anchor (non-pointer fragment) within a specific resource node by
1003
+ // searching its sub-tree. Used for resolving "base#anchor" references.
1004
+ static schema_node_ptr find_anchor_in_resource(const compiled_schema& ctx,
1005
+ const std::string& resource_id,
1006
+ const std::string& anchor_name) {
1007
+ // Look up in per-resource dynamic anchors first
1008
+ auto rit = ctx.resource_dynamic_anchors.find(resource_id);
1009
+ if (rit != ctx.resource_dynamic_anchors.end()) {
1010
+ auto ait = rit->second.find(anchor_name);
1011
+ if (ait != rit->second.end()) return ait->second;
1012
+ }
1013
+ // Fallback to flat anchors (which includes $anchor entries)
1014
+ auto ait = ctx.anchors.find(anchor_name);
1015
+ if (ait != ctx.anchors.end()) return ait->second;
1016
+ return nullptr;
1017
+ }
1018
+
751
1019
  static void validate_node(const schema_node_ptr& node,
752
1020
  dom::element value,
753
1021
  const std::string& path,
754
1022
  const compiled_schema& ctx,
755
1023
  std::vector<validation_error>& errors,
756
- bool all_errors = true);
1024
+ bool all_errors = true,
1025
+ dynamic_scope_t* dynamic_scope = nullptr);
757
1026
 
758
1027
  // Fast boolean-only tree walker — no error collection, no string allocation.
759
1028
  // Uses [[likely]]/[[unlikely]] hints. Returns true if valid.
@@ -808,14 +1077,27 @@ static uint64_t utf8_length(std::string_view s) {
808
1077
  return count;
809
1078
  }
810
1079
 
1080
// Recursion depth guard — prevents stack overflow on self-referencing schemas.
//
// Constructing a guard increments a per-thread counter and the destructor
// decrements it, so the counter equals the number of guards currently alive
// on the calling thread. `overflow` is true when this guard pushed the count
// past kMaxDepth; callers are expected to test it and stop recursing.
struct DepthGuard {
  // Maximum number of simultaneously live guards before `overflow` is set.
  static constexpr int kMaxDepth = 100;

  static thread_local int depth;  // live guards on the current thread
  bool overflow;                  // true if this guard exceeded kMaxDepth

  DepthGuard() : overflow(++depth > kMaxDepth) {}
  ~DepthGuard() { --depth; }

  // A copy would decrement `depth` a second time without a matching
  // increment, so the guard is neither copyable nor assignable.
  DepthGuard(const DepthGuard&) = delete;
  DepthGuard& operator=(const DepthGuard&) = delete;
};
thread_local int DepthGuard::depth = 0;
1088
+
811
1089
  static void validate_node(const schema_node_ptr& node,
812
1090
  dom::element value,
813
1091
  const std::string& path,
814
1092
  const compiled_schema& ctx,
815
1093
  std::vector<validation_error>& errors,
816
- bool all_errors) {
1094
+ bool all_errors,
1095
+ dynamic_scope_t* dynamic_scope) {
817
1096
  if (!node) return;
818
1097
 
1098
+ DepthGuard guard;
1099
+ if (guard.overflow) return;
1100
+
819
1101
  // Boolean schema
820
1102
  if (node->boolean_schema.has_value()) {
821
1103
  if (!node->boolean_schema.value()) {
@@ -825,136 +1107,116 @@ static void validate_node(const schema_node_ptr& node,
825
1107
  return;
826
1108
  }
827
1109
 
1110
+ // Dynamic scope tracking: push this resource's dynamic anchors
1111
+ bool pushed_scope = false;
1112
+ if (dynamic_scope && !node->id.empty()) {
1113
+ auto it = ctx.resource_dynamic_anchors.find(node->id);
1114
+ if (it != ctx.resource_dynamic_anchors.end()) {
1115
+ dynamic_scope->push_back(&it->second);
1116
+ pushed_scope = true;
1117
+ }
1118
+ }
1119
+
828
1120
  // $ref — Draft 2020-12: $ref is not a short-circuit, sibling keywords still apply
829
1121
  bool ref_resolved = false;
830
1122
  if (!node->ref.empty()) {
831
- // First check defs map
832
- auto it = ctx.defs.find(node->ref);
833
- if (it != ctx.defs.end()) {
834
- validate_node(it->second, value, path, ctx, errors, all_errors);
1123
+ // Self-reference: "#"
1124
+ if (node->ref == "#" && ctx.root) {
1125
+ validate_node(ctx.root, value, path, ctx, errors, all_errors, dynamic_scope);
835
1126
  ref_resolved = true;
836
1127
  }
837
- // Try JSON Pointer resolution from root (e.g., "#/properties/foo")
838
- if (node->ref.size() > 1 && node->ref[0] == '#' &&
839
- node->ref[1] == '/') {
840
- // Decode JSON Pointer segments
841
- auto decode_pointer_segment = [](const std::string& seg) -> std::string {
842
- // Percent-decode first
843
- std::string pct;
844
- for (size_t i = 0; i < seg.size(); ++i) {
845
- if (seg[i] == '%' && i + 2 < seg.size()) {
846
- char h = seg[i+1], l = seg[i+2];
847
- auto hex = [](char c) -> int {
848
- if (c >= '0' && c <= '9') return c - '0';
849
- if (c >= 'a' && c <= 'f') return 10 + c - 'a';
850
- if (c >= 'A' && c <= 'F') return 10 + c - 'A';
851
- return -1;
852
- };
853
- int hv = hex(h), lv = hex(l);
854
- if (hv >= 0 && lv >= 0) {
855
- pct += static_cast<char>(hv * 16 + lv);
856
- i += 2;
857
- } else {
858
- pct += seg[i];
859
- }
860
- } else {
861
- pct += seg[i];
862
- }
863
- }
864
- // Then JSON Pointer unescape: ~1 -> /, ~0 -> ~
865
- std::string out;
866
- for (size_t i = 0; i < pct.size(); ++i) {
867
- if (pct[i] == '~' && i + 1 < pct.size()) {
868
- if (pct[i + 1] == '1') { out += '/'; ++i; }
869
- else if (pct[i + 1] == '0') { out += '~'; ++i; }
870
- else out += pct[i];
871
- } else {
872
- out += pct[i];
1128
+ // Check for "base#fragment" pattern (e.g. "first#/$defs/stuff", "tree.json")
1129
+ if (!ref_resolved) {
1130
+ std::string base_uri;
1131
+ std::string fragment;
1132
+ size_t hash_pos = node->ref.find('#');
1133
+ if (hash_pos != std::string::npos) {
1134
+ base_uri = node->ref.substr(0, hash_pos);
1135
+ fragment = node->ref.substr(hash_pos + 1);
1136
+ } else {
1137
+ base_uri = node->ref;
1138
+ }
1139
+
1140
+ // Helper: push base resource's dynamic anchors to scope, validate, pop
1141
+ auto validate_with_resource_scope = [&](const schema_node_ptr& target,
1142
+ const std::string& resource_id) {
1143
+ bool scope_pushed = false;
1144
+ if (dynamic_scope && !resource_id.empty()) {
1145
+ auto rit = ctx.resource_dynamic_anchors.find(resource_id);
1146
+ if (rit != ctx.resource_dynamic_anchors.end()) {
1147
+ dynamic_scope->push_back(&rit->second);
1148
+ scope_pushed = true;
873
1149
  }
874
1150
  }
875
- return out;
1151
+ validate_node(target, value, path, ctx, errors, all_errors, dynamic_scope);
1152
+ if (scope_pushed) dynamic_scope->pop_back();
876
1153
  };
877
1154
 
878
- // Split pointer into segments
879
- std::string pointer = node->ref.substr(2);
880
- std::vector<std::string> segments;
881
- size_t spos = 0;
882
- while (spos < pointer.size()) {
883
- size_t snext = pointer.find('/', spos);
884
- segments.push_back(decode_pointer_segment(
885
- pointer.substr(spos, snext == std::string::npos ? snext : snext - spos)));
886
- spos = (snext == std::string::npos) ? pointer.size() : snext + 1;
887
- }
888
-
889
- // Walk the schema tree
890
- schema_node_ptr current = ctx.root;
891
- bool resolved = true;
892
- for (size_t si = 0; si < segments.size() && current; ++si) {
893
- const auto& key = segments[si];
894
-
895
- if (key == "properties" && si + 1 < segments.size()) {
896
- const auto& prop_name = segments[++si];
897
- auto pit = current->properties.find(prop_name);
898
- if (pit != current->properties.end()) {
899
- current = pit->second;
900
- } else { resolved = false; break; }
901
- } else if (key == "items" && current->items_schema) {
902
- current = current->items_schema;
903
- } else if (key == "$defs" || key == "definitions") {
904
- if (si + 1 < segments.size()) {
905
- const auto& def_name = segments[++si];
906
- // Navigate into node's defs map
907
- auto dit = current->defs.find(def_name);
908
- if (dit != current->defs.end()) {
909
- current = dit->second;
1155
+ if (!base_uri.empty()) {
1156
+ // Resolve base URI to a resource via defs
1157
+ auto it = ctx.defs.find(base_uri);
1158
+ if (it != ctx.defs.end()) {
1159
+ schema_node_ptr target = it->second;
1160
+ if (!fragment.empty()) {
1161
+ if (fragment[0] == '/') {
1162
+ // JSON Pointer within the resource
1163
+ auto resolved = walk_json_pointer(target, fragment);
1164
+ if (resolved) {
1165
+ validate_with_resource_scope(resolved, base_uri);
1166
+ ref_resolved = true;
1167
+ }
910
1168
  } else {
911
- // Fallback: try ctx.defs with full path
912
- std::string full_ref = "#/" + key + "/" + def_name;
913
- auto cit = ctx.defs.find(full_ref);
914
- if (cit != ctx.defs.end()) {
915
- current = cit->second;
916
- } else { resolved = false; break; }
1169
+ // Anchor lookup within the resource
1170
+ auto resolved = find_anchor_in_resource(ctx, base_uri, fragment);
1171
+ if (resolved) {
1172
+ validate_with_resource_scope(resolved, base_uri);
1173
+ ref_resolved = true;
1174
+ }
917
1175
  }
918
- } else { resolved = false; break; }
919
- } else if (key == "allOf" || key == "anyOf" || key == "oneOf") {
920
- if (si + 1 < segments.size()) {
921
- size_t idx = std::stoul(segments[++si]);
922
- auto& vec = (key == "allOf") ? current->all_of
923
- : (key == "anyOf") ? current->any_of
924
- : current->one_of;
925
- if (idx < vec.size()) { current = vec[idx]; }
926
- else { resolved = false; break; }
927
- } else { resolved = false; break; }
928
- } else if (key == "not" && current->not_schema) {
929
- current = current->not_schema;
930
- } else if (key == "if" && current->if_schema) {
931
- current = current->if_schema;
932
- } else if (key == "then" && current->then_schema) {
933
- current = current->then_schema;
934
- } else if (key == "else" && current->else_schema) {
935
- current = current->else_schema;
936
- } else if (key == "additionalProperties" &&
937
- current->additional_properties_schema) {
938
- current = current->additional_properties_schema;
939
- } else if (key == "prefixItems") {
940
- if (si + 1 < segments.size()) {
941
- size_t idx = std::stoul(segments[++si]);
942
- if (idx < current->prefix_items.size()) { current = current->prefix_items[idx]; }
943
- else { resolved = false; break; }
944
- } else { resolved = false; break; }
1176
+ } else {
1177
+ // No fragment, just the base resource (it pushes its own scope)
1178
+ validate_node(target, value, path, ctx, errors, all_errors, dynamic_scope);
1179
+ ref_resolved = true;
1180
+ }
1181
+ }
1182
+ } else if (!fragment.empty()) {
1183
+ // "#fragment" no base URI
1184
+ if (fragment[0] == '/') {
1185
+ // JSON Pointer from root
1186
+ auto resolved = walk_json_pointer(ctx.root, fragment);
1187
+ if (resolved) {
1188
+ validate_node(resolved, value, path, ctx, errors, all_errors, dynamic_scope);
1189
+ ref_resolved = true;
1190
+ }
945
1191
  } else {
946
- resolved = false; break;
1192
+ // Anchor lookup
1193
+ auto ait = ctx.anchors.find(fragment);
1194
+ if (ait != ctx.anchors.end()) {
1195
+ validate_node(ait->second, value, path, ctx, errors, all_errors, dynamic_scope);
1196
+ ref_resolved = true;
1197
+ }
947
1198
  }
948
1199
  }
949
- if (resolved && current) {
950
- validate_node(current, value, path, ctx, errors, all_errors);
1200
+ }
1201
+ // Fallback: try defs map directly (handles bare $id references like "list")
1202
+ if (!ref_resolved) {
1203
+ auto it = ctx.defs.find(node->ref);
1204
+ if (it != ctx.defs.end()) {
1205
+ validate_node(it->second, value, path, ctx, errors, all_errors, dynamic_scope);
951
1206
  ref_resolved = true;
952
1207
  }
953
1208
  }
954
- // Self-reference: "#"
955
- if (!ref_resolved && node->ref == "#" && ctx.root) {
956
- validate_node(ctx.root, value, path, ctx, errors, all_errors);
957
- ref_resolved = true;
1209
+ // Fallback: relative URI resolution — match ref against defs keys by suffix
1210
+ if (!ref_resolved && !node->ref.empty() && node->ref[0] != '#') {
1211
+ std::string suffix = "/" + node->ref;
1212
+ for (const auto& [key, def_node] : ctx.defs) {
1213
+ if (key.size() >= suffix.size() &&
1214
+ key.compare(key.size() - suffix.size(), suffix.size(), suffix) == 0) {
1215
+ validate_node(def_node, value, path, ctx, errors, all_errors, dynamic_scope);
1216
+ ref_resolved = true;
1217
+ break;
1218
+ }
1219
+ }
958
1220
  }
959
1221
  if (!ref_resolved) {
960
1222
  errors.push_back({error_code::ref_not_found, path,
@@ -962,6 +1224,132 @@ static void validate_node(const schema_node_ptr& node,
962
1224
  }
963
1225
  }
964
1226
 
1227
+ // $dynamicRef — Draft 2020-12 dynamic scope resolution
1228
+ if (!node->dynamic_ref.empty()) {
1229
+ bool dref_resolved = false;
1230
+
1231
+ // Parse the $dynamicRef value into base URI and fragment
1232
+ std::string dr_base;
1233
+ std::string dr_fragment;
1234
+ {
1235
+ size_t hash_pos = node->dynamic_ref.find('#');
1236
+ if (hash_pos != std::string::npos) {
1237
+ dr_base = node->dynamic_ref.substr(0, hash_pos);
1238
+ dr_fragment = node->dynamic_ref.substr(hash_pos + 1);
1239
+ } else {
1240
+ dr_base = node->dynamic_ref;
1241
+ }
1242
+ }
1243
+
1244
+ // Helper: push base resource's dynamic anchors to scope temporarily
1245
+ auto push_resource_scope = [&](const std::string& resource_id) -> bool {
1246
+ if (dynamic_scope && !resource_id.empty()) {
1247
+ auto rit = ctx.resource_dynamic_anchors.find(resource_id);
1248
+ if (rit != ctx.resource_dynamic_anchors.end()) {
1249
+ dynamic_scope->push_back(&rit->second);
1250
+ return true;
1251
+ }
1252
+ }
1253
+ return false;
1254
+ };
1255
+
1256
+ // If fragment is a JSON pointer (starts with /), resolve like $ref
1257
+ if (!dr_fragment.empty() && dr_fragment[0] == '/') {
1258
+ schema_node_ptr base_node = dr_base.empty() ? ctx.root : nullptr;
1259
+ if (!dr_base.empty()) {
1260
+ auto it = ctx.defs.find(dr_base);
1261
+ if (it != ctx.defs.end()) base_node = it->second;
1262
+ }
1263
+ if (base_node) {
1264
+ auto resolved = walk_json_pointer(base_node, dr_fragment);
1265
+ if (resolved) {
1266
+ bool dr_scope_pushed = push_resource_scope(dr_base);
1267
+ validate_node(resolved, value, path, ctx, errors, all_errors, dynamic_scope);
1268
+ if (dr_scope_pushed) dynamic_scope->pop_back();
1269
+ dref_resolved = true;
1270
+ }
1271
+ }
1272
+ }
1273
+
1274
+ // If fragment is an anchor name (not a JSON pointer)
1275
+ if (!dref_resolved && !dr_fragment.empty() && dr_fragment[0] != '/') {
1276
+ std::string anchor_name = dr_fragment;
1277
+
1278
+ // Initial resolution: find the anchor
1279
+ schema_node_ptr target = nullptr;
1280
+
1281
+ if (!dr_base.empty()) {
1282
+ // Resolve base URI first, then find anchor in that resource
1283
+ auto it = ctx.defs.find(dr_base);
1284
+ if (it != ctx.defs.end()) {
1285
+ target = find_anchor_in_resource(ctx, dr_base, anchor_name);
1286
+ }
1287
+ } else {
1288
+ // No base URI — look up in flat anchors map
1289
+ auto ait = ctx.anchors.find(anchor_name);
1290
+ if (ait != ctx.anchors.end()) {
1291
+ target = ait->second;
1292
+ }
1293
+ }
1294
+
1295
+ if (target) {
1296
+ // Check if the initially resolved target is itself a $dynamicAnchor
1297
+ // (the "bookend" requirement). Only do dynamic scope walk if the
1298
+ // initial target's resource has a $dynamicAnchor with this name.
1299
+ bool is_dynamic_at_initial = false;
1300
+ if (!dr_base.empty()) {
1301
+ // We resolved via a specific base URI
1302
+ auto rit = ctx.resource_dynamic_anchors.find(dr_base);
1303
+ if (rit != ctx.resource_dynamic_anchors.end() &&
1304
+ rit->second.count(anchor_name)) {
1305
+ is_dynamic_at_initial = true;
1306
+ }
1307
+ } else {
1308
+ // No base URI — check if ANY resource has this as $dynamicAnchor
1309
+ // and the target matches (i.e., the initially resolved node IS a
1310
+ // $dynamicAnchor node)
1311
+ for (const auto& [rid, rmap] : ctx.resource_dynamic_anchors) {
1312
+ auto ait2 = rmap.find(anchor_name);
1313
+ if (ait2 != rmap.end() && ait2->second == target) {
1314
+ is_dynamic_at_initial = true;
1315
+ break;
1316
+ }
1317
+ }
1318
+ }
1319
+
1320
+ // Dynamic scope walk: find first override in dynamic scope
1321
+ if (is_dynamic_at_initial && dynamic_scope) {
1322
+ for (size_t i = 0; i < dynamic_scope->size(); ++i) {
1323
+ auto dit = (*dynamic_scope)[i]->find(anchor_name);
1324
+ if (dit != (*dynamic_scope)[i]->end()) {
1325
+ target = dit->second;
1326
+ break;
1327
+ }
1328
+ }
1329
+ }
1330
+
1331
+ bool dr_scope_pushed = push_resource_scope(dr_base);
1332
+ validate_node(target, value, path, ctx, errors, all_errors, dynamic_scope);
1333
+ if (dr_scope_pushed) dynamic_scope->pop_back();
1334
+ dref_resolved = true;
1335
+ }
1336
+ }
1337
+
1338
+ // Bare $dynamicRef without fragment (unusual, but handle it)
1339
+ if (!dref_resolved && dr_fragment.empty() && !dr_base.empty()) {
1340
+ auto it = ctx.defs.find(dr_base);
1341
+ if (it != ctx.defs.end()) {
1342
+ validate_node(it->second, value, path, ctx, errors, all_errors, dynamic_scope);
1343
+ dref_resolved = true;
1344
+ }
1345
+ }
1346
+
1347
+ if (!dref_resolved) {
1348
+ errors.push_back({error_code::ref_not_found, path,
1349
+ "cannot resolve $dynamicRef: " + node->dynamic_ref});
1350
+ }
1351
+ }
1352
+
965
1353
  // type
966
1354
  if (node->type_mask) {
967
1355
  if (!type_matches_mask(value, node->type_mask)) {
@@ -1061,6 +1449,7 @@ static void validate_node(const schema_node_ptr& node,
1061
1449
  " > maxLength " +
1062
1450
  std::to_string(node->max_length.value())});
1063
1451
  }
1452
+ #ifndef ATA_NO_RE2
1064
1453
  if (node->compiled_pattern) {
1065
1454
  if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern)) {
1066
1455
  errors.push_back({error_code::pattern_mismatch, path,
@@ -1068,6 +1457,7 @@ static void validate_node(const schema_node_ptr& node,
1068
1457
  node->pattern.value()});
1069
1458
  }
1070
1459
  }
1460
+ #endif
1071
1461
 
1072
1462
  if (node->format.has_value()) {
1073
1463
  if (!check_format_by_id(sv, node->format_id)) {
@@ -1139,10 +1529,10 @@ static void validate_node(const schema_node_ptr& node,
1139
1529
  for (auto item : arr) {
1140
1530
  if (idx < node->prefix_items.size()) {
1141
1531
  validate_node(node->prefix_items[idx], item,
1142
- path + "/" + std::to_string(idx), ctx, errors, all_errors);
1532
+ path + "/" + std::to_string(idx), ctx, errors, all_errors, dynamic_scope);
1143
1533
  } else if (node->items_schema) {
1144
1534
  validate_node(node->items_schema, item,
1145
- path + "/" + std::to_string(idx), ctx, errors, all_errors);
1535
+ path + "/" + std::to_string(idx), ctx, errors, all_errors, dynamic_scope);
1146
1536
  }
1147
1537
  ++idx;
1148
1538
  }
@@ -1209,16 +1599,18 @@ static void validate_node(const schema_node_ptr& node,
1209
1599
  // Check properties
1210
1600
  auto it = node->properties.find(key_str);
1211
1601
  if (it != node->properties.end()) {
1212
- validate_node(it->second, val, path + "/" + key_str, ctx, errors, all_errors);
1602
+ validate_node(it->second, val, path + "/" + key_str, ctx, errors, all_errors, dynamic_scope);
1213
1603
  matched = true;
1214
1604
  }
1215
1605
 
1216
1606
  // Check patternProperties (use cached compiled regex)
1217
1607
  for (const auto& pp : node->pattern_properties) {
1608
+ #ifndef ATA_NO_RE2
1218
1609
  if (pp.compiled && re2::RE2::PartialMatch(key_str, *pp.compiled)) {
1219
- validate_node(pp.schema, val, path + "/" + key_str, ctx, errors, all_errors);
1610
+ validate_node(pp.schema, val, path + "/" + key_str, ctx, errors, all_errors, dynamic_scope);
1220
1611
  matched = true;
1221
1612
  }
1613
+ #endif
1222
1614
  }
1223
1615
 
1224
1616
  // additionalProperties (only if not matched by properties or patternProperties)
@@ -1230,7 +1622,7 @@ static void validate_node(const schema_node_ptr& node,
1230
1622
  "additional property not allowed: " + key_str});
1231
1623
  } else if (node->additional_properties_schema) {
1232
1624
  validate_node(node->additional_properties_schema, val,
1233
- path + "/" + key_str, ctx, errors);
1625
+ path + "/" + key_str, ctx, errors, all_errors, dynamic_scope);
1234
1626
  }
1235
1627
  }
1236
1628
  }
@@ -1259,12 +1651,14 @@ static void validate_node(const schema_node_ptr& node,
1259
1651
  errors.push_back({error_code::max_length_violation, path,
1260
1652
  "propertyNames: key too long: " + std::string(key_sv)});
1261
1653
  }
1654
+ #ifndef ATA_NO_RE2
1262
1655
  if (pn->compiled_pattern) {
1263
1656
  if (!re2::RE2::PartialMatch(re2::StringPiece(key_sv.data(), key_sv.size()), *pn->compiled_pattern)) {
1264
1657
  errors.push_back({error_code::pattern_mismatch, path,
1265
1658
  "propertyNames: key does not match pattern: " + std::string(key_sv)});
1266
1659
  }
1267
1660
  }
1661
+ #endif
1268
1662
  if (pn->format.has_value() && !check_format_by_id(key_sv, pn->format_id)) {
1269
1663
  errors.push_back({error_code::format_mismatch, path,
1270
1664
  "propertyNames: key does not match format: " + std::string(key_sv)});
@@ -1276,7 +1670,7 @@ static void validate_node(const schema_node_ptr& node,
1276
1670
  std::string key_json = "\"" + std::string(key) + "\"";
1277
1671
  auto key_result = tl_dom_key_parser().parse(key_json);
1278
1672
  if (!key_result.error()) {
1279
- validate_node(pn, key_result.value(), path, ctx, errors, all_errors);
1673
+ validate_node(pn, key_result.value_unsafe(), path, ctx, errors, all_errors, dynamic_scope);
1280
1674
  }
1281
1675
  }
1282
1676
  }
@@ -1301,7 +1695,7 @@ static void validate_node(const schema_node_ptr& node,
1301
1695
  for (const auto& [prop, schema] : node->dependent_schemas) {
1302
1696
  dom::element dummy;
1303
1697
  if (obj[prop].get(dummy) == SUCCESS) {
1304
- validate_node(schema, value, path, ctx, errors, all_errors);
1698
+ validate_node(schema, value, path, ctx, errors, all_errors, dynamic_scope);
1305
1699
  }
1306
1700
  }
1307
1701
  }
@@ -1310,7 +1704,7 @@ static void validate_node(const schema_node_ptr& node,
1310
1704
  if (!node->all_of.empty()) {
1311
1705
  for (const auto& sub : node->all_of) {
1312
1706
  std::vector<validation_error> sub_errors;
1313
- validate_node(sub, value, path, ctx, sub_errors, all_errors);
1707
+ validate_node(sub, value, path, ctx, sub_errors, all_errors, dynamic_scope);
1314
1708
  if (!sub_errors.empty()) {
1315
1709
  errors.push_back({error_code::all_of_failed, path,
1316
1710
  "allOf subschema failed"});
@@ -1324,7 +1718,7 @@ static void validate_node(const schema_node_ptr& node,
1324
1718
  bool any_valid = false;
1325
1719
  for (const auto& sub : node->any_of) {
1326
1720
  std::vector<validation_error> sub_errors;
1327
- validate_node(sub, value, path, ctx, sub_errors, all_errors);
1721
+ validate_node(sub, value, path, ctx, sub_errors, all_errors, dynamic_scope);
1328
1722
  if (sub_errors.empty()) {
1329
1723
  any_valid = true;
1330
1724
  break;
@@ -1341,7 +1735,7 @@ static void validate_node(const schema_node_ptr& node,
1341
1735
  int match_count = 0;
1342
1736
  for (const auto& sub : node->one_of) {
1343
1737
  std::vector<validation_error> sub_errors;
1344
- validate_node(sub, value, path, ctx, sub_errors, all_errors);
1738
+ validate_node(sub, value, path, ctx, sub_errors, all_errors, dynamic_scope);
1345
1739
  if (sub_errors.empty()) ++match_count;
1346
1740
  }
1347
1741
  if (match_count != 1) {
@@ -1354,7 +1748,7 @@ static void validate_node(const schema_node_ptr& node,
1354
1748
  // not
1355
1749
  if (node->not_schema) {
1356
1750
  std::vector<validation_error> sub_errors;
1357
- validate_node(node->not_schema, value, path, ctx, sub_errors, all_errors);
1751
+ validate_node(node->not_schema, value, path, ctx, sub_errors, all_errors, dynamic_scope);
1358
1752
  if (sub_errors.empty()) {
1359
1753
  errors.push_back({error_code::not_failed, path,
1360
1754
  "value should not match 'not' schema"});
@@ -1364,19 +1758,21 @@ static void validate_node(const schema_node_ptr& node,
1364
1758
  // if/then/else
1365
1759
  if (node->if_schema) {
1366
1760
  std::vector<validation_error> if_errors;
1367
- validate_node(node->if_schema, value, path, ctx, if_errors, all_errors);
1761
+ validate_node(node->if_schema, value, path, ctx, if_errors, all_errors, dynamic_scope);
1368
1762
  if (if_errors.empty()) {
1369
1763
  // if passed → validate then
1370
1764
  if (node->then_schema) {
1371
- validate_node(node->then_schema, value, path, ctx, errors, all_errors);
1765
+ validate_node(node->then_schema, value, path, ctx, errors, all_errors, dynamic_scope);
1372
1766
  }
1373
1767
  } else {
1374
1768
  // if failed → validate else
1375
1769
  if (node->else_schema) {
1376
- validate_node(node->else_schema, value, path, ctx, errors, all_errors);
1770
+ validate_node(node->else_schema, value, path, ctx, errors, all_errors, dynamic_scope);
1377
1771
  }
1378
1772
  }
1379
1773
  }
1774
+
1775
+ if (pushed_scope) dynamic_scope->pop_back();
1380
1776
  }
1381
1777
 
1382
1778
  // Fast boolean-only tree walker — stripped of all error collection.
@@ -1387,14 +1783,27 @@ static bool validate_fast(const schema_node_ptr& node,
1387
1783
  const compiled_schema& ctx) {
1388
1784
  if (!node) [[unlikely]] return true;
1389
1785
 
1786
+ DepthGuard guard;
1787
+ if (guard.overflow) [[unlikely]] return true;
1788
+
1390
1789
  if (node->boolean_schema.has_value()) [[unlikely]]
1391
1790
  return node->boolean_schema.value();
1392
1791
 
1792
+ // $dynamicRef — bail to tree walker
1793
+ if (!node->dynamic_ref.empty()) [[unlikely]] return false;
1794
+
1393
1795
  // $ref
1394
1796
  if (!node->ref.empty()) [[unlikely]] {
1395
1797
  auto it = ctx.defs.find(node->ref);
1396
1798
  if (it != ctx.defs.end()) {
1397
1799
  if (!validate_fast(it->second, value, ctx)) return false;
1800
+ } else if (node->ref.size() > 1 && node->ref[0] == '#' && node->ref[1] != '/') {
1801
+ auto ait = ctx.anchors.find(node->ref.substr(1));
1802
+ if (ait != ctx.anchors.end()) {
1803
+ if (!validate_fast(ait->second, value, ctx)) return false;
1804
+ } else {
1805
+ return false;
1806
+ }
1398
1807
  } else if (node->ref == "#" && ctx.root) {
1399
1808
  if (!validate_fast(ctx.root, value, ctx)) return false;
1400
1809
  } else {
@@ -1444,10 +1853,12 @@ static bool validate_fast(const schema_node_ptr& node,
1444
1853
  uint64_t len = utf8_length(sv);
1445
1854
  if (node->min_length.has_value() && len < node->min_length.value()) return false;
1446
1855
  if (node->max_length.has_value() && len > node->max_length.value()) return false;
1856
+ #ifndef ATA_NO_RE2
1447
1857
  if (node->compiled_pattern) {
1448
1858
  if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern))
1449
1859
  return false;
1450
1860
  }
1861
+ #endif
1451
1862
  if (node->format.has_value() && !check_format_by_id(sv, node->format_id)) return false;
1452
1863
  }
1453
1864
 
@@ -1532,11 +1943,13 @@ static bool validate_fast(const schema_node_ptr& node,
1532
1943
  }
1533
1944
 
1534
1945
  for (const auto& pp : node->pattern_properties) {
1946
+ #ifndef ATA_NO_RE2
1535
1947
  if (pp.compiled && re2::RE2::PartialMatch(
1536
1948
  re2::StringPiece(key_sv.data(), key_sv.size()), *pp.compiled)) {
1537
1949
  if (!validate_fast(pp.schema, val, ctx)) return false;
1538
1950
  matched = true;
1539
1951
  }
1952
+ #endif
1540
1953
  }
1541
1954
 
1542
1955
  if (!matched) {
@@ -1615,8 +2028,9 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1615
2028
  return;
1616
2029
  }
1617
2030
  // Composition fallback
1618
- if (!n->ref.empty() || !n->all_of.empty() || !n->any_of.empty() ||
1619
- !n->one_of.empty() || n->not_schema || n->if_schema) {
2031
+ if (!n->ref.empty() || !n->dynamic_ref.empty() || !n->all_of.empty() ||
2032
+ !n->any_of.empty() || !n->one_of.empty() || n->not_schema ||
2033
+ n->if_schema) {
1620
2034
  uintptr_t ptr = reinterpret_cast<uintptr_t>(n);
1621
2035
  out.push_back({cg::op::COMPOSITION, (uint32_t)(ptr & 0xFFFFFFFF),
1622
2036
  (uint32_t)((ptr >> 32) & 0xFFFFFFFF)});
@@ -1670,7 +2084,9 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1670
2084
  // String
1671
2085
  if (n->min_length.has_value()) out.push_back({cg::op::CHECK_MIN_LENGTH,(uint32_t)*n->min_length});
1672
2086
  if (n->max_length.has_value()) out.push_back({cg::op::CHECK_MAX_LENGTH,(uint32_t)*n->max_length});
2087
+ #ifndef ATA_NO_RE2
1673
2088
  if (n->compiled_pattern) { uint32_t i=(uint32_t)p.regexes.size(); p.regexes.push_back(n->compiled_pattern); out.push_back({cg::op::CHECK_PATTERN,i}); }
2089
+ #endif
1674
2090
  if (n->format.has_value()) {
1675
2091
  uint32_t i=(uint32_t)p.format_ids.size();
1676
2092
  p.format_ids.push_back(n->format_id);
@@ -1744,7 +2160,11 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1744
2160
  case cg::op::CHECK_MULTIPLE_OF: if(t_numeric){double d=p.doubles[c.a],r=std::fmod(t_dval,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break;
1745
2161
  case cg::op::CHECK_MIN_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)<c.a)return false;} break;
1746
2162
  case cg::op::CHECK_MAX_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)>c.a)return false;} break;
2163
+ #ifndef ATA_NO_RE2
1747
2164
  case cg::op::CHECK_PATTERN: if(t==et::STRING){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
2165
+ #else
2166
+ case cg::op::CHECK_PATTERN: break;
2167
+ #endif
1748
2168
  case cg::op::CHECK_FORMAT: if(t==et::STRING){std::string_view sv;value.get(sv);if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
1749
2169
  case cg::op::CHECK_MIN_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s<c.a)return false;} break;
1750
2170
  case cg::op::CHECK_MAX_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s>c.a)return false;} break;
@@ -1796,7 +2216,9 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1796
2216
  // Returns: true = valid, false = invalid OR unsupported (fallback to DOM).
1797
2217
 
1798
2218
  static json_type od_type(simdjson::ondemand::value& v) {
1799
- switch (v.type()) {
2219
+ simdjson::ondemand::json_type jt;
2220
+ if (v.type().get(jt)) return json_type::null_value;
2221
+ switch (jt) {
1800
2222
  case simdjson::ondemand::json_type::object: return json_type::object;
1801
2223
  case simdjson::ondemand::json_type::array: return json_type::array;
1802
2224
  case simdjson::ondemand::json_type::string: return json_type::string;
@@ -1854,7 +2276,11 @@ static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1854
2276
  }
1855
2277
  case cg::op::CHECK_MIN_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)<c.a) return false;} break;
1856
2278
  case cg::op::CHECK_MAX_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)>c.a) return false;} break;
2279
+ #ifndef ATA_NO_RE2
1857
2280
  case cg::op::CHECK_PATTERN: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
2281
+ #else
2282
+ case cg::op::CHECK_PATTERN: break;
2283
+ #endif
1858
2284
  case cg::op::CHECK_FORMAT: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
1859
2285
  case cg::op::CHECK_MIN_ITEMS: if(t==json_type::array){
1860
2286
  simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
@@ -1895,7 +2321,8 @@ static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1895
2321
  }
1896
2322
  for(auto field:o){
1897
2323
  simdjson::ondemand::raw_json_string rk; if(field.key().get(rk)!=SUCCESS) return false;
1898
- std::string_view key = field.unescaped_key();
2324
+ std::string_view key;
2325
+ if (field.unescaped_key().get(key)) continue;
1899
2326
  bool matched=false;
1900
2327
  for(auto& pp:props){
1901
2328
  if(key==pp.nm){
@@ -2012,7 +2439,9 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
2012
2439
  if (node->multiple_of) { plan->num_flags |= od_plan::HAS_MUL; plan->num_mul = *node->multiple_of; }
2013
2440
  plan->min_length = node->min_length;
2014
2441
  plan->max_length = node->max_length;
2442
+ #ifndef ATA_NO_RE2
2015
2443
  plan->pattern = node->compiled_pattern.get();
2444
+ #endif
2016
2445
  plan->format_id = node->format_id;
2017
2446
 
2018
2447
  // Object plan — build hash lookup for O(1) per-field dispatch
@@ -2151,10 +2580,12 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2151
2580
  if (plan.min_length && len < *plan.min_length) return false;
2152
2581
  if (plan.max_length && len > *plan.max_length) return false;
2153
2582
  }
2583
+ #ifndef ATA_NO_RE2
2154
2584
  if (plan.pattern) {
2155
2585
  if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *plan.pattern))
2156
2586
  return false;
2157
2587
  }
2588
+ #endif
2158
2589
  if (plan.format_id != 255) {
2159
2590
  if (!check_format_by_id(sv, plan.format_id)) return false;
2160
2591
  }
@@ -2170,7 +2601,8 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2170
2601
  uint64_t prop_count = 0;
2171
2602
 
2172
2603
  for (auto field : obj) {
2173
- std::string_view key = field.unescaped_key();
2604
+ std::string_view key;
2605
+ if (field.unescaped_key().get(key)) continue;
2174
2606
  prop_count++;
2175
2607
 
2176
2608
  // Single merged scan: required + property in one pass
@@ -2231,10 +2663,14 @@ schema_ref compile(std::string_view schema_json) {
2231
2663
  if (result.error()) {
2232
2664
  return schema_ref{nullptr};
2233
2665
  }
2234
- doc = result.value();
2666
+ doc = result.value_unsafe();
2235
2667
 
2236
2668
  ctx->root = compile_node(doc, *ctx);
2237
2669
 
2670
+ if (!ctx->compile_error.empty()) {
2671
+ return schema_ref{nullptr};
2672
+ }
2673
+
2238
2674
  // Generate codegen plan
2239
2675
  cg_compile(ctx->root.get(), ctx->gen_plan, ctx->gen_plan.code);
2240
2676
  ctx->gen_plan.code.push_back({cg::op::END});
@@ -2243,6 +2679,7 @@ schema_ref compile(std::string_view schema_json) {
2243
2679
 
2244
2680
  schema_ref ref;
2245
2681
  ref.impl = ctx;
2682
+ ref.warnings = std::move(ctx->warnings);
2246
2683
  return ref;
2247
2684
  }
2248
2685
 
@@ -2282,7 +2719,7 @@ validation_result validate(const schema_ref& schema, std::string_view json,
2282
2719
  // Fast path: codegen bytecode execution (DOM)
2283
2720
  if (!schema.impl->use_ondemand && !schema.impl->gen_plan.code.empty()) {
2284
2721
  if (cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code,
2285
- result.value())) {
2722
+ result.value_unsafe())) {
2286
2723
  return {true, {}};
2287
2724
  }
2288
2725
  // Codegen said invalid OR hit COMPOSITION — fall through to tree walker
@@ -2290,8 +2727,24 @@ validation_result validate(const schema_ref& schema, std::string_view json,
2290
2727
 
2291
2728
  // Slow path: tree walker with error details (reuse already-parsed DOM)
2292
2729
  std::vector<validation_error> errors;
2293
- validate_node(schema.impl->root, result.value(), "", *schema.impl, errors,
2294
- opts.all_errors);
2730
+ if (schema.impl->has_dynamic_refs) {
2731
+ dynamic_scope_t scope;
2732
+ auto rit = schema.impl->resource_dynamic_anchors.find("");
2733
+ if (rit != schema.impl->resource_dynamic_anchors.end()) {
2734
+ scope.push_back(&rit->second);
2735
+ }
2736
+ if (!schema.impl->root->id.empty()) {
2737
+ auto iit = schema.impl->resource_dynamic_anchors.find(schema.impl->root->id);
2738
+ if (iit != schema.impl->resource_dynamic_anchors.end()) {
2739
+ scope.push_back(&iit->second);
2740
+ }
2741
+ }
2742
+ validate_node(schema.impl->root, result.value_unsafe(), "", *schema.impl, errors,
2743
+ opts.all_errors, &scope);
2744
+ } else {
2745
+ validate_node(schema.impl->root, result.value_unsafe(), "", *schema.impl, errors,
2746
+ opts.all_errors);
2747
+ }
2295
2748
 
2296
2749
  return {errors.empty(), std::move(errors)};
2297
2750
  }
@@ -2332,10 +2785,10 @@ bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t lengt
2332
2785
  if (result.error()) return false;
2333
2786
 
2334
2787
  if (!schema.impl->gen_plan.code.empty()) {
2335
- return cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, result.value());
2788
+ return cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, result.value_unsafe());
2336
2789
  }
2337
2790
 
2338
- return validate_fast(schema.impl->root, result.value(), *schema.impl);
2791
+ return validate_fast(schema.impl->root, result.value_unsafe(), *schema.impl);
2339
2792
  }
2340
2793
 
2341
2794
  bool is_valid_buf(const schema_ref& schema, const uint8_t* data, size_t length) {