ata-validator 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CMakeLists.txt CHANGED
@@ -19,43 +19,48 @@ endif()
19
19
  option(ATA_TESTING "Build test suite" ON)
20
20
  option(ATA_BENCHMARKS "Build benchmarks" OFF)
21
21
  option(ATA_SANITIZE "Enable address sanitizer" OFF)
22
+ option(ATA_NO_RE2 "Build without RE2 regex engine (disables pattern keyword)" OFF)
22
23
 
23
24
  # Fetch simdjson
24
25
  include(FetchContent)
25
26
  FetchContent_Declare(
26
27
  simdjson
27
28
  GIT_REPOSITORY https://github.com/simdjson/simdjson.git
28
- GIT_TAG v3.12.2
29
+ GIT_TAG v4.5.0
29
30
  GIT_SHALLOW TRUE
30
31
  )
31
32
 
32
- # RE2 — fast regex engine (replaces std::regex)
33
- set(RE2_BUILD_TESTING OFF CACHE BOOL "" FORCE)
34
- FetchContent_Declare(
35
- re2
36
- GIT_REPOSITORY https://github.com/google/re2.git
37
- GIT_TAG 2024-07-02
38
- GIT_SHALLOW TRUE
39
- EXCLUDE_FROM_ALL
40
- )
33
+ if(NOT ATA_NO_RE2)
34
+ # RE2 fast regex engine (replaces std::regex)
35
+ set(RE2_BUILD_TESTING OFF CACHE BOOL "" FORCE)
36
+ FetchContent_Declare(
37
+ re2
38
+ GIT_REPOSITORY https://github.com/google/re2.git
39
+ GIT_TAG 2024-07-02
40
+ GIT_SHALLOW TRUE
41
+ EXCLUDE_FROM_ALL
42
+ )
41
43
 
42
- # Abseil — required by RE2
43
- set(ABSL_PROPAGATE_CXX_STD ON CACHE BOOL "" FORCE)
44
- set(ABSL_BUILD_TESTING OFF CACHE BOOL "" FORCE)
45
- set(ABSL_ENABLE_INSTALL ON CACHE BOOL "" FORCE)
46
- FetchContent_Declare(
47
- abseil-cpp
48
- GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
49
- GIT_TAG 20240722.0
50
- GIT_SHALLOW TRUE
51
- )
44
+ # Abseil — required by RE2
45
+ set(ABSL_PROPAGATE_CXX_STD ON CACHE BOOL "" FORCE)
46
+ set(ABSL_BUILD_TESTING OFF CACHE BOOL "" FORCE)
47
+ set(ABSL_ENABLE_INSTALL ON CACHE BOOL "" FORCE)
48
+ FetchContent_Declare(
49
+ abseil-cpp
50
+ GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
51
+ GIT_TAG 20240722.0
52
+ GIT_SHALLOW TRUE
53
+ )
52
54
 
53
- FetchContent_MakeAvailable(abseil-cpp simdjson re2)
55
+ FetchContent_MakeAvailable(abseil-cpp simdjson re2)
56
+ else()
57
+ FetchContent_MakeAvailable(simdjson)
58
+ endif()
54
59
 
55
60
  if (CMAKE_JS_VERSION)
56
61
  # add_definitions(-DNAPI_VERSION=10)
57
62
  include_directories(${CMAKE_JS_INC})
58
- file(GLOB SOURCE_FILES "binding/*.cpp" "src/*.cpp" "deps/simdjson/*.cpp")
63
+ file(GLOB SOURCE_FILES "binding/*.cpp" "src/*.cpp")
59
64
  else()
60
65
  file(GLOB SOURCE_FILES "src/*.cpp")
61
66
  endif()
@@ -88,7 +93,12 @@ else()
88
93
  endif()
89
94
 
90
95
  target_include_directories(${PROJECT_NAME} PUBLIC include)
91
- target_link_libraries(${PROJECT_NAME} PRIVATE simdjson re2)
96
+ if(ATA_NO_RE2)
97
+ target_link_libraries(${PROJECT_NAME} PRIVATE simdjson)
98
+ target_compile_definitions(${PROJECT_NAME} PRIVATE ATA_NO_RE2)
99
+ else()
100
+ target_link_libraries(${PROJECT_NAME} PRIVATE simdjson re2)
101
+ endif()
92
102
 
93
103
  if (CMAKE_JS_VERSION)
94
104
  target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_JS_INC})
@@ -130,3 +140,11 @@ if(ATA_BENCHMARKS)
130
140
  add_executable(ata_bench benchmark/bench.cpp)
131
141
  target_link_libraries(ata_bench PRIVATE ata simdjson)
132
142
  endif()
143
+
144
+ # Fuzz targets (for OSS-Fuzz / libFuzzer)
145
+ if(ATA_FUZZING)
146
+ foreach(fuzzer compile_fuzzer validate_fuzzer roundtrip_fuzzer)
147
+ add_executable(${fuzzer} fuzz/${fuzzer}.cpp)
148
+ target_link_libraries(${fuzzer} PRIVATE ata simdjson ${LIB_FUZZING_ENGINE})
149
+ endforeach()
150
+ endif()
package/README.md CHANGED
@@ -106,8 +106,8 @@ Three-tier hybrid codegen: static schemas compile to zero-overhead key checks, d
106
106
 
107
107
  ## When to use ajv
108
108
 
109
- - **100% spec compliance needed** - ajv covers more edge cases (ata: 96.9%)
110
109
  - **`$dynamicRef`** - not yet supported in ata
110
+ - **Existing ajv ecosystem** - plugins, custom keywords, large community
111
111
 
112
112
  ## Features
113
113
 
@@ -9,7 +9,9 @@
9
9
  #include <functional>
10
10
  #include <queue>
11
11
  #include <atomic>
12
+ #ifndef ATA_NO_RE2
12
13
  #include <re2/re2.h>
14
+ #endif
13
15
  #include <set>
14
16
  #include <string>
15
17
  #include <vector>
@@ -39,7 +41,9 @@ struct schema_node {
39
41
  std::optional<uint64_t> min_length;
40
42
  std::optional<uint64_t> max_length;
41
43
  std::optional<std::string> pattern;
44
+ #ifndef ATA_NO_RE2
42
45
  std::shared_ptr<re2::RE2> compiled_pattern;
46
+ #endif
43
47
 
44
48
  std::optional<uint64_t> min_items;
45
49
  std::optional<uint64_t> max_items;
@@ -63,7 +67,9 @@ struct schema_node {
63
67
  struct pattern_prop {
64
68
  std::string pattern;
65
69
  schema_node_ptr schema;
70
+ #ifndef ATA_NO_RE2
66
71
  std::shared_ptr<re2::RE2> compiled;
72
+ #endif
67
73
  };
68
74
  std::vector<pattern_prop> pattern_properties;
69
75
 
@@ -83,6 +89,8 @@ struct schema_node {
83
89
  schema_node_ptr else_schema;
84
90
 
85
91
  std::string ref;
92
+ std::string dynamic_ref;
93
+ std::string id;
86
94
 
87
95
  std::unordered_map<std::string, schema_node_ptr> defs;
88
96
 
@@ -114,10 +122,14 @@ static bool napi_check_format(const std::string& sv, const std::string& fmt) {
114
122
  (sv.size() - dot - 1) >= 2;
115
123
  }
116
124
  if (fmt == "date") {
117
- return sv.size() == 10 && nb_is_digit(sv[0]) && nb_is_digit(sv[1]) &&
118
- nb_is_digit(sv[2]) && nb_is_digit(sv[3]) && sv[4] == '-' &&
119
- nb_is_digit(sv[5]) && nb_is_digit(sv[6]) && sv[7] == '-' &&
120
- nb_is_digit(sv[8]) && nb_is_digit(sv[9]);
125
+ if (sv.size() != 10 || !nb_is_digit(sv[0]) || !nb_is_digit(sv[1]) ||
126
+ !nb_is_digit(sv[2]) || !nb_is_digit(sv[3]) || sv[4] != '-' ||
127
+ !nb_is_digit(sv[5]) || !nb_is_digit(sv[6]) || sv[7] != '-' ||
128
+ !nb_is_digit(sv[8]) || !nb_is_digit(sv[9]))
129
+ return false;
130
+ int month = (sv[5] - '0') * 10 + (sv[6] - '0');
131
+ int day = (sv[8] - '0') * 10 + (sv[9] - '0');
132
+ return month >= 1 && month <= 12 && day >= 1 && day <= 31;
121
133
  }
122
134
  if (fmt == "time") {
123
135
  if (sv.size() < 8) return false;
@@ -286,6 +298,15 @@ static void validate_napi(const schema_node_ptr& node,
286
298
  const compiled_schema_internal& ctx,
287
299
  std::vector<ata::validation_error>& errors);
288
300
 
301
+ // Recursion depth guard — prevents stack overflow on self-referencing schemas
302
+ struct NapiDepthGuard {
303
+ static thread_local int depth;
304
+ bool overflow;
305
+ NapiDepthGuard() : overflow(++depth > 100) {}
306
+ ~NapiDepthGuard() { --depth; }
307
+ };
308
+ thread_local int NapiDepthGuard::depth = 0;
309
+
289
310
  static void validate_napi(const schema_node_ptr& node,
290
311
  Napi::Value value,
291
312
  Napi::Env env,
@@ -294,6 +315,9 @@ static void validate_napi(const schema_node_ptr& node,
294
315
  std::vector<ata::validation_error>& errors) {
295
316
  if (!node) return;
296
317
 
318
+ NapiDepthGuard guard;
319
+ if (guard.overflow) return;
320
+
297
321
  // Boolean schema
298
322
  if (node->boolean_schema.has_value()) {
299
323
  if (!node->boolean_schema.value()) {
@@ -516,6 +540,7 @@ static void validate_napi(const schema_node_ptr& node,
516
540
  " > maxLength " +
517
541
  std::to_string(node->max_length.value())});
518
542
  }
543
+ #ifndef ATA_NO_RE2
519
544
  if (node->compiled_pattern) {
520
545
  if (!re2::RE2::PartialMatch(sv, *node->compiled_pattern)) {
521
546
  errors.push_back({ata::error_code::pattern_mismatch, path,
@@ -523,6 +548,7 @@ static void validate_napi(const schema_node_ptr& node,
523
548
  node->pattern.value()});
524
549
  }
525
550
  }
551
+ #endif
526
552
  if (node->format.has_value()) {
527
553
  const auto& fmt = node->format.value();
528
554
  bool format_ok = napi_check_format(sv, fmt);
@@ -644,11 +670,13 @@ static void validate_napi(const schema_node_ptr& node,
644
670
  }
645
671
 
646
672
  for (const auto& pp : node->pattern_properties) {
673
+ #ifndef ATA_NO_RE2
647
674
  if (pp.compiled && re2::RE2::PartialMatch(key_str, *pp.compiled)) {
648
675
  validate_napi(pp.schema, val, env, path + "/" + key_str, ctx,
649
676
  errors);
650
677
  matched = true;
651
678
  }
679
+ #endif
652
680
  }
653
681
 
654
682
  if (!matched) {
package/index.js CHANGED
@@ -309,6 +309,14 @@ function buildSchemaMap(schemas) {
309
309
  return map
310
310
  }
311
311
 
312
+ // Resolve a relative URI ref against a base URI
313
+ function resolveRelativeRef(ref, baseId) {
314
+ if (!baseId || ref.includes('://') || ref.startsWith('#')) return ref
315
+ const lastSlash = baseId.lastIndexOf('/')
316
+ if (lastSlash < 0) return ref
317
+ return baseId.substring(0, lastSlash + 1) + ref
318
+ }
319
+
312
320
  class Validator {
313
321
  constructor(schema, opts) {
314
322
  const options = opts || {};
@@ -405,17 +413,20 @@ class Validator {
405
413
  ? this._schemaStr + '\0' + [...this._schemaMap.keys()].sort().join('\0')
406
414
  : this._schemaStr;
407
415
  const cached = _compileCache.get(mapKey);
408
- let jsFn, jsCombinedFn, jsErrFn;
416
+ let jsFn, jsCombinedFn, jsErrFn, _isCodegen = false;
409
417
  var _forceNapi = typeof process !== 'undefined' && process.env && process.env.ATA_FORCE_NAPI;
410
418
  if (cached && !_forceNapi) {
411
419
  jsFn = cached.jsFn;
412
420
  jsCombinedFn = cached.combined;
413
421
  jsErrFn = cached.errFn;
422
+ _isCodegen = !!cached.isCodegen;
414
423
  } else if (!_forceNapi) {
415
- jsFn = compileToJSCodegen(schemaObj, sm) || compileToJS(schemaObj, null, sm);
424
+ const _cgFn = compileToJSCodegen(schemaObj, sm);
425
+ jsFn = _cgFn || compileToJS(schemaObj, null, sm);
416
426
  jsCombinedFn = compileToJSCombined(schemaObj, VALID_RESULT, sm);
417
427
  jsErrFn = compileToJSCodegenWithErrors(schemaObj, sm);
418
- _compileCache.set(mapKey, { jsFn, combined: jsCombinedFn, errFn: jsErrFn });
428
+ _isCodegen = !!_cgFn;
429
+ _compileCache.set(mapKey, { jsFn, combined: jsCombinedFn, errFn: jsErrFn, isCodegen: _isCodegen });
419
430
  } else {
420
431
  jsFn = null; jsCombinedFn = null; jsErrFn = null;
421
432
  }
@@ -465,14 +476,20 @@ class Validator {
465
476
  // errFn: use JS codegen if safe, else lazy-native fallback
466
477
  // For unevaluated schemas without errFn, use jsFn as boolean-only fallback
467
478
  const hasUnevaluated = schemaObj && JSON.stringify(schemaObj).includes('unevaluatedProperties') || JSON.stringify(schemaObj).includes('unevaluatedItems')
479
+ const hasDynRef = this._schemaStr.includes('"$dynamicRef"') || this._schemaStr.includes('"$dynamicAnchor"')
468
480
  const errFn =
469
481
  safeErrFn ||
470
482
  (hasUnevaluated
471
483
  ? (d) => ({ valid: jsFn(d), errors: jsFn(d) ? [] : [{ code: 'unevaluated', path: '', message: 'unevaluated property or item' }] })
472
- : (d) => {
473
- this._ensureNative();
474
- return this._compiled.validate(d);
475
- });
484
+ : hasDynRef
485
+ ? (d) => {
486
+ this._ensureNative();
487
+ return this._compiled.validateJSON(JSON.stringify(d));
488
+ }
489
+ : (d) => {
490
+ this._ensureNative();
491
+ return this._compiled.validate(d);
492
+ });
476
493
 
477
494
  // Best path: combined validator (single pass, validates + collects errors)
478
495
  // Valid data: returns VALID_RESULT, no allocation
@@ -501,23 +518,30 @@ class Validator {
501
518
  } catch {}
502
519
  }
503
520
 
504
- if (safeCombinedFn && jsFn) {
505
- // Hybrid: jsFn boolean guard for valid (fast, no allocation), combined for invalid
521
+ if (hasDynRef && _isCodegen && jsFn) {
522
+ // $dynamicRef with JS codegen: direct path, no wrapper layers
523
+ const _fn = jsFn, _efn = safeErrFn || errFn, _R = VALID_RESULT;
506
524
  this.validate = preprocess
507
- ? (data) => {
508
- preprocess(data);
509
- return jsFn(data) ? VALID_RESULT : safeCombinedFn(data);
510
- }
511
- : (data) => jsFn(data) ? VALID_RESULT : safeCombinedFn(data);
525
+ ? (data) => { preprocess(data); return _fn(data) ? _R : _efn(data); }
526
+ : (data) => _fn(data) ? _R : _efn(data);
527
+ } else if (hasDynRef) {
528
+ // $dynamicRef without codegen: delegate to native C++ (interpretive path unreliable)
529
+ this.validate = preprocess
530
+ ? (data) => { preprocess(data); return errFn(data); }
531
+ : errFn;
532
+ } else if (jsFn && jsFn._hybridFactory) {
533
+ // Zero-wrapper: hybridFactory bakes VALID_RESULT + errFn into a single function
534
+ // No arrow function wrapper, no ternary, one function call
535
+ const hybridFn = jsFn._hybridFactory(VALID_RESULT, safeCombinedFn || errFn);
536
+ this.validate = preprocess
537
+ ? (data) => { preprocess(data); return hybridFn(data); }
538
+ : hybridFn;
512
539
  } else if (safeCombinedFn) {
513
540
  this.validate = preprocess
514
- ? (data) => {
515
- preprocess(data);
516
- return safeCombinedFn(data);
517
- }
541
+ ? (data) => { preprocess(data); return safeCombinedFn(data); }
518
542
  : safeCombinedFn;
519
543
  } else {
520
- const hybridFn = jsFn._hybridFactory
544
+ const hybridFn = jsFn && jsFn._hybridFactory
521
545
  ? jsFn._hybridFactory(VALID_RESULT, errFn)
522
546
  : null;
523
547
  this.validate = hybridFn
@@ -616,15 +640,21 @@ class Validator {
616
640
  };
617
641
  }
618
642
  } else if (native) {
619
- // ATA_FORCE_NAPI path: no JS codegen, use native for everything
643
+ // Native-only path: no JS codegen, use native for everything
620
644
  this._ensureNative();
645
+ const _hasDynamic = this._schemaStr.includes('"$dynamicRef"') || this._schemaStr.includes('"$dynamicAnchor"') || this._schemaStr.includes('"$anchor"')
646
+ // For schemas with dynamic refs/anchors, use validateJSON (C++ path with full support)
647
+ // instead of validate (NAPI direct V8 path without anchor maps)
648
+ const _validate = _hasDynamic
649
+ ? (data) => this._compiled.validateJSON(JSON.stringify(data))
650
+ : (data) => this._compiled.validate(data);
621
651
  this.validate = preprocess
622
652
  ? (data) => {
623
653
  preprocess(data);
624
- return this._compiled.validate(data);
654
+ return _validate(data);
625
655
  }
626
- : (data) => this._compiled.validate(data);
627
- this.isValidObject = (data) => this._compiled.validate(data).valid;
656
+ : _validate;
657
+ this.isValidObject = (data) => _validate(data).valid;
628
658
  this.validateJSON = (jsonStr) => this._compiled.validateJSON(jsonStr);
629
659
  this.isValidJSON = (jsonStr) => this._compiled.isValidJSON(jsonStr);
630
660
  {