ata-validator 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +41 -23
- package/README.md +1 -1
- package/binding/ata_napi.cpp +32 -4
- package/index.js +53 -23
- package/lib/js-compiler.js +404 -34
- package/package.json +5 -3
- package/prebuilds/ata-darwin-arm64/node-napi-v10.node +0 -0
- package/scripts/install.js +23 -0
- package/src/ata.cpp +536 -147
package/CMakeLists.txt
CHANGED
|
@@ -19,43 +19,48 @@ endif()
|
|
|
19
19
|
option(ATA_TESTING "Build test suite" ON)
|
|
20
20
|
option(ATA_BENCHMARKS "Build benchmarks" OFF)
|
|
21
21
|
option(ATA_SANITIZE "Enable address sanitizer" OFF)
|
|
22
|
+
option(ATA_NO_RE2 "Build without RE2 regex engine (disables pattern keyword)" OFF)
|
|
22
23
|
|
|
23
24
|
# Fetch simdjson
|
|
24
25
|
include(FetchContent)
|
|
25
26
|
FetchContent_Declare(
|
|
26
27
|
simdjson
|
|
27
28
|
GIT_REPOSITORY https://github.com/simdjson/simdjson.git
|
|
28
|
-
GIT_TAG
|
|
29
|
+
GIT_TAG v4.5.0
|
|
29
30
|
GIT_SHALLOW TRUE
|
|
30
31
|
)
|
|
31
32
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
33
|
+
if(NOT ATA_NO_RE2)
|
|
34
|
+
# RE2 — fast regex engine (replaces std::regex)
|
|
35
|
+
set(RE2_BUILD_TESTING OFF CACHE BOOL "" FORCE)
|
|
36
|
+
FetchContent_Declare(
|
|
37
|
+
re2
|
|
38
|
+
GIT_REPOSITORY https://github.com/google/re2.git
|
|
39
|
+
GIT_TAG 2024-07-02
|
|
40
|
+
GIT_SHALLOW TRUE
|
|
41
|
+
EXCLUDE_FROM_ALL
|
|
42
|
+
)
|
|
41
43
|
|
|
42
|
-
# Abseil — required by RE2
|
|
43
|
-
set(ABSL_PROPAGATE_CXX_STD ON CACHE BOOL "" FORCE)
|
|
44
|
-
set(ABSL_BUILD_TESTING OFF CACHE BOOL "" FORCE)
|
|
45
|
-
set(ABSL_ENABLE_INSTALL ON CACHE BOOL "" FORCE)
|
|
46
|
-
FetchContent_Declare(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
)
|
|
44
|
+
# Abseil — required by RE2
|
|
45
|
+
set(ABSL_PROPAGATE_CXX_STD ON CACHE BOOL "" FORCE)
|
|
46
|
+
set(ABSL_BUILD_TESTING OFF CACHE BOOL "" FORCE)
|
|
47
|
+
set(ABSL_ENABLE_INSTALL ON CACHE BOOL "" FORCE)
|
|
48
|
+
FetchContent_Declare(
|
|
49
|
+
abseil-cpp
|
|
50
|
+
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
|
|
51
|
+
GIT_TAG 20240722.0
|
|
52
|
+
GIT_SHALLOW TRUE
|
|
53
|
+
)
|
|
52
54
|
|
|
53
|
-
FetchContent_MakeAvailable(abseil-cpp simdjson re2)
|
|
55
|
+
FetchContent_MakeAvailable(abseil-cpp simdjson re2)
|
|
56
|
+
else()
|
|
57
|
+
FetchContent_MakeAvailable(simdjson)
|
|
58
|
+
endif()
|
|
54
59
|
|
|
55
60
|
if (CMAKE_JS_VERSION)
|
|
56
61
|
# add_definitions(-DNAPI_VERSION=10)
|
|
57
62
|
include_directories(${CMAKE_JS_INC})
|
|
58
|
-
file(GLOB SOURCE_FILES "binding/*.cpp" "src/*.cpp"
|
|
63
|
+
file(GLOB SOURCE_FILES "binding/*.cpp" "src/*.cpp")
|
|
59
64
|
else()
|
|
60
65
|
file(GLOB SOURCE_FILES "src/*.cpp")
|
|
61
66
|
endif()
|
|
@@ -88,7 +93,12 @@ else()
|
|
|
88
93
|
endif()
|
|
89
94
|
|
|
90
95
|
target_include_directories(${PROJECT_NAME} PUBLIC include)
|
|
91
|
-
|
|
96
|
+
if(ATA_NO_RE2)
|
|
97
|
+
target_link_libraries(${PROJECT_NAME} PRIVATE simdjson)
|
|
98
|
+
target_compile_definitions(${PROJECT_NAME} PRIVATE ATA_NO_RE2)
|
|
99
|
+
else()
|
|
100
|
+
target_link_libraries(${PROJECT_NAME} PRIVATE simdjson re2)
|
|
101
|
+
endif()
|
|
92
102
|
|
|
93
103
|
if (CMAKE_JS_VERSION)
|
|
94
104
|
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_JS_INC})
|
|
@@ -130,3 +140,11 @@ if(ATA_BENCHMARKS)
|
|
|
130
140
|
add_executable(ata_bench benchmark/bench.cpp)
|
|
131
141
|
target_link_libraries(ata_bench PRIVATE ata simdjson)
|
|
132
142
|
endif()
|
|
143
|
+
|
|
144
|
+
# Fuzz targets (for OSS-Fuzz / libFuzzer)
|
|
145
|
+
if(ATA_FUZZING)
|
|
146
|
+
foreach(fuzzer compile_fuzzer validate_fuzzer roundtrip_fuzzer)
|
|
147
|
+
add_executable(${fuzzer} fuzz/${fuzzer}.cpp)
|
|
148
|
+
target_link_libraries(${fuzzer} PRIVATE ata simdjson ${LIB_FUZZING_ENGINE})
|
|
149
|
+
endforeach()
|
|
150
|
+
endif()
|
package/README.md
CHANGED
|
@@ -106,8 +106,8 @@ Three-tier hybrid codegen: static schemas compile to zero-overhead key checks, d
|
|
|
106
106
|
|
|
107
107
|
## When to use ajv
|
|
108
108
|
|
|
109
|
-
- **100% spec compliance needed** - ajv covers more edge cases (ata: 96.9%)
|
|
110
109
|
- **`$dynamicRef`** - not yet supported in ata
|
|
110
|
+
- **Existing ajv ecosystem** - plugins, custom keywords, large community
|
|
111
111
|
|
|
112
112
|
## Features
|
|
113
113
|
|
package/binding/ata_napi.cpp
CHANGED
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
#include <functional>
|
|
10
10
|
#include <queue>
|
|
11
11
|
#include <atomic>
|
|
12
|
+
#ifndef ATA_NO_RE2
|
|
12
13
|
#include <re2/re2.h>
|
|
14
|
+
#endif
|
|
13
15
|
#include <set>
|
|
14
16
|
#include <string>
|
|
15
17
|
#include <vector>
|
|
@@ -39,7 +41,9 @@ struct schema_node {
|
|
|
39
41
|
std::optional<uint64_t> min_length;
|
|
40
42
|
std::optional<uint64_t> max_length;
|
|
41
43
|
std::optional<std::string> pattern;
|
|
44
|
+
#ifndef ATA_NO_RE2
|
|
42
45
|
std::shared_ptr<re2::RE2> compiled_pattern;
|
|
46
|
+
#endif
|
|
43
47
|
|
|
44
48
|
std::optional<uint64_t> min_items;
|
|
45
49
|
std::optional<uint64_t> max_items;
|
|
@@ -63,7 +67,9 @@ struct schema_node {
|
|
|
63
67
|
struct pattern_prop {
|
|
64
68
|
std::string pattern;
|
|
65
69
|
schema_node_ptr schema;
|
|
70
|
+
#ifndef ATA_NO_RE2
|
|
66
71
|
std::shared_ptr<re2::RE2> compiled;
|
|
72
|
+
#endif
|
|
67
73
|
};
|
|
68
74
|
std::vector<pattern_prop> pattern_properties;
|
|
69
75
|
|
|
@@ -83,6 +89,8 @@ struct schema_node {
|
|
|
83
89
|
schema_node_ptr else_schema;
|
|
84
90
|
|
|
85
91
|
std::string ref;
|
|
92
|
+
std::string dynamic_ref;
|
|
93
|
+
std::string id;
|
|
86
94
|
|
|
87
95
|
std::unordered_map<std::string, schema_node_ptr> defs;
|
|
88
96
|
|
|
@@ -114,10 +122,14 @@ static bool napi_check_format(const std::string& sv, const std::string& fmt) {
|
|
|
114
122
|
(sv.size() - dot - 1) >= 2;
|
|
115
123
|
}
|
|
116
124
|
if (fmt == "date") {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
125
|
+
if (sv.size() != 10 || !nb_is_digit(sv[0]) || !nb_is_digit(sv[1]) ||
|
|
126
|
+
!nb_is_digit(sv[2]) || !nb_is_digit(sv[3]) || sv[4] != '-' ||
|
|
127
|
+
!nb_is_digit(sv[5]) || !nb_is_digit(sv[6]) || sv[7] != '-' ||
|
|
128
|
+
!nb_is_digit(sv[8]) || !nb_is_digit(sv[9]))
|
|
129
|
+
return false;
|
|
130
|
+
int month = (sv[5] - '0') * 10 + (sv[6] - '0');
|
|
131
|
+
int day = (sv[8] - '0') * 10 + (sv[9] - '0');
|
|
132
|
+
return month >= 1 && month <= 12 && day >= 1 && day <= 31;
|
|
121
133
|
}
|
|
122
134
|
if (fmt == "time") {
|
|
123
135
|
if (sv.size() < 8) return false;
|
|
@@ -286,6 +298,15 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
286
298
|
const compiled_schema_internal& ctx,
|
|
287
299
|
std::vector<ata::validation_error>& errors);
|
|
288
300
|
|
|
301
|
+
/// Recursion depth guard — prevents stack overflow on self-referencing schemas.
///
/// Constructing a guard increments a per-thread nesting counter and destroying
/// it decrements the counter again. `overflow` is set when this guard's
/// construction pushed the counter past `kMaxDepth`; the caller is expected to
/// check it and stop recursing.
struct NapiDepthGuard {
  /// Deepest nesting level that is still accepted.
  static constexpr int kMaxDepth = 100;

  /// Number of guards currently alive on the calling thread.
  static thread_local int depth;

  /// True when this guard was created beyond `kMaxDepth`.
  bool overflow;

  NapiDepthGuard() noexcept : overflow(++depth > kMaxDepth) {}
  ~NapiDepthGuard() { --depth; }

  // A copied guard would decrement `depth` once more than it was incremented,
  // so the type is neither copyable nor movable.
  NapiDepthGuard(const NapiDepthGuard&) = delete;
  NapiDepthGuard& operator=(const NapiDepthGuard&) = delete;
};
thread_local int NapiDepthGuard::depth = 0;
|
|
309
|
+
|
|
289
310
|
static void validate_napi(const schema_node_ptr& node,
|
|
290
311
|
Napi::Value value,
|
|
291
312
|
Napi::Env env,
|
|
@@ -294,6 +315,9 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
294
315
|
std::vector<ata::validation_error>& errors) {
|
|
295
316
|
if (!node) return;
|
|
296
317
|
|
|
318
|
+
NapiDepthGuard guard;
|
|
319
|
+
if (guard.overflow) return;
|
|
320
|
+
|
|
297
321
|
// Boolean schema
|
|
298
322
|
if (node->boolean_schema.has_value()) {
|
|
299
323
|
if (!node->boolean_schema.value()) {
|
|
@@ -516,6 +540,7 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
516
540
|
" > maxLength " +
|
|
517
541
|
std::to_string(node->max_length.value())});
|
|
518
542
|
}
|
|
543
|
+
#ifndef ATA_NO_RE2
|
|
519
544
|
if (node->compiled_pattern) {
|
|
520
545
|
if (!re2::RE2::PartialMatch(sv, *node->compiled_pattern)) {
|
|
521
546
|
errors.push_back({ata::error_code::pattern_mismatch, path,
|
|
@@ -523,6 +548,7 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
523
548
|
node->pattern.value()});
|
|
524
549
|
}
|
|
525
550
|
}
|
|
551
|
+
#endif
|
|
526
552
|
if (node->format.has_value()) {
|
|
527
553
|
const auto& fmt = node->format.value();
|
|
528
554
|
bool format_ok = napi_check_format(sv, fmt);
|
|
@@ -644,11 +670,13 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
644
670
|
}
|
|
645
671
|
|
|
646
672
|
for (const auto& pp : node->pattern_properties) {
|
|
673
|
+
#ifndef ATA_NO_RE2
|
|
647
674
|
if (pp.compiled && re2::RE2::PartialMatch(key_str, *pp.compiled)) {
|
|
648
675
|
validate_napi(pp.schema, val, env, path + "/" + key_str, ctx,
|
|
649
676
|
errors);
|
|
650
677
|
matched = true;
|
|
651
678
|
}
|
|
679
|
+
#endif
|
|
652
680
|
}
|
|
653
681
|
|
|
654
682
|
if (!matched) {
|
package/index.js
CHANGED
|
@@ -309,6 +309,14 @@ function buildSchemaMap(schemas) {
|
|
|
309
309
|
return map
|
|
310
310
|
}
|
|
311
311
|
|
|
312
|
+
// Resolve a relative URI ref against a base URI.
//
// Returns `ref` unchanged when there is no base, when `ref` is a same-document
// fragment ("#..."), or when `ref` is already absolute (it carries a scheme).
// Otherwise `ref` replaces the last path segment of `baseId`. Dot segments
// ("./", "../") are not normalized.
function resolveRelativeRef(ref, baseId) {
  if (!baseId || ref.startsWith('#')) return ref
  // Absolute refs: "://" anywhere (original rule) or a leading scheme such as "urn:".
  if (ref.includes('://') || /^[A-Za-z][A-Za-z0-9+.-]*:/.test(ref)) return ref

  // Drop the base's query/fragment: a "/" inside them is not a path separator.
  const tail = baseId.search(/[?#]/)
  const base = tail < 0 ? baseId : baseId.substring(0, tail)

  // Split the base into "scheme://authority" and path so that the "//" after
  // the scheme is never mistaken for the last path separator.
  const schemeSep = base.indexOf('://')
  let origin = ''
  let path = base
  if (schemeSep >= 0) {
    // Network-path ref ("//host/x") keeps only the base's scheme.
    if (ref.startsWith('//')) return base.substring(0, schemeSep + 1) + ref
    const pathStart = base.indexOf('/', schemeSep + 3)
    origin = pathStart < 0 ? base : base.substring(0, pathStart)
    path = pathStart < 0 ? '/' : base.substring(pathStart)
  }

  // Absolute-path ref ("/x.json") replaces the whole base path.
  if (ref.startsWith('/')) return origin + ref

  const lastSlash = path.lastIndexOf('/')
  if (lastSlash < 0) return ref
  return origin + path.substring(0, lastSlash + 1) + ref
}
|
|
319
|
+
|
|
312
320
|
class Validator {
|
|
313
321
|
constructor(schema, opts) {
|
|
314
322
|
const options = opts || {};
|
|
@@ -405,17 +413,20 @@ class Validator {
|
|
|
405
413
|
? this._schemaStr + '\0' + [...this._schemaMap.keys()].sort().join('\0')
|
|
406
414
|
: this._schemaStr;
|
|
407
415
|
const cached = _compileCache.get(mapKey);
|
|
408
|
-
let jsFn, jsCombinedFn, jsErrFn;
|
|
416
|
+
let jsFn, jsCombinedFn, jsErrFn, _isCodegen = false;
|
|
409
417
|
var _forceNapi = typeof process !== 'undefined' && process.env && process.env.ATA_FORCE_NAPI;
|
|
410
418
|
if (cached && !_forceNapi) {
|
|
411
419
|
jsFn = cached.jsFn;
|
|
412
420
|
jsCombinedFn = cached.combined;
|
|
413
421
|
jsErrFn = cached.errFn;
|
|
422
|
+
_isCodegen = !!cached.isCodegen;
|
|
414
423
|
} else if (!_forceNapi) {
|
|
415
|
-
|
|
424
|
+
const _cgFn = compileToJSCodegen(schemaObj, sm);
|
|
425
|
+
jsFn = _cgFn || compileToJS(schemaObj, null, sm);
|
|
416
426
|
jsCombinedFn = compileToJSCombined(schemaObj, VALID_RESULT, sm);
|
|
417
427
|
jsErrFn = compileToJSCodegenWithErrors(schemaObj, sm);
|
|
418
|
-
|
|
428
|
+
_isCodegen = !!_cgFn;
|
|
429
|
+
_compileCache.set(mapKey, { jsFn, combined: jsCombinedFn, errFn: jsErrFn, isCodegen: _isCodegen });
|
|
419
430
|
} else {
|
|
420
431
|
jsFn = null; jsCombinedFn = null; jsErrFn = null;
|
|
421
432
|
}
|
|
@@ -465,14 +476,20 @@ class Validator {
|
|
|
465
476
|
// errFn: use JS codegen if safe, else lazy-native fallback
|
|
466
477
|
// For unevaluated schemas without errFn, use jsFn as boolean-only fallback
|
|
467
478
|
const hasUnevaluated = schemaObj && JSON.stringify(schemaObj).includes('unevaluatedProperties') || JSON.stringify(schemaObj).includes('unevaluatedItems')
|
|
479
|
+
const hasDynRef = this._schemaStr.includes('"$dynamicRef"') || this._schemaStr.includes('"$dynamicAnchor"')
|
|
468
480
|
const errFn =
|
|
469
481
|
safeErrFn ||
|
|
470
482
|
(hasUnevaluated
|
|
471
483
|
? (d) => ({ valid: jsFn(d), errors: jsFn(d) ? [] : [{ code: 'unevaluated', path: '', message: 'unevaluated property or item' }] })
|
|
472
|
-
:
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
484
|
+
: hasDynRef
|
|
485
|
+
? (d) => {
|
|
486
|
+
this._ensureNative();
|
|
487
|
+
return this._compiled.validateJSON(JSON.stringify(d));
|
|
488
|
+
}
|
|
489
|
+
: (d) => {
|
|
490
|
+
this._ensureNative();
|
|
491
|
+
return this._compiled.validate(d);
|
|
492
|
+
});
|
|
476
493
|
|
|
477
494
|
// Best path: combined validator (single pass, validates + collects errors)
|
|
478
495
|
// Valid data: returns VALID_RESULT, no allocation
|
|
@@ -501,23 +518,30 @@ class Validator {
|
|
|
501
518
|
} catch {}
|
|
502
519
|
}
|
|
503
520
|
|
|
504
|
-
if (
|
|
505
|
-
//
|
|
521
|
+
if (hasDynRef && _isCodegen && jsFn) {
|
|
522
|
+
// $dynamicRef with JS codegen: direct path, no wrapper layers
|
|
523
|
+
const _fn = jsFn, _efn = safeErrFn || errFn, _R = VALID_RESULT;
|
|
506
524
|
this.validate = preprocess
|
|
507
|
-
? (data) => {
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
525
|
+
? (data) => { preprocess(data); return _fn(data) ? _R : _efn(data); }
|
|
526
|
+
: (data) => _fn(data) ? _R : _efn(data);
|
|
527
|
+
} else if (hasDynRef) {
|
|
528
|
+
// $dynamicRef without codegen: delegate to native C++ (interpretive path unreliable)
|
|
529
|
+
this.validate = preprocess
|
|
530
|
+
? (data) => { preprocess(data); return errFn(data); }
|
|
531
|
+
: errFn;
|
|
532
|
+
} else if (jsFn && jsFn._hybridFactory) {
|
|
533
|
+
// Zero-wrapper: hybridFactory bakes VALID_RESULT + errFn into a single function
|
|
534
|
+
// No arrow function wrapper, no ternary, one function call
|
|
535
|
+
const hybridFn = jsFn._hybridFactory(VALID_RESULT, safeCombinedFn || errFn);
|
|
536
|
+
this.validate = preprocess
|
|
537
|
+
? (data) => { preprocess(data); return hybridFn(data); }
|
|
538
|
+
: hybridFn;
|
|
512
539
|
} else if (safeCombinedFn) {
|
|
513
540
|
this.validate = preprocess
|
|
514
|
-
? (data) => {
|
|
515
|
-
preprocess(data);
|
|
516
|
-
return safeCombinedFn(data);
|
|
517
|
-
}
|
|
541
|
+
? (data) => { preprocess(data); return safeCombinedFn(data); }
|
|
518
542
|
: safeCombinedFn;
|
|
519
543
|
} else {
|
|
520
|
-
const hybridFn = jsFn._hybridFactory
|
|
544
|
+
const hybridFn = jsFn && jsFn._hybridFactory
|
|
521
545
|
? jsFn._hybridFactory(VALID_RESULT, errFn)
|
|
522
546
|
: null;
|
|
523
547
|
this.validate = hybridFn
|
|
@@ -616,15 +640,21 @@ class Validator {
|
|
|
616
640
|
};
|
|
617
641
|
}
|
|
618
642
|
} else if (native) {
|
|
619
|
-
//
|
|
643
|
+
// Native-only path: no JS codegen, use native for everything
|
|
620
644
|
this._ensureNative();
|
|
645
|
+
const _hasDynamic = this._schemaStr.includes('"$dynamicRef"') || this._schemaStr.includes('"$dynamicAnchor"') || this._schemaStr.includes('"$anchor"')
|
|
646
|
+
// For schemas with dynamic refs/anchors, use validateJSON (C++ path with full support)
|
|
647
|
+
// instead of validate (NAPI direct V8 path without anchor maps)
|
|
648
|
+
const _validate = _hasDynamic
|
|
649
|
+
? (data) => this._compiled.validateJSON(JSON.stringify(data))
|
|
650
|
+
: (data) => this._compiled.validate(data);
|
|
621
651
|
this.validate = preprocess
|
|
622
652
|
? (data) => {
|
|
623
653
|
preprocess(data);
|
|
624
|
-
return
|
|
654
|
+
return _validate(data);
|
|
625
655
|
}
|
|
626
|
-
:
|
|
627
|
-
this.isValidObject = (data) =>
|
|
656
|
+
: _validate;
|
|
657
|
+
this.isValidObject = (data) => _validate(data).valid;
|
|
628
658
|
this.validateJSON = (jsonStr) => this._compiled.validateJSON(jsonStr);
|
|
629
659
|
this.isValidJSON = (jsonStr) => this._compiled.isValidJSON(jsonStr);
|
|
630
660
|
{
|