ata-validator 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -13
- package/binding/ata_napi.cpp +13 -7
- package/compat.js +1 -9
- package/include/ata.h +4 -0
- package/index.d.ts +12 -4
- package/index.js +91 -10
- package/lib/js-compiler.js +327 -151
- package/package.json +2 -2
- package/prebuilds/darwin-arm64/ata-validator.node +0 -0
- package/src/ata.cpp +327 -1
package/README.md
CHANGED
|
@@ -10,19 +10,19 @@ Ultra-fast JSON Schema validator powered by [simdjson](https://github.com/simdjs
|
|
|
10
10
|
|
|
11
11
|
| Scenario | ata | ajv | |
|
|
12
12
|
|---|---|---|---|
|
|
13
|
-
| **validate(obj)** valid |
|
|
14
|
-
| **validate(obj)** invalid |
|
|
15
|
-
| **isValidObject(obj)** |
|
|
16
|
-
| **Schema compilation** |
|
|
17
|
-
| **First validation** | 2.
|
|
13
|
+
| **validate(obj)** valid | 22ns | 102ns | **ata 4.6x faster** |
|
|
14
|
+
| **validate(obj)** invalid | 87ns | 182ns | **ata 2.1x faster** |
|
|
15
|
+
| **isValidObject(obj)** | 21ns | 100ns | **ata 4.7x faster** |
|
|
16
|
+
| **Schema compilation** | 695ns | 1.30ms | **ata 1,867x faster** |
|
|
17
|
+
| **First validation** | 2.07μs | 1.11ms | **ata 534x faster** |
|
|
18
18
|
|
|
19
19
|
### Complex Schema (patternProperties + dependentSchemas + propertyNames + additionalProperties)
|
|
20
20
|
|
|
21
21
|
| Scenario | ata | ajv | |
|
|
22
22
|
|---|---|---|---|
|
|
23
|
-
| **validate(obj)** valid | 17ns |
|
|
24
|
-
| **validate(obj)** invalid |
|
|
25
|
-
| **isValidObject(obj)** |
|
|
23
|
+
| **validate(obj)** valid | 17ns | 115ns | **ata 6.8x faster** |
|
|
24
|
+
| **validate(obj)** invalid | 59ns | 194ns | **ata 3.3x faster** |
|
|
25
|
+
| **isValidObject(obj)** | 19ns | 124ns | **ata 6.6x faster** |
|
|
26
26
|
|
|
27
27
|
### Cross-Schema `$ref` (multi-schema with `$id` registry)
|
|
28
28
|
|
|
@@ -52,10 +52,10 @@ Three-tier hybrid codegen: static schemas compile to zero-overhead key checks, d
|
|
|
52
52
|
|
|
53
53
|
| Scenario | ata | ajv | typebox | zod | valibot |
|
|
54
54
|
|---|---|---|---|---|---|
|
|
55
|
-
| **validate (valid)** | **9ns** |
|
|
56
|
-
| **validate (invalid)** | **
|
|
57
|
-
| **compilation** | **
|
|
58
|
-
| **first validation** | **2.
|
|
55
|
+
| **validate (valid)** | **9ns** | 38ns | 50ns | 334ns | 326ns |
|
|
56
|
+
| **validate (invalid)** | **37ns** | 103ns | 4ns | 11.8μs | 842ns |
|
|
57
|
+
| **compilation** | **584ns** | 1.20ms | 52μs | — | — |
|
|
58
|
+
| **first validation** | **2.1μs** | 1.11ms | 54μs | — | — |
|
|
59
59
|
|
|
60
60
|
> Different categories: ata/ajv/typebox are JSON Schema validators, zod/valibot are schema-builder DSLs. [Benchmark code](benchmark/bench_all_mitata.mjs)
|
|
61
61
|
|
|
@@ -71,7 +71,7 @@ Three-tier hybrid codegen: static schemas compile to zero-overhead key checks, d
|
|
|
71
71
|
|
|
72
72
|
| Scenario | ata | ajv | |
|
|
73
73
|
|---|---|---|---|
|
|
74
|
-
| **Serverless cold start** (50 schemas) | 0.1ms | 23ms | **ata
|
|
74
|
+
| **Serverless cold start** (50 schemas) | 0.1ms | 23ms | **ata 230x faster** |
|
|
75
75
|
| **ReDoS protection** (`^(a+)+$`) | 0.3ms | 765ms | **ata immune (RE2)** |
|
|
76
76
|
| **Batch NDJSON** (10K items, multi-core) | 13.4M/sec | 5.1M/sec | **ata 2.6x faster** |
|
|
77
77
|
| **Fastify startup** (5 routes) | 0.5ms | 6.0ms | **ata 12x faster** |
|
package/binding/ata_napi.cpp
CHANGED
|
@@ -1040,24 +1040,31 @@ static ThreadPool& pool() {
|
|
|
1040
1040
|
}
|
|
1041
1041
|
|
|
1042
1042
|
// --- Fast Validation Registry ---
|
|
1043
|
-
// Global schema slots for
|
|
1043
|
+
// Global schema slots for pre-compiled validation (bypasses per-call compilation)
|
|
1044
1044
|
static constexpr size_t MAX_FAST_SLOTS = 4096;
|
|
1045
1045
|
static ata::schema_ref g_fast_schemas[MAX_FAST_SLOTS];
|
|
1046
1046
|
static std::string g_fast_schema_jsons[MAX_FAST_SLOTS];
|
|
1047
1047
|
static uint32_t g_fast_slot_count = 0;
|
|
1048
1048
|
|
|
1049
|
-
// Register a compiled schema in a fast slot, returns slot ID
|
|
1049
|
+
// Register a compiled schema in a fast slot, returns slot ID.
|
|
1050
|
+
// Deduplicates: same schema JSON string returns existing slot.
|
|
1050
1051
|
Napi::Value FastRegister(const Napi::CallbackInfo& info) {
|
|
1051
1052
|
Napi::Env env = info.Env();
|
|
1052
1053
|
if (info.Length() < 1 || !info[0].IsString()) {
|
|
1053
1054
|
Napi::TypeError::New(env, "Schema JSON string expected").ThrowAsJavaScriptException();
|
|
1054
1055
|
return env.Undefined();
|
|
1055
1056
|
}
|
|
1057
|
+
std::string schema_json = info[0].As<Napi::String>().Utf8Value();
|
|
1058
|
+
// Deduplicate: return existing slot for identical schema JSON
|
|
1059
|
+
for (uint32_t i = 0; i < g_fast_slot_count; i++) {
|
|
1060
|
+
if (g_fast_schema_jsons[i] == schema_json) {
|
|
1061
|
+
return Napi::Number::New(env, i);
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1056
1064
|
if (g_fast_slot_count >= MAX_FAST_SLOTS) {
|
|
1057
1065
|
Napi::Error::New(env, "Max fast schema slots reached").ThrowAsJavaScriptException();
|
|
1058
1066
|
return env.Undefined();
|
|
1059
1067
|
}
|
|
1060
|
-
std::string schema_json = info[0].As<Napi::String>().Utf8Value();
|
|
1061
1068
|
auto schema = ata::compile(schema_json);
|
|
1062
1069
|
if (!schema) {
|
|
1063
1070
|
Napi::Error::New(env, "Failed to compile schema").ThrowAsJavaScriptException();
|
|
@@ -1069,12 +1076,11 @@ Napi::Value FastRegister(const Napi::CallbackInfo& info) {
|
|
|
1069
1076
|
return Napi::Number::New(env, slot);
|
|
1070
1077
|
}
|
|
1071
1078
|
|
|
1072
|
-
// Fast validation: slot +
|
|
1079
|
+
// Fast validation: slot + raw buffer → bool
|
|
1080
|
+
// Routes through is_valid_buf → is_valid_prepadded → On-Demand od_plan fast path
|
|
1073
1081
|
static bool FastValidateImpl(uint32_t slot, const uint8_t* data, size_t length) {
|
|
1074
1082
|
if (slot >= g_fast_slot_count) return false;
|
|
1075
|
-
|
|
1076
|
-
std::string_view(reinterpret_cast<const char*>(data), length));
|
|
1077
|
-
return result.valid;
|
|
1083
|
+
return ata::is_valid_buf(g_fast_schemas[slot], data, length);
|
|
1078
1084
|
}
|
|
1079
1085
|
|
|
1080
1086
|
// Zero-copy validation with pre-padded buffer
|
package/compat.js
CHANGED
|
@@ -10,15 +10,7 @@ class Ata {
|
|
|
10
10
|
const v = new Validator(schema);
|
|
11
11
|
const validate = (data) => {
|
|
12
12
|
const result = v.validate(data);
|
|
13
|
-
validate.errors = result.valid
|
|
14
|
-
? null
|
|
15
|
-
: result.errors.map((e) => ({
|
|
16
|
-
instancePath: e.path ? "/" + e.path.replace(/\//g, "/") : "",
|
|
17
|
-
schemaPath: "",
|
|
18
|
-
keyword: "",
|
|
19
|
-
params: {},
|
|
20
|
-
message: e.message,
|
|
21
|
-
}));
|
|
13
|
+
validate.errors = result.valid ? null : result.errors;
|
|
22
14
|
return result.valid;
|
|
23
15
|
};
|
|
24
16
|
validate.errors = null;
|
package/include/ata.h
CHANGED
|
@@ -91,6 +91,10 @@ validation_result validate(std::string_view schema_json,
|
|
|
91
91
|
// Use this when you only need true/false and can provide pre-padded input.
|
|
92
92
|
bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t length);
|
|
93
93
|
|
|
94
|
+
// Validate raw buffer — handles padding internally via thread-local copy.
|
|
95
|
+
// Use this when input doesn't have simdjson padding (e.g., from V8 TypedArray).
|
|
96
|
+
bool is_valid_buf(const schema_ref& schema, const uint8_t* data, size_t length);
|
|
97
|
+
|
|
94
98
|
// Required padding size for is_valid_prepadded
|
|
95
99
|
inline constexpr size_t REQUIRED_PADDING = 64;
|
|
96
100
|
|
package/index.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
export interface ValidationError {
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
keyword: string;
|
|
3
|
+
instancePath: string;
|
|
4
|
+
schemaPath: string;
|
|
5
|
+
params: Record<string, unknown>;
|
|
4
6
|
message: string;
|
|
5
7
|
}
|
|
6
8
|
|
|
@@ -43,8 +45,14 @@ export class Validator {
|
|
|
43
45
|
/** Fast boolean check for JSON string */
|
|
44
46
|
isValidJSON(jsonString: string): boolean;
|
|
45
47
|
|
|
46
|
-
/**
|
|
47
|
-
isValid(input: Buffer | Uint8Array): boolean;
|
|
48
|
+
/** Ultra-fast buffer validation via V8 CFunction — zero NAPI overhead */
|
|
49
|
+
isValid(input: Buffer | Uint8Array | string): boolean;
|
|
50
|
+
|
|
51
|
+
/** Count valid documents in an NDJSON buffer */
|
|
52
|
+
countValid(ndjsonBuf: Buffer | Uint8Array | string): number;
|
|
53
|
+
|
|
54
|
+
/** Count valid documents from an array of buffers */
|
|
55
|
+
batchIsValid(buffers: (Buffer | Uint8Array)[]): number;
|
|
48
56
|
|
|
49
57
|
/** Zero-copy validation with pre-padded buffer */
|
|
50
58
|
isValidPrepadded(paddedBuffer: Buffer, jsonLength: number): boolean;
|
package/index.js
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
// Native addon: optional. Core validate() uses JS codegen and works without it.
|
|
2
|
+
// Buffer APIs (isValid, countValid, isValidParallel) require native.
|
|
3
|
+
let native;
|
|
4
|
+
try { native = require("node-gyp-build")(__dirname); } catch {}
|
|
2
5
|
const {
|
|
3
6
|
compileToJS,
|
|
4
7
|
compileToJSCodegen,
|
|
@@ -346,6 +349,21 @@ class Validator {
|
|
|
346
349
|
this._ensureCompiled();
|
|
347
350
|
return this.isValidJSON(jsonStr);
|
|
348
351
|
};
|
|
352
|
+
this.isValid = (buf) => {
|
|
353
|
+
if (!native) throw new Error('Native addon required for isValid() — use validate() or isValidObject() instead');
|
|
354
|
+
this._ensureCompiled();
|
|
355
|
+
return this.isValid(buf);
|
|
356
|
+
};
|
|
357
|
+
this.countValid = (ndjsonBuf) => {
|
|
358
|
+
if (!native) throw new Error('Native addon required for countValid()');
|
|
359
|
+
this._ensureCompiled();
|
|
360
|
+
return this.countValid(ndjsonBuf);
|
|
361
|
+
};
|
|
362
|
+
this.batchIsValid = (buffers) => {
|
|
363
|
+
if (!native) throw new Error('Native addon required for batchIsValid()');
|
|
364
|
+
this._ensureCompiled();
|
|
365
|
+
return this.batchIsValid(buffers);
|
|
366
|
+
};
|
|
349
367
|
|
|
350
368
|
// ~standard uses self.validate() -- works with lazy because it goes through
|
|
351
369
|
// the instance property which gets swapped after compilation
|
|
@@ -362,7 +380,7 @@ class Validator {
|
|
|
362
380
|
return {
|
|
363
381
|
issues: result.errors.map((err) => ({
|
|
364
382
|
message: err.message,
|
|
365
|
-
path: parsePointerPath(err.
|
|
383
|
+
path: parsePointerPath(err.instancePath),
|
|
366
384
|
})),
|
|
367
385
|
};
|
|
368
386
|
},
|
|
@@ -521,7 +539,7 @@ class Validator {
|
|
|
521
539
|
const jsonValidateFn = safeCombinedFn
|
|
522
540
|
|| hybridFn
|
|
523
541
|
|| ((obj) => (jsFn(obj) ? VALID_RESULT : errFn(obj)));
|
|
524
|
-
this.validateJSON = useSimdjsonForLarge
|
|
542
|
+
this.validateJSON = useSimdjsonForLarge && native
|
|
525
543
|
? (jsonStr) => {
|
|
526
544
|
if (jsonStr.length >= SIMDJSON_THRESHOLD) {
|
|
527
545
|
this._ensureNative();
|
|
@@ -543,11 +561,12 @@ class Validator {
|
|
|
543
561
|
return jsonValidateFn(JSON.parse(jsonStr));
|
|
544
562
|
} catch (e) {
|
|
545
563
|
if (!(e instanceof SyntaxError)) throw e;
|
|
564
|
+
if (!native) return { valid: false, errors: [{ keyword: 'syntax', instancePath: '', schemaPath: '#', params: {}, message: e.message }] };
|
|
546
565
|
}
|
|
547
566
|
this._ensureNative();
|
|
548
567
|
return this._compiled.validateJSON(jsonStr);
|
|
549
568
|
};
|
|
550
|
-
this.isValidJSON = useSimdjsonForLarge
|
|
569
|
+
this.isValidJSON = useSimdjsonForLarge && native
|
|
551
570
|
? (jsonStr) => {
|
|
552
571
|
if (jsonStr.length >= SIMDJSON_THRESHOLD) {
|
|
553
572
|
this._ensureNative();
|
|
@@ -571,7 +590,30 @@ class Validator {
|
|
|
571
590
|
return false;
|
|
572
591
|
}
|
|
573
592
|
};
|
|
574
|
-
|
|
593
|
+
// Buffer APIs: lazy native init — only compile native schema on first buffer call.
|
|
594
|
+
// This keeps cold start fast (JS codegen only) for users who only use validate().
|
|
595
|
+
if (native) {
|
|
596
|
+
const self = this;
|
|
597
|
+
this.isValid = (buf) => {
|
|
598
|
+
self._ensureNative();
|
|
599
|
+
const slot = self._fastSlot;
|
|
600
|
+
self.isValid = (b) => { if (typeof b === 'string') b = Buffer.from(b); return native.rawFastValidate(slot, b); };
|
|
601
|
+
return self.isValid(buf);
|
|
602
|
+
};
|
|
603
|
+
this.countValid = (ndjsonBuf) => {
|
|
604
|
+
self._ensureNative();
|
|
605
|
+
const slot = self._fastSlot;
|
|
606
|
+
self.countValid = (b) => { if (typeof b === 'string') b = Buffer.from(b); const r = native.rawNDJSONValidate(slot, b); let c = 0; for (let i = 0; i < r.length; i++) if (r[i]) c++; return c; };
|
|
607
|
+
return self.countValid(ndjsonBuf);
|
|
608
|
+
};
|
|
609
|
+
this.batchIsValid = (buffers) => {
|
|
610
|
+
self._ensureNative();
|
|
611
|
+
const slot = self._fastSlot;
|
|
612
|
+
self.batchIsValid = (bufs) => { let v = 0; for (const b of bufs) if (native.rawFastValidate(slot, b)) v++; return v; };
|
|
613
|
+
return self.batchIsValid(buffers);
|
|
614
|
+
};
|
|
615
|
+
}
|
|
616
|
+
} else if (native) {
|
|
575
617
|
// ATA_FORCE_NAPI path: no JS codegen, use native for everything
|
|
576
618
|
this._ensureNative();
|
|
577
619
|
this.validate = preprocess
|
|
@@ -583,12 +625,40 @@ class Validator {
|
|
|
583
625
|
this.isValidObject = (data) => this._compiled.validate(data).valid;
|
|
584
626
|
this.validateJSON = (jsonStr) => this._compiled.validateJSON(jsonStr);
|
|
585
627
|
this.isValidJSON = (jsonStr) => this._compiled.isValidJSON(jsonStr);
|
|
628
|
+
{
|
|
629
|
+
const slot = this._fastSlot;
|
|
630
|
+
this.isValid = (buf) => {
|
|
631
|
+
if (typeof buf === 'string') buf = Buffer.from(buf);
|
|
632
|
+
return native.rawFastValidate(slot, buf);
|
|
633
|
+
};
|
|
634
|
+
}
|
|
635
|
+
{
|
|
636
|
+
const slot = this._fastSlot;
|
|
637
|
+
this.countValid = (ndjsonBuf) => {
|
|
638
|
+
if (typeof ndjsonBuf === 'string') ndjsonBuf = Buffer.from(ndjsonBuf);
|
|
639
|
+
const results = native.rawNDJSONValidate(slot, ndjsonBuf);
|
|
640
|
+
let count = 0;
|
|
641
|
+
for (let i = 0; i < results.length; i++) if (results[i]) count++;
|
|
642
|
+
return count;
|
|
643
|
+
};
|
|
644
|
+
}
|
|
645
|
+
{
|
|
646
|
+
const slot = this._fastSlot;
|
|
647
|
+
this.batchIsValid = (buffers) => {
|
|
648
|
+
let valid = 0;
|
|
649
|
+
for (const buf of buffers) {
|
|
650
|
+
if (native.rawFastValidate(slot, buf)) valid++;
|
|
651
|
+
}
|
|
652
|
+
return valid;
|
|
653
|
+
};
|
|
654
|
+
}
|
|
586
655
|
}
|
|
587
656
|
}
|
|
588
657
|
|
|
589
658
|
_ensureNative() {
|
|
590
659
|
if (this._nativeReady) return;
|
|
591
660
|
this._nativeReady = true;
|
|
661
|
+
if (!native) return;
|
|
592
662
|
let nativeSchemaStr = this._schemaStr;
|
|
593
663
|
if (this._schemaMap.size > 0) {
|
|
594
664
|
const merged = JSON.parse(this._schemaStr);
|
|
@@ -770,7 +840,7 @@ module.exports = { boolFn, hybridFactory, errFn };
|
|
|
770
840
|
return {
|
|
771
841
|
issues: result.errors.map((e) => ({
|
|
772
842
|
message: e.message,
|
|
773
|
-
path: parsePointerPath(e.
|
|
843
|
+
path: parsePointerPath(e.instancePath),
|
|
774
844
|
})),
|
|
775
845
|
};
|
|
776
846
|
},
|
|
@@ -785,43 +855,54 @@ module.exports = { boolFn, hybridFactory, errFn };
|
|
|
785
855
|
|
|
786
856
|
// Raw NAPI fast path for Buffer/Uint8Array
|
|
787
857
|
isValid(input) {
|
|
858
|
+
if (!native) throw new Error('Native addon required for isValid() — install build tools or use validate() instead');
|
|
788
859
|
this._ensureNative();
|
|
789
860
|
return native.rawFastValidate(this._fastSlot, input);
|
|
790
861
|
}
|
|
791
862
|
|
|
792
863
|
// Zero-copy pre-padded path
|
|
793
864
|
isValidPrepadded(paddedBuffer, jsonLength) {
|
|
865
|
+
if (!native) throw new Error('Native addon required for isValidPrepadded()');
|
|
794
866
|
this._ensureNative();
|
|
795
867
|
return native.rawFastValidate(this._fastSlot, paddedBuffer, jsonLength);
|
|
796
868
|
}
|
|
797
869
|
|
|
798
870
|
// Parallel NDJSON batch (multi-core)
|
|
799
871
|
isValidParallel(buffer) {
|
|
872
|
+
if (!native) throw new Error('Native addon required for isValidParallel()');
|
|
800
873
|
this._ensureNative();
|
|
801
874
|
return native.rawParallelValidate(this._fastSlot, buffer);
|
|
802
875
|
}
|
|
803
876
|
|
|
804
877
|
// Parallel count (fastest -- single uint32 return)
|
|
805
878
|
countValid(buffer) {
|
|
879
|
+
if (!native) throw new Error('Native addon required for countValid()');
|
|
806
880
|
this._ensureNative();
|
|
807
881
|
return native.rawParallelCount(this._fastSlot, buffer);
|
|
808
882
|
}
|
|
809
883
|
|
|
810
884
|
// NDJSON single-thread batch
|
|
811
885
|
isValidNDJSON(buffer) {
|
|
886
|
+
if (!native) throw new Error('Native addon required for isValidNDJSON()');
|
|
812
887
|
this._ensureNative();
|
|
813
888
|
return native.rawNDJSONValidate(this._fastSlot, buffer);
|
|
814
889
|
}
|
|
815
890
|
}
|
|
816
891
|
|
|
817
892
|
function validate(schema, data) {
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
893
|
+
if (native) {
|
|
894
|
+
const schemaStr =
|
|
895
|
+
typeof schema === "string" ? schema : JSON.stringify(schema);
|
|
896
|
+
return native.validate(schemaStr, data);
|
|
897
|
+
}
|
|
898
|
+
// JS fallback: compile and validate
|
|
899
|
+
const v = new Validator(typeof schema === "string" ? JSON.parse(schema) : schema);
|
|
900
|
+
return v.validate(data);
|
|
821
901
|
}
|
|
822
902
|
|
|
823
903
|
function version() {
|
|
824
|
-
return native.version();
|
|
904
|
+
if (native) return native.version();
|
|
905
|
+
return require("./package.json").version;
|
|
825
906
|
}
|
|
826
907
|
|
|
827
908
|
// Bundle multiple validators into a single JS file for fast startup.
|