ata-validator 0.10.0 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/index.d.ts +58 -15
- package/lib/js-compiler.js +62 -21
- package/lib/schema-parser.js +223 -0
- package/lib/shape-classifier.js +1 -1
- package/lib/tier0.js +17 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -102,7 +102,7 @@ Self-recursive named functions for $dynamicRef, compile-time cross-schema resolu
|
|
|
102
102
|
|
|
103
103
|
### JSON Schema Test Suite
|
|
104
104
|
|
|
105
|
-
**
|
|
105
|
+
**98.5%** pass rate (1172/1190) on official [JSON Schema Test Suite](https://github.com/json-schema-org/JSON-Schema-Test-Suite) (Draft 2020-12), excluding remote refs and vocabulary (intentionally unsupported). **95.3%** on [@exodus/schemasafe](https://github.com/ExodusMovement/schemasafe) test suite.
|
|
106
106
|
|
|
107
107
|
## When to use ata
|
|
108
108
|
|
|
@@ -135,7 +135,7 @@ Self-recursive named functions for $dynamicRef, compile-time cross-schema resolu
|
|
|
135
135
|
- **Zero-copy paths**: Buffer and pre-padded input support - no unnecessary copies
|
|
136
136
|
- **Defaults + coercion**: `default` values, `coerceTypes`, `removeAdditional` support
|
|
137
137
|
- **C/C++ library**: Native API for non-Node.js environments
|
|
138
|
-
- **
|
|
138
|
+
- **98.5% spec compliant**: Draft 2020-12
|
|
139
139
|
|
|
140
140
|
## Installation
|
|
141
141
|
|
package/index.d.ts
CHANGED
|
@@ -11,6 +11,12 @@ export interface ValidationResult {
|
|
|
11
11
|
errors: ValidationError[];
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
+
/**
 * Result returned by `Validator.validateAndParse()`: the parsed JSON value
 * together with the outcome of validating it against the schema.
 */
export interface ValidateAndParseResult {
|
|
15
|
+
/** Whether the input passed validation. */
valid: boolean;
|
|
16
|
+
/** The parsed value. NOTE(review): contents when `valid` is false are not specified here — confirm. */
value: unknown;
|
|
17
|
+
/** Validation errors; presumably empty when `valid` is true — confirm against the implementation. */
errors: ValidationError[];
|
|
18
|
+
}
|
|
19
|
+
|
|
14
20
|
export interface ValidatorOptions {
|
|
15
21
|
coerceTypes?: boolean;
|
|
16
22
|
removeAdditional?: boolean;
|
|
@@ -27,56 +33,93 @@ export interface StandardSchemaV1Props {
|
|
|
27
33
|
| { issues: Array<{ message: string; path?: ReadonlyArray<{ key: PropertyKey }> }> };
|
|
28
34
|
}
|
|
29
35
|
|
|
36
|
+
/**
 * Shape of the `mod` argument accepted by `Validator.fromStandalone()`.
 * Presumably this is what a module generated by `Validator#toStandalone()`
 * exports — confirm against the generator.
 */
export interface StandaloneModule {
|
|
37
|
+
/** Boolean-only check: takes the data and returns whether it is valid. */
boolFn: (data: unknown) => boolean;
|
|
38
|
+
/**
 * Given a result object and an error function, returns a validator that
 * produces a full `ValidationResult`.
 * NOTE(review): `errFn` is typed as the bare `Function` type; the signature of
 * the `errFn` member below looks like the intended type — confirm and tighten.
 */
hybridFactory: (validResult: object, errFn: Function) => (data: unknown) => ValidationResult;
|
|
39
|
+
/** Error-collecting validator, or `null` when the module does not provide one. */
errFn: ((data: unknown, allErrors?: boolean) => ValidationResult) | null;
|
|
40
|
+
}
|
|
41
|
+
|
|
30
42
|
export class Validator {
|
|
31
43
|
constructor(schema: object | string, options?: ValidatorOptions);
|
|
32
44
|
|
|
33
|
-
/** Add a schema to the
|
|
45
|
+
/** Add a schema to the registry for cross-schema $ref resolution */
|
|
34
46
|
addSchema(schema: object): void;
|
|
35
47
|
|
|
36
|
-
/** Validate data
|
|
48
|
+
/** Validate data, returns result with errors. Applies defaults, coerceTypes, removeAdditional. */
|
|
37
49
|
validate(data: unknown): ValidationResult;
|
|
38
50
|
|
|
39
|
-
/** Fast boolean check
|
|
51
|
+
/** Fast boolean check via JS codegen or tier 0 interpreter. No error collection. */
|
|
40
52
|
isValidObject(data: unknown): boolean;
|
|
41
53
|
|
|
42
|
-
/** Validate JSON string
|
|
54
|
+
/** Validate a JSON string. Uses simdjson fast path for large documents. */
|
|
43
55
|
validateJSON(jsonString: string): ValidationResult;
|
|
44
56
|
|
|
45
|
-
/** Fast boolean check for JSON string */
|
|
57
|
+
/** Fast boolean check for a JSON string */
|
|
46
58
|
isValidJSON(jsonString: string): boolean;
|
|
47
59
|
|
|
48
|
-
/**
|
|
60
|
+
/** Parse JSON with simdjson + validate against schema. Returns parsed value and validation result. Requires native addon. */
|
|
61
|
+
validateAndParse(jsonString: string | Buffer): ValidateAndParseResult;
|
|
62
|
+
|
|
63
|
+
/** Ultra-fast buffer validation via native addon */
|
|
49
64
|
isValid(input: Buffer | Uint8Array | string): boolean;
|
|
50
65
|
|
|
51
|
-
/** Count valid documents in an NDJSON buffer */
|
|
66
|
+
/** Count valid documents in an NDJSON buffer. Requires native addon. */
|
|
52
67
|
countValid(ndjsonBuf: Buffer | Uint8Array | string): number;
|
|
53
68
|
|
|
54
|
-
/**
|
|
69
|
+
/** Validate an array of buffers, returns count of valid ones. Requires native addon. */
|
|
55
70
|
batchIsValid(buffers: (Buffer | Uint8Array)[]): number;
|
|
56
71
|
|
|
57
|
-
/** Zero-copy validation with pre-padded buffer */
|
|
72
|
+
/** Zero-copy validation with pre-padded buffer. Requires native addon. */
|
|
58
73
|
isValidPrepadded(paddedBuffer: Buffer, jsonLength: number): boolean;
|
|
59
74
|
|
|
60
|
-
/** Multi-core parallel NDJSON validation
|
|
75
|
+
/** Multi-core parallel NDJSON validation. Returns boolean per line. Requires native addon. */
|
|
61
76
|
isValidParallel(ndjsonBuffer: Buffer): boolean[];
|
|
62
77
|
|
|
63
|
-
/**
|
|
64
|
-
countValid(ndjsonBuffer: Buffer): number;
|
|
65
|
-
|
|
66
|
-
/** Single-thread NDJSON batch validation */
|
|
78
|
+
/** Single-thread NDJSON batch validation. Requires native addon. */
|
|
67
79
|
isValidNDJSON(ndjsonBuffer: Buffer): boolean[];
|
|
68
80
|
|
|
69
|
-
/**
|
|
81
|
+
/** Generate a standalone JS module string for zero-compile loading. Returns null if schema can't be standalone-compiled. */
|
|
82
|
+
toStandalone(): string | null;
|
|
83
|
+
|
|
84
|
+
/** Load a pre-compiled standalone module. Zero schema compilation at startup. */
|
|
85
|
+
static fromStandalone(mod: StandaloneModule, schema: object | string, options?: ValidatorOptions): Validator;
|
|
86
|
+
|
|
87
|
+
/** Bundle multiple schemas into a single JS module string. Load with Validator.loadBundle(). */
|
|
88
|
+
static bundle(schemas: object[], options?: ValidatorOptions): string;
|
|
89
|
+
|
|
90
|
+
/** Bundle multiple schemas into a self-contained JS module. No ata-validator import needed at runtime. */
|
|
91
|
+
static bundleStandalone(schemas: object[], options?: ValidatorOptions): string;
|
|
92
|
+
|
|
93
|
+
/** Bundle multiple schemas with deduplicated shared templates. Smaller output than bundle(). */
|
|
94
|
+
static bundleCompact(schemas: object[], options?: ValidatorOptions): string;
|
|
95
|
+
|
|
96
|
+
/** Load a bundle created by Validator.bundle(). Returns array of Validator instances. */
|
|
97
|
+
static loadBundle(mods: object[], schemas: object[], options?: ValidatorOptions): Validator[];
|
|
98
|
+
|
|
99
|
+
/** Standard Schema V1 interface, compatible with Fastify, tRPC, TanStack, etc. */
|
|
70
100
|
readonly "~standard": StandardSchemaV1Props;
|
|
71
101
|
}
|
|
72
102
|
|
|
103
|
+
/** One-shot validate: creates a Validator, validates data, returns result. */
|
|
73
104
|
export function validate(
|
|
74
105
|
schema: object | string,
|
|
75
106
|
data: unknown
|
|
76
107
|
): ValidationResult;
|
|
77
108
|
|
|
109
|
+
/** Fast compile: returns a validate function directly. WeakMap cached, second call with same schema is near-zero cost. */
|
|
110
|
+
export function compile(
|
|
111
|
+
schema: object | string,
|
|
112
|
+
options?: ValidatorOptions
|
|
113
|
+
): (data: unknown) => ValidationResult;
|
|
114
|
+
|
|
115
|
+
/** Parse JSON using simdjson (native addon) or JSON.parse (fallback). */
|
|
116
|
+
export function parseJSON(jsonString: string | Buffer): unknown;
|
|
117
|
+
|
|
118
|
+
/** Returns ata-validator version string. */
|
|
78
119
|
export function version(): string;
|
|
79
120
|
|
|
121
|
+
/** Create a simdjson-compatible padded buffer from a JSON string. */
|
|
80
122
|
export function createPaddedBuffer(jsonStr: string): { buffer: Buffer; length: number };
|
|
81
123
|
|
|
124
|
+
/** Required padding size for simdjson buffers. */
|
|
82
125
|
export const SIMDJSON_PADDING: number;
|
package/lib/js-compiler.js
CHANGED
|
@@ -4,6 +4,20 @@
|
|
|
4
4
|
// Closure-based validator — no new Function() or eval().
|
|
5
5
|
// Returns null if the schema is too complex for JS compilation.
|
|
6
6
|
|
|
7
|
+
// Count Unicode code points, not UTF-16 code units.
// JSON Schema: minLength/maxLength count characters per RFC 8259, so an
// astral character (encoded as one surrogate pair) must count as 1, not 2.
//
// Single pass over the string: every well-formed high+low surrogate pair
// reduces the count by one. Lone surrogates still count as one each, which is
// exactly what iterating the string with for...of would yield. A string with
// no surrogate pairs simply returns s.length.
function _cpLen(s) {
  const len = s.length;
  let pairs = 0;
  // Stop one short of the end: a pair needs a following code unit.
  for (let i = 0; i < len - 1; i++) {
    const hi = s.charCodeAt(i);
    if (hi >= 0xD800 && hi <= 0xDBFF) {
      const lo = s.charCodeAt(i + 1);
      if (lo >= 0xDC00 && lo <= 0xDFFF) {
        pairs++;
        i++; // the low surrogate belongs to this pair; do not inspect it again
      }
    }
  }
  return len - pairs;
}
|
|
20
|
+
|
|
7
21
|
// AJV-compatible error message templates (compile-time, not runtime)
|
|
8
22
|
const AJV_MESSAGES = {
|
|
9
23
|
type: (p) => `must be ${p.type}`,
|
|
@@ -271,11 +285,12 @@ function compileToJS(schema, defs, schemaMap) {
|
|
|
271
285
|
// string
|
|
272
286
|
if (schema.minLength !== undefined) {
|
|
273
287
|
const min = schema.minLength
|
|
274
|
-
|
|
288
|
+
const min2 = min * 2
|
|
289
|
+
checks.push((d) => typeof d !== 'string' || d.length >= min2 || (d.length >= min && _cpLen(d) >= min))
|
|
275
290
|
}
|
|
276
291
|
if (schema.maxLength !== undefined) {
|
|
277
292
|
const max = schema.maxLength
|
|
278
|
-
checks.push((d) => typeof d !== 'string' || d.length <= max)
|
|
293
|
+
checks.push((d) => typeof d !== 'string' || d.length <= max || _cpLen(d) <= max)
|
|
279
294
|
}
|
|
280
295
|
if (schema.pattern) {
|
|
281
296
|
try {
|
|
@@ -775,7 +790,7 @@ function compileToJSCodegen(schema, schemaMap) {
|
|
|
775
790
|
}
|
|
776
791
|
}
|
|
777
792
|
|
|
778
|
-
const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: [], closureVals: [], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
|
|
793
|
+
const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
|
|
779
794
|
const lines = []
|
|
780
795
|
genCode(schema, 'd', lines, ctx)
|
|
781
796
|
|
|
@@ -914,8 +929,8 @@ function tryGenCombined(schema, access, ctx) {
|
|
|
914
929
|
|
|
915
930
|
if (t === 'string') {
|
|
916
931
|
const conds = [`typeof _v!=='string'`]
|
|
917
|
-
if (schema.minLength !== undefined) conds.push(`_v
|
|
918
|
-
if (schema.maxLength !== undefined) conds.push(`_v
|
|
932
|
+
if (schema.minLength !== undefined) conds.push(`_cpLen(_v)<${schema.minLength}`)
|
|
933
|
+
if (schema.maxLength !== undefined) conds.push(`_cpLen(_v)>${schema.maxLength}`)
|
|
919
934
|
if (conds.length < 2 && !schema.pattern && !schema.format) return null // not worth combining
|
|
920
935
|
// pattern and format need separate statements, fall back if present
|
|
921
936
|
if (schema.pattern || schema.format) return null
|
|
@@ -956,6 +971,9 @@ function tryGenCombined(schema, access, ctx) {
|
|
|
956
971
|
// 'string' / 'number' / 'integer' = we know the primitive type
|
|
957
972
|
function genCode(schema, v, lines, ctx, knownType) {
|
|
958
973
|
if (typeof schema !== 'object' || schema === null) return
|
|
974
|
+
if (!ctx.regExpMap) {
|
|
975
|
+
ctx.regExpMap = new Map();
|
|
976
|
+
}
|
|
959
977
|
|
|
960
978
|
// $ref — guard against circular references
|
|
961
979
|
// In 2020-12 with unevaluated*, $ref can coexist with siblings — don't early return
|
|
@@ -1159,8 +1177,8 @@ function genCode(schema, v, lines, ctx, knownType) {
|
|
|
1159
1177
|
if (schema.multipleOf !== undefined) lines.push(isNum ? `if(${v}%${schema.multipleOf}!==0)return false` : `if(typeof ${v}==='number'&&${v}%${schema.multipleOf}!==0)return false`)
|
|
1160
1178
|
|
|
1161
1179
|
// string — skip type guard if known string
|
|
1162
|
-
if (schema.minLength !== undefined) lines.push(isStr ? `if(${v}
|
|
1163
|
-
if (schema.maxLength !== undefined) lines.push(isStr ? `if(${v}
|
|
1180
|
+
if (schema.minLength !== undefined) lines.push(isStr ? `if(_cpLen(${v})<${schema.minLength})return false` : `if(typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength})return false`)
|
|
1181
|
+
if (schema.maxLength !== undefined) lines.push(isStr ? `if(_cpLen(${v})>${schema.maxLength})return false` : `if(typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength})return false`)
|
|
1164
1182
|
|
|
1165
1183
|
// array size — skip guard if known array
|
|
1166
1184
|
if (schema.minItems !== undefined) lines.push(isArr ? `if(${v}.length<${schema.minItems})return false` : `if(Array.isArray(${v})&&${v}.length<${schema.minItems})return false`)
|
|
@@ -1176,8 +1194,13 @@ function genCode(schema, v, lines, ctx, knownType) {
|
|
|
1176
1194
|
if (inlineCheck) {
|
|
1177
1195
|
lines.push(isStr ? `if(!(${inlineCheck}))return false` : `if(typeof ${v}==='string'&&!(${inlineCheck}))return false`)
|
|
1178
1196
|
} else {
|
|
1179
|
-
const
|
|
1180
|
-
ctx.
|
|
1197
|
+
const pattern = JSON.stringify(schema.pattern);
|
|
1198
|
+
if (!ctx.regExpMap.has(pattern)) {
|
|
1199
|
+
const ri = ctx.varCounter++
|
|
1200
|
+
ctx.regExpMap.set(pattern, ri)
|
|
1201
|
+
ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
|
|
1202
|
+
}
|
|
1203
|
+
const ri = ctx.regExpMap.get(pattern);
|
|
1181
1204
|
lines.push(isStr ? `if(!_re${ri}.test(${v}))return false` : `if(typeof ${v}==='string'&&!_re${ri}.test(${v}))return false`)
|
|
1182
1205
|
}
|
|
1183
1206
|
}
|
|
@@ -2264,6 +2287,7 @@ function compileToJSCodegenWithErrors(schema, schemaMap) {
|
|
|
2264
2287
|
}
|
|
2265
2288
|
|
|
2266
2289
|
const ctx = { varCounter: 0, helperCode: [], rootDefs: eRootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors: eAnchors, rootSchema: schema }
|
|
2290
|
+
ctx.helperCode.push('const _cpLen=s=>{let n=0;for(const _ of s)n++;return n}')
|
|
2267
2291
|
const lines = []
|
|
2268
2292
|
genCodeE(schema, 'd', '', lines, ctx, '#')
|
|
2269
2293
|
if (lines.length === 0) return (d) => ({ valid: true, errors: [] })
|
|
@@ -2296,7 +2320,9 @@ function compileToJSCodegenWithErrors(schema, schemaMap) {
|
|
|
2296
2320
|
function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
|
|
2297
2321
|
if (!schemaPrefix) schemaPrefix = '#'
|
|
2298
2322
|
if (typeof schema !== 'object' || schema === null) return
|
|
2299
|
-
|
|
2323
|
+
if (!ctx.regExpMap) {
|
|
2324
|
+
ctx.regExpMap = new Map();
|
|
2325
|
+
}
|
|
2300
2326
|
// $ref — resolve local and cross-schema refs
|
|
2301
2327
|
if (schema.$ref) {
|
|
2302
2328
|
// Self-reference "#" — no-op (permissive) to avoid infinite recursion
|
|
@@ -2440,11 +2466,11 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
|
|
|
2440
2466
|
|
|
2441
2467
|
// string
|
|
2442
2468
|
if (schema.minLength !== undefined) {
|
|
2443
|
-
const c = isStr ?
|
|
2469
|
+
const c = isStr ? `_cpLen(${v})<${schema.minLength}` : `typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength}`
|
|
2444
2470
|
lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`)
|
|
2445
2471
|
}
|
|
2446
2472
|
if (schema.maxLength !== undefined) {
|
|
2447
|
-
const c = isStr ?
|
|
2473
|
+
const c = isStr ? `_cpLen(${v})>${schema.maxLength}` : `typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength}`
|
|
2448
2474
|
lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`)
|
|
2449
2475
|
}
|
|
2450
2476
|
if (schema.pattern) {
|
|
@@ -2453,8 +2479,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
|
|
|
2453
2479
|
const c = isStr ? `!(${inlineCheck})` : `typeof ${v}==='string'&&!(${inlineCheck})`
|
|
2454
2480
|
lines.push(`if(${c}){${fail('pattern', 'pattern', `{pattern:${JSON.stringify(schema.pattern)}}`, `'must match pattern "${schema.pattern}"'`)}}`)
|
|
2455
2481
|
} else {
|
|
2456
|
-
const
|
|
2457
|
-
ctx.
|
|
2482
|
+
const pattern = JSON.stringify(schema.pattern);
|
|
2483
|
+
if (!ctx.regExpMap.has(pattern)) {
|
|
2484
|
+
const ri = ctx.varCounter++
|
|
2485
|
+
ctx.regExpMap.set(pattern, ri)
|
|
2486
|
+
ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`)
|
|
2487
|
+
}
|
|
2488
|
+
const ri = ctx.regExpMap.get(pattern);
|
|
2458
2489
|
const c = isStr ? `!_re${ri}.test(${v})` : `typeof ${v}==='string'&&!_re${ri}.test(${v})`
|
|
2459
2490
|
lines.push(`if(${c}){${fail('pattern', 'pattern', `{pattern:${JSON.stringify(schema.pattern)}}`, `'must match pattern "${schema.pattern}"'`)}}`)
|
|
2460
2491
|
}
|
|
@@ -2539,8 +2570,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
|
|
|
2539
2570
|
// patternProperties
|
|
2540
2571
|
if (schema.patternProperties) {
|
|
2541
2572
|
for (const [pat, sub] of Object.entries(schema.patternProperties)) {
|
|
2542
|
-
const
|
|
2543
|
-
ctx.
|
|
2573
|
+
const pattern = JSON.stringify(pat);
|
|
2574
|
+
if (!ctx.regExpMap.has(pattern)) {
|
|
2575
|
+
const ri = ctx.varCounter++
|
|
2576
|
+
ctx.regExpMap.set(pattern, ri)
|
|
2577
|
+
ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
|
|
2578
|
+
}
|
|
2579
|
+
const ri = ctx.regExpMap.get(pattern);
|
|
2544
2580
|
const ki = ctx.varCounter++
|
|
2545
2581
|
lines.push(`if(typeof ${v}==='object'&&${v}!==null&&!Array.isArray(${v})){for(const _k${ki} in ${v}){if(_re${ri}.test(_k${ki})){`)
|
|
2546
2582
|
const p = pathExpr ? `${pathExpr}+'/'+_k${ki}` : `'/'+_k${ki}`
|
|
@@ -2570,8 +2606,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
|
|
|
2570
2606
|
lines.push(`if(_k${ki}.length>${pn.maxLength}){${fail('maxLength', 'propertyNames/maxLength', `{limit:${pn.maxLength}}`, `'must NOT have more than ${pn.maxLength} characters'`)}}`)
|
|
2571
2607
|
}
|
|
2572
2608
|
if (pn.pattern) {
|
|
2573
|
-
const
|
|
2574
|
-
ctx.
|
|
2609
|
+
const pattern = JSON.stringify(pn.pattern);
|
|
2610
|
+
if (!ctx.regExpMap.has(pattern)) {
|
|
2611
|
+
const ri = ctx.varCounter++
|
|
2612
|
+
ctx.regExpMap.set(pattern, ri)
|
|
2613
|
+
ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
|
|
2614
|
+
}
|
|
2615
|
+
const ri = ctx.regExpMap.get(pattern);
|
|
2575
2616
|
lines.push(`if(!_re${ri}.test(_k${ki})){${fail('pattern', 'propertyNames/pattern', `{pattern:${JSON.stringify(pn.pattern)}}`, `'must match pattern "${pn.pattern}"'`)}}`)
|
|
2576
2617
|
}
|
|
2577
2618
|
if (pn.const !== undefined) {
|
|
@@ -2746,7 +2787,7 @@ function compileToJSCombined(schema, VALID_RESULT, schemaMap) {
|
|
|
2746
2787
|
}
|
|
2747
2788
|
}
|
|
2748
2789
|
|
|
2749
|
-
const ctx = { varCounter: 0, helperCode: [], closureVars: [], closureVals: [],
|
|
2790
|
+
const ctx = { varCounter: 0, helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen],
|
|
2750
2791
|
rootDefs: cRootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors: cAnchors, rootSchema: schema }
|
|
2751
2792
|
const lines = []
|
|
2752
2793
|
genCodeC(schema, 'd', '', lines, ctx, '#')
|
|
@@ -2963,8 +3004,8 @@ function genCodeC(schema, v, pathExpr, lines, ctx, schemaPrefix) {
|
|
|
2963
3004
|
}
|
|
2964
3005
|
|
|
2965
3006
|
// string — skip guard if known
|
|
2966
|
-
if (schema.minLength !== undefined) { const c = isStr ?
|
|
2967
|
-
if (schema.maxLength !== undefined) { const c = isStr ?
|
|
3007
|
+
if (schema.minLength !== undefined) { const c = isStr ? `_cpLen(${v})<${schema.minLength}` : `typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength}`; lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`) }
|
|
3008
|
+
if (schema.maxLength !== undefined) { const c = isStr ? `_cpLen(${v})>${schema.maxLength}` : `typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength}`; lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`) }
|
|
2968
3009
|
if (schema.pattern) {
|
|
2969
3010
|
const inlineCheck = compilePatternInline(schema.pattern, v)
|
|
2970
3011
|
if (inlineCheck) {
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Schema-compiled JSON parser: combines parse + validate in one pass.
|
|
4
|
+
// For known-shape schemas, skips JSON.parse entirely and builds typed
|
|
5
|
+
// JS objects directly from the JSON string.
|
|
6
|
+
//
|
|
7
|
+
// Returns null on invalid JSON or validation failure.
|
|
8
|
+
// compileSchemaParser() itself returns null for unsupported schemas; the caller is expected to fall back to JSON.parse.
|
|
9
|
+
|
|
10
|
+
// Compile `schema` into a function that parses a JSON string and validates it
// in the same pass, or return null when the schema shape is not supported.
//
// Supported shapes:
//   - { type: 'object', properties: {...} }   -> compileObjectParser(schema)
//   - { type: 'array', items: <object schema that compileSchemaParser supports> }
//
// The returned function takes the JSON text and returns the parsed value, or
// null when the text is malformed or violates the schema.
function compileSchemaParser(schema) {
  if (!schema || typeof schema !== 'object') return null;

  // Array of objects
  if (schema.type === 'array' && schema.items && schema.items.type === 'object') {
    const itemParser = compileSchemaParser(schema.items);
    if (!itemParser) return null;
    // NOTE(review): array-level keywords (minItems, maxItems, uniqueItems, ...)
    // are not enforced by this parser.
    return function parseArray(str) {
      const len = str.length;
      let pos = 0;
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      if (str.charCodeAt(pos) !== 91) return null; // [
      pos++;
      const arr = [];
      let first = true;
      while (pos < len) {
        while (pos < len && str.charCodeAt(pos) <= 32) pos++;
        if (str.charCodeAt(pos) === 93) { // ]
          pos++;
          // Only whitespace may follow the closing bracket.
          while (pos < len && str.charCodeAt(pos) <= 32) pos++;
          return pos === len ? arr : null;
        }
        if (!first) {
          if (str.charCodeAt(pos) !== 44) return null; // ,
          pos++;
          while (pos < len && str.charCodeAt(pos) <= 32) pos++;
        }
        first = false;
        // Each element must be an object. The compiled item parser works on a
        // whole string, so locate the element's extent with skipValue() and
        // hand it the slice. (The previous code called an undefined
        // parseObject() here and threw on every non-empty array.)
        if (str.charCodeAt(pos) !== 123) return null; // {
        const end = skipValue(str, pos);
        if (end <= pos || end > len) return null; // unterminated / malformed element
        const item = itemParser(str.substring(pos, end));
        if (item === null) return null;
        arr.push(item);
        pos = end;
      }
      return null; // ran out of input before the closing ]
    };
  }

  // Plain object with declared properties. compileObjectParser() returns null
  // itself for anything it cannot handle (e.g. nested arrays or objects).
  if (schema.type === 'object' && schema.properties) {
    return compileObjectParser(schema);
  }

  return null;
}
|
|
59
|
+
|
|
60
|
+
// Compile a flat object schema ({ properties: { key: { type: <primitive> } } })
// into a parse+validate function. Returns null when the schema cannot be
// handled, so the caller can use a generic path instead:
//   - no `properties`
//   - a property without a string `type`, or with a type parseValue() does not
//     implement (anything other than string / integer / number / boolean)
//   - a `required` key that is not declared in `properties` (its value would be
//     skipped, so the required check could never succeed)
//   - a property literally named "__proto__" (plain assignment on the result
//     object would hit the prototype setter instead of creating a property)
//
// The compiled function returns the parsed object, or null when the text is
// not a well-formed object or fails validation.
// NOTE(review): undeclared properties are skipped and NOT copied into the
// result, and only the keywords parseValue() checks are enforced (pattern,
// enum, format, a schema-valued additionalProperties, ... are ignored) —
// confirm callers account for that.
function compileObjectParser(schema) {
  if (!schema.properties) return null;
  const props = schema.properties;
  const required = new Set(schema.required || []);
  const additionalAllowed = schema.additionalProperties !== false;

  // key -> property schema. Null prototype so that keys like "toString" or
  // "constructor" in the input are not resolved through Object.prototype.
  const keyTypes = Object.create(null);
  for (const k of Object.keys(props)) {
    const p = props[k];
    if (!p || typeof p.type !== 'string') return null; // bail on complex
    const t = p.type;
    if (t !== 'string' && t !== 'integer' && t !== 'number' && t !== 'boolean') return null; // flat primitives only
    if (k === '__proto__') return null;
    keyTypes[k] = p;
  }
  for (const r of required) {
    if (keyTypes[r] === undefined) return null;
  }

  const hasOwn = Object.prototype.hasOwnProperty;

  // The compiled parser function
  return function parseAndValidate(str) {
    const len = str.length;
    let pos = 0;

    // Skip whitespace
    while (pos < len && str.charCodeAt(pos) <= 32) pos++;
    if (pos >= len || str.charCodeAt(pos) !== 123) return null; // {
    pos++;

    const result = {};
    let first = true;
    let closed = false;

    while (pos < len) {
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      if (str.charCodeAt(pos) === 125) { // }
        pos++;
        closed = true;
        break;
      }

      // Comma between entries
      if (!first) {
        if (str.charCodeAt(pos) !== 44) return null; // ,
        pos++;
        while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      }
      first = false;

      // Key
      if (str.charCodeAt(pos) !== 34) return null; // "
      const keyQuote = pos;
      pos++;
      let escaped = false;
      while (pos < len && str.charCodeAt(pos) !== 34) {
        if (str.charCodeAt(pos) === 92) { escaped = true; pos++; } // skip escaped char
        pos++;
      }
      if (pos >= len) return null; // unterminated key
      let key;
      if (escaped) {
        // Decode escapes so "a\u0062" matches the property "ab".
        try { key = JSON.parse(str.substring(keyQuote, pos + 1)); } catch (e) { return null; }
      } else {
        key = str.substring(keyQuote + 1, pos);
      }
      pos++; // closing "

      // Colon
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      if (str.charCodeAt(pos) !== 58) return null; // :
      pos++;
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;

      // Value — parse based on schema type if known key
      const propSchema = keyTypes[key];
      if (propSchema !== undefined) {
        const parsed = parseValue(str, pos, propSchema);
        // A real value always consumes at least one character.
        if (parsed === null || parsed.pos <= pos) return null;
        result[key] = parsed.value;
        pos = parsed.pos;
      } else if (!additionalAllowed) {
        return null; // additional property not allowed
      } else {
        // Skip unknown value
        const skipped = skipValue(str, pos);
        if (skipped <= pos) return null; // error (negative) or nothing consumed
        pos = skipped;
      }
    }

    // Input ended before the closing brace.
    if (!closed) return null;
    // Only whitespace may follow the object.
    while (pos < len && str.charCodeAt(pos) <= 32) pos++;
    if (pos !== len) return null;

    // Check required (own properties only, so inherited members don't count)
    for (const r of required) {
      if (!hasOwn.call(result, r)) return null;
    }

    return result;
  };
}
|
|
146
|
+
|
|
147
|
+
// Advance past a run of ASCII digits starting at `pos`; returns the new position.
function _skipDigits(str, pos) {
  const len = str.length;
  while (pos < len) {
    const c = str.charCodeAt(pos);
    if (c < 48 || c > 57) break;
    pos++;
  }
  return pos;
}

// Parse one primitive JSON value at `pos` according to `schema.type`
// ('string' | 'integer' | 'number' | 'boolean') and apply the constraints
// this parser understands (minLength/maxLength, minimum/maximum).
//
// Returns { value, pos } where `pos` is the index just past the value, or
// null when the text is not a well-formed value of that type, a constraint is
// violated, or the type is not supported.
function parseValue(str, pos, schema) {
  const len = str.length;
  const type = schema.type;

  if (type === 'string') {
    if (str.charCodeAt(pos) !== 34) return null; // must be "
    const quote = pos;
    pos++;
    let escaped = false;
    while (pos < len) {
      const c = str.charCodeAt(pos);
      if (c === 34) break;
      if (c === 92) { escaped = true; pos++; } // skip the escaped character
      pos++;
    }
    if (pos >= len) return null; // unterminated string
    let val;
    if (escaped) {
      // Decode escape sequences (\n, \", \uXXXX, ...) the same way JSON.parse
      // does; this also rejects invalid escapes.
      try { val = JSON.parse(str.substring(quote, pos + 1)); } catch (e) { return null; }
    } else {
      val = str.substring(quote + 1, pos);
    }
    pos++; // closing "

    const minLength = schema.minLength;
    const maxLength = schema.maxLength;
    if (minLength !== undefined || maxLength !== undefined) {
      // JSON Schema lengths are in code points: each surrogate pair counts once.
      let count = val.length;
      for (let i = 0; i < val.length - 1; i++) {
        const hi = val.charCodeAt(i);
        if (hi >= 0xD800 && hi <= 0xDBFF) {
          const lo = val.charCodeAt(i + 1);
          if (lo >= 0xDC00 && lo <= 0xDFFF) { count--; i++; }
        }
      }
      if (minLength !== undefined && count < minLength) return null;
      if (maxLength !== undefined && count > maxLength) return null;
    }
    return { value: val, pos };
  }

  if (type === 'integer' || type === 'number') {
    // JSON number grammar: -? (0 | [1-9][0-9]*) (. [0-9]+)? ([eE] [+-]? [0-9]+)?
    const start = pos;
    if (str.charCodeAt(pos) === 45) pos++; // -
    const intStart = pos;
    pos = _skipDigits(str, pos);
    if (pos === intStart) return null; // no digits: not a number (previously yielded 0)
    if (pos - intStart > 1 && str.charCodeAt(intStart) === 48) return null; // leading zero
    if (str.charCodeAt(pos) === 46) { // .
      const fracStart = pos + 1;
      pos = _skipDigits(str, fracStart);
      if (pos === fracStart) return null;
    }
    const e = str.charCodeAt(pos);
    if (e === 101 || e === 69) { // e / E
      pos++;
      const sign = str.charCodeAt(pos);
      if (sign === 43 || sign === 45) pos++; // + / -
      const expStart = pos;
      pos = _skipDigits(str, expStart);
      if (pos === expStart) return null;
    }
    const val = +str.substring(start, pos);
    // Integer-ness is decided on the value, so 1.0 and 1e2 are integers.
    if (type === 'integer' && !Number.isInteger(val)) return null;
    if (schema.minimum !== undefined && val < schema.minimum) return null;
    if (schema.maximum !== undefined && val > schema.maximum) return null;
    return { value: val, pos };
  }

  if (type === 'boolean') {
    if (str.startsWith('true', pos)) return { value: true, pos: pos + 4 };
    if (str.startsWith('false', pos)) return { value: false, pos: pos + 5 };
    return null;
  }

  return null;
}
|
|
189
|
+
|
|
190
|
+
// JSON number grammar, used to vet skipped scalar tokens.
const _JSON_NUMBER_RE = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/;

// `pos` points at an opening quote. Returns the index just past the matching
// closing quote, or -1 if the string is not terminated.
function _skipJsonString(str, pos) {
  const len = str.length;
  pos++;
  while (pos < len) {
    const c = str.charCodeAt(pos);
    if (c === 34) return pos + 1;
    pos += c === 92 ? 2 : 1; // a backslash escapes the next character
  }
  return -1;
}

// Skip over one JSON value of any type starting at `pos` without building it.
// Returns the index just past the value, or -1 when the value is malformed:
// unterminated string or container, mismatched brackets, or a scalar token
// that is not true / false / null / a JSON number. Callers test for a
// negative result to detect failure.
// Container contents are only checked for bracket balance and string
// termination, not against the full JSON grammar.
function skipValue(str, pos) {
  const len = str.length;
  const ch = str.charCodeAt(pos);

  if (ch === 34) return _skipJsonString(str, pos); // string

  if (ch === 123 || ch === 91) { // { or [
    // Stack of closing brackets still owed, innermost last.
    const closers = [ch === 123 ? 125 : 93];
    pos++;
    while (pos < len) {
      const c = str.charCodeAt(pos);
      if (c === 34) {
        // Brackets inside strings must not affect nesting.
        pos = _skipJsonString(str, pos);
        if (pos < 0) return -1;
        continue;
      }
      if (c === 123) closers.push(125);
      else if (c === 91) closers.push(93);
      else if (c === 125 || c === 93) {
        if (closers.pop() !== c) return -1; // e.g. "[}" 
        if (closers.length === 0) return pos + 1;
      }
      pos++;
    }
    return -1; // input ended inside the container
  }

  // number, true, false, null
  const start = pos;
  while (pos < len) {
    const c = str.charCodeAt(pos);
    if (c === 44 || c === 125 || c === 93 || c <= 32) break;
    pos++;
  }
  const token = str.substring(start, pos);
  if (token === 'true' || token === 'false' || token === 'null') return pos;
  return _JSON_NUMBER_RE.test(token) ? pos : -1;
}
|
|
222
|
+
|
|
223
|
+
module.exports = { compileSchemaParser };
|
package/lib/shape-classifier.js
CHANGED
package/lib/tier0.js
CHANGED
|
@@ -17,6 +17,19 @@ const T_NUMBER = TYPE_MASK.number;
|
|
|
17
17
|
const T_INTEGER = TYPE_MASK.integer;
|
|
18
18
|
const T_BOOLEAN = TYPE_MASK.boolean;
|
|
19
19
|
|
|
20
|
+
// Count Unicode code points, not UTF-16 code units.
// JSON Schema spec: minLength/maxLength count characters (RFC 8259 = code points),
// so an astral character (one surrogate pair) counts as 1, not 2.
//
// Single pass: each well-formed high+low surrogate pair reduces the count by
// one; lone surrogates count as one each, matching for...of iteration. Any
// string without surrogate pairs (not just ASCII) returns s.length.
function codePointLength(s) {
  const len = s.length;
  let pairs = 0;
  // Stop one short of the end: a pair needs a following code unit.
  for (let i = 0; i < len - 1; i++) {
    const hi = s.charCodeAt(i);
    if (hi >= 0xD800 && hi <= 0xDBFF) {
      const lo = s.charCodeAt(i + 1);
      if (lo >= 0xDC00 && lo <= 0xDFFF) {
        pairs++;
        i++; // skip the low half of the pair just counted
      }
    }
  }
  return len - pairs;
}
|
|
32
|
+
|
|
20
33
|
// Numeric constraint flags, packed into constraint.numFlags.
|
|
21
34
|
// Using bit flags means the validator does a cheap bitwise-and instead of
|
|
22
35
|
// five Number.isNaN() calls per numeric property when only one bound is set.
|
|
@@ -94,8 +107,8 @@ function checkPrimitive(c, v) {
|
|
|
94
107
|
if (typeof v !== 'string') return false;
|
|
95
108
|
const minLen = c.minLen;
|
|
96
109
|
const maxLen = c.maxLen;
|
|
97
|
-
if (minLen >= 0
|
|
98
|
-
if (maxLen >= 0
|
|
110
|
+
if (minLen >= 0) { const l = v.length; if (l < minLen) return false; if (l < minLen * 2 && codePointLength(v) < minLen) return false; }
|
|
111
|
+
if (maxLen >= 0) { const l = v.length; if (l > maxLen && codePointLength(v) > maxLen) return false; }
|
|
99
112
|
} else if (m === T_INTEGER) {
|
|
100
113
|
if (typeof v !== 'number' || !Number.isInteger(v)) return false;
|
|
101
114
|
const f = c.numFlags;
|
|
@@ -148,8 +161,8 @@ function tier0ValidateObject(plan, data) {
|
|
|
148
161
|
if (typeof v !== 'string') return false;
|
|
149
162
|
const minLen = c.minLen;
|
|
150
163
|
const maxLen = c.maxLen;
|
|
151
|
-
if (minLen >= 0
|
|
152
|
-
if (maxLen >= 0
|
|
164
|
+
if (minLen >= 0) { const l = v.length; if (l < minLen) return false; if (l < minLen * 2 && codePointLength(v) < minLen) return false; }
|
|
165
|
+
if (maxLen >= 0) { const l = v.length; if (l > maxLen && codePointLength(v) > maxLen) return false; }
|
|
153
166
|
} else if (m === T_INTEGER) {
|
|
154
167
|
if (typeof v !== 'number' || !Number.isInteger(v)) return false;
|
|
155
168
|
const f = c.numFlags;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ata-validator",
|
|
3
|
-
"version": "0.10.
|
|
3
|
+
"version": "0.10.2",
|
|
4
4
|
"description": "Ultra-fast JSON Schema validator. 4.7x faster validation, 1,800x faster compilation. Works without native addon. Cross-schema $ref, Draft 2020-12 + Draft 7, V8-optimized JS codegen, simdjson, RE2, multi-core. Standard Schema V1 compatible.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"module": "index.mjs",
|