ata-validator 0.10.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -102,7 +102,7 @@ Self-recursive named functions for $dynamicRef, compile-time cross-schema resolu
102
102
 
103
103
  ### JSON Schema Test Suite
104
104
 
105
- **95.3%** pass rate (1170/1227) on official [JSON Schema Test Suite](https://github.com/json-schema-org/JSON-Schema-Test-Suite) (Draft 2020-12). **95.3%** on [@exodus/schemasafe](https://github.com/ExodusMovement/schemasafe) test suite.
105
+ **98.5%** pass rate (1172/1190) on official [JSON Schema Test Suite](https://github.com/json-schema-org/JSON-Schema-Test-Suite) (Draft 2020-12), excluding remote refs and vocabulary (intentionally unsupported). **95.3%** on [@exodus/schemasafe](https://github.com/ExodusMovement/schemasafe) test suite.
106
106
 
107
107
  ## When to use ata
108
108
 
@@ -135,7 +135,7 @@ Self-recursive named functions for $dynamicRef, compile-time cross-schema resolu
135
135
  - **Zero-copy paths**: Buffer and pre-padded input support - no unnecessary copies
136
136
  - **Defaults + coercion**: `default` values, `coerceTypes`, `removeAdditional` support
137
137
  - **C/C++ library**: Native API for non-Node.js environments
138
- - **96.9% spec compliant**: Draft 2020-12
138
+ - **98.5% spec compliant**: Draft 2020-12
139
139
 
140
140
  ## Installation
141
141
 
package/index.d.ts CHANGED
@@ -11,6 +11,12 @@ export interface ValidationResult {
11
11
  errors: ValidationError[];
12
12
  }
13
13
 
14
/** Result returned by `Validator.validateAndParse()`: the parsed value together with the validation outcome. */
export interface ValidateAndParseResult {
  /** Validation outcome flag (presumably `true` when `errors` is empty; confirm against the implementation). */
  valid: boolean;
  /** The value parsed from the JSON input. */
  value: unknown;
  /** Validation errors reported for the input. */
  errors: ValidationError[];
}
19
+
14
20
  export interface ValidatorOptions {
15
21
  coerceTypes?: boolean;
16
22
  removeAdditional?: boolean;
@@ -27,56 +33,93 @@ export interface StandardSchemaV1Props {
27
33
  | { issues: Array<{ message: string; path?: ReadonlyArray<{ key: PropertyKey }> }> };
28
34
  }
29
35
 
36
/**
 * Shape of a pre-compiled standalone module, as accepted by
 * `Validator.fromStandalone()`.
 */
export interface StandaloneModule {
  /** Boolean-only check: takes the data and returns whether it is valid. */
  boolFn: (data: unknown) => boolean;
  /**
   * Builds the result-returning validator from a shared "valid" result object
   * and an error-collecting function.
   *
   * NOTE(review): `errFn` was previously typed as the unsafe `Function`; it is
   * typed here after this module's own `errFn` signature. Confirm against the
   * generated module code.
   */
  hybridFactory: (
    validResult: object,
    errFn: (data: unknown, allErrors?: boolean) => ValidationResult
  ) => (data: unknown) => ValidationResult;
  /** Error-collecting validator, or `null` when the module does not provide one. */
  errFn: ((data: unknown, allErrors?: boolean) => ValidationResult) | null;
}
41
+
30
42
  export class Validator {
31
43
  constructor(schema: object | string, options?: ValidatorOptions);
32
44
 
33
- /** Add a schema to the validator */
45
+ /** Add a schema to the registry for cross-schema $ref resolution */
34
46
  addSchema(schema: object): void;
35
47
 
36
- /** Validate data returns result with errors. Applies defaults, coerceTypes, removeAdditional. */
48
+ /** Validate data, returns result with errors. Applies defaults, coerceTypes, removeAdditional. */
37
49
  validate(data: unknown): ValidationResult;
38
50
 
39
- /** Fast boolean check JS codegen, no error collection */
51
+ /** Fast boolean check via JS codegen or tier 0 interpreter. No error collection. */
40
52
  isValidObject(data: unknown): boolean;
41
53
 
42
- /** Validate JSON string simdjson fast path for large docs */
54
+ /** Validate a JSON string. Uses simdjson fast path for large documents. */
43
55
  validateJSON(jsonString: string): ValidationResult;
44
56
 
45
- /** Fast boolean check for JSON string */
57
+ /** Fast boolean check for a JSON string */
46
58
  isValidJSON(jsonString: string): boolean;
47
59
 
48
- /** Ultra-fast buffer validation via V8 CFunction zero NAPI overhead */
60
+ /** Parse JSON with simdjson + validate against schema. Returns parsed value and validation result. Requires native addon. */
61
+ validateAndParse(jsonString: string | Buffer): ValidateAndParseResult;
62
+
63
+ /** Ultra-fast buffer validation via native addon */
49
64
  isValid(input: Buffer | Uint8Array | string): boolean;
50
65
 
51
- /** Count valid documents in an NDJSON buffer */
66
+ /** Count valid documents in an NDJSON buffer. Requires native addon. */
52
67
  countValid(ndjsonBuf: Buffer | Uint8Array | string): number;
53
68
 
54
- /** Count valid documents from an array of buffers */
69
+ /** Validate an array of buffers, returns count of valid ones. Requires native addon. */
55
70
  batchIsValid(buffers: (Buffer | Uint8Array)[]): number;
56
71
 
57
- /** Zero-copy validation with pre-padded buffer */
72
+ /** Zero-copy validation with pre-padded buffer. Requires native addon. */
58
73
  isValidPrepadded(paddedBuffer: Buffer, jsonLength: number): boolean;
59
74
 
60
- /** Multi-core parallel NDJSON validation returns boolean per line */
75
+ /** Multi-core parallel NDJSON validation. Returns boolean per line. Requires native addon. */
61
76
  isValidParallel(ndjsonBuffer: Buffer): boolean[];
62
77
 
63
- /** Multi-core parallel NDJSON count returns number of valid items */
64
- countValid(ndjsonBuffer: Buffer): number;
65
-
66
- /** Single-thread NDJSON batch validation */
78
+ /** Single-thread NDJSON batch validation. Requires native addon. */
67
79
  isValidNDJSON(ndjsonBuffer: Buffer): boolean[];
68
80
 
69
- /** Standard Schema V1 interface compatible with Fastify, tRPC, TanStack, etc. */
81
+ /** Generate a standalone JS module string for zero-compile loading. Returns null if schema can't be standalone-compiled. */
82
+ toStandalone(): string | null;
83
+
84
+ /** Load a pre-compiled standalone module. Zero schema compilation at startup. */
85
+ static fromStandalone(mod: StandaloneModule, schema: object | string, options?: ValidatorOptions): Validator;
86
+
87
+ /** Bundle multiple schemas into a single JS module string. Load with Validator.loadBundle(). */
88
+ static bundle(schemas: object[], options?: ValidatorOptions): string;
89
+
90
+ /** Bundle multiple schemas into a self-contained JS module. No ata-validator import needed at runtime. */
91
+ static bundleStandalone(schemas: object[], options?: ValidatorOptions): string;
92
+
93
+ /** Bundle multiple schemas with deduplicated shared templates. Smaller output than bundle(). */
94
+ static bundleCompact(schemas: object[], options?: ValidatorOptions): string;
95
+
96
+ /** Load a bundle created by Validator.bundle(). Returns array of Validator instances. */
97
+ static loadBundle(mods: object[], schemas: object[], options?: ValidatorOptions): Validator[];
98
+
99
+ /** Standard Schema V1 interface, compatible with Fastify, tRPC, TanStack, etc. */
70
100
  readonly "~standard": StandardSchemaV1Props;
71
101
  }
72
102
 
103
+ /** One-shot validate: creates a Validator, validates data, returns result. */
73
104
  export function validate(
74
105
  schema: object | string,
75
106
  data: unknown
76
107
  ): ValidationResult;
77
108
 
109
+ /** Fast compile: returns a validate function directly. WeakMap cached, second call with same schema is near-zero cost. */
110
+ export function compile(
111
+ schema: object | string,
112
+ options?: ValidatorOptions
113
+ ): (data: unknown) => ValidationResult;
114
+
115
+ /** Parse JSON using simdjson (native addon) or JSON.parse (fallback). */
116
+ export function parseJSON(jsonString: string | Buffer): unknown;
117
+
118
+ /** Returns ata-validator version string. */
78
119
  export function version(): string;
79
120
 
121
+ /** Create a simdjson-compatible padded buffer from a JSON string. */
80
122
  export function createPaddedBuffer(jsonStr: string): { buffer: Buffer; length: number };
81
123
 
124
+ /** Required padding size for simdjson buffers. */
82
125
  export const SIMDJSON_PADDING: number;
@@ -4,6 +4,20 @@
4
4
  // Closure-based validator — no new Function() or eval().
5
5
  // Returns null if the schema is too complex for JS compilation.
6
6
 
7
// Length of `s` in Unicode code points rather than UTF-16 code units.
// JSON Schema minLength/maxLength are defined in characters, so a surrogate
// pair must count once.
// Strings without a high surrogate take the cheap route: .length is already
// the answer and no iteration by code point is needed.
function _cpLen(s) {
  const units = s.length;
  let idx = 0;
  // Advance to the first high surrogate (0xD800-0xDBFF), if there is one.
  while (idx < units) {
    const unit = s.charCodeAt(idx);
    if (unit >= 0xD800 && unit <= 0xDBFF) break;
    idx++;
  }
  if (idx === units) return units;
  // Slow route: string iteration yields one item per code point.
  let count = 0;
  for (const _ch of s) count++;
  return count;
}
20
+
7
21
  // AJV-compatible error message templates (compile-time, not runtime)
8
22
  const AJV_MESSAGES = {
9
23
  type: (p) => `must be ${p.type}`,
@@ -271,11 +285,12 @@ function compileToJS(schema, defs, schemaMap) {
271
285
  // string
272
286
  if (schema.minLength !== undefined) {
273
287
  const min = schema.minLength
274
- checks.push((d) => typeof d !== 'string' || d.length >= min)
288
+ const min2 = min * 2
289
+ checks.push((d) => typeof d !== 'string' || d.length >= min2 || (d.length >= min && _cpLen(d) >= min))
275
290
  }
276
291
  if (schema.maxLength !== undefined) {
277
292
  const max = schema.maxLength
278
- checks.push((d) => typeof d !== 'string' || d.length <= max)
293
+ checks.push((d) => typeof d !== 'string' || d.length <= max || _cpLen(d) <= max)
279
294
  }
280
295
  if (schema.pattern) {
281
296
  try {
@@ -775,7 +790,7 @@ function compileToJSCodegen(schema, schemaMap) {
775
790
  }
776
791
  }
777
792
 
778
- const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: [], closureVals: [], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
793
+ const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
779
794
  const lines = []
780
795
  genCode(schema, 'd', lines, ctx)
781
796
 
@@ -914,8 +929,8 @@ function tryGenCombined(schema, access, ctx) {
914
929
 
915
930
  if (t === 'string') {
916
931
  const conds = [`typeof _v!=='string'`]
917
- if (schema.minLength !== undefined) conds.push(`_v.length<${schema.minLength}`)
918
- if (schema.maxLength !== undefined) conds.push(`_v.length>${schema.maxLength}`)
932
+ if (schema.minLength !== undefined) conds.push(`_cpLen(_v)<${schema.minLength}`)
933
+ if (schema.maxLength !== undefined) conds.push(`_cpLen(_v)>${schema.maxLength}`)
919
934
  if (conds.length < 2 && !schema.pattern && !schema.format) return null // not worth combining
920
935
  // pattern and format need separate statements, fall back if present
921
936
  if (schema.pattern || schema.format) return null
@@ -956,6 +971,9 @@ function tryGenCombined(schema, access, ctx) {
956
971
  // 'string' / 'number' / 'integer' = we know the primitive type
957
972
  function genCode(schema, v, lines, ctx, knownType) {
958
973
  if (typeof schema !== 'object' || schema === null) return
974
+ if (!ctx.regExpMap) {
975
+ ctx.regExpMap = new Map();
976
+ }
959
977
 
960
978
  // $ref — guard against circular references
961
979
  // In 2020-12 with unevaluated*, $ref can coexist with siblings — don't early return
@@ -1159,8 +1177,8 @@ function genCode(schema, v, lines, ctx, knownType) {
1159
1177
  if (schema.multipleOf !== undefined) lines.push(isNum ? `if(${v}%${schema.multipleOf}!==0)return false` : `if(typeof ${v}==='number'&&${v}%${schema.multipleOf}!==0)return false`)
1160
1178
 
1161
1179
  // string — skip type guard if known string
1162
- if (schema.minLength !== undefined) lines.push(isStr ? `if(${v}.length<${schema.minLength})return false` : `if(typeof ${v}==='string'&&${v}.length<${schema.minLength})return false`)
1163
- if (schema.maxLength !== undefined) lines.push(isStr ? `if(${v}.length>${schema.maxLength})return false` : `if(typeof ${v}==='string'&&${v}.length>${schema.maxLength})return false`)
1180
+ if (schema.minLength !== undefined) lines.push(isStr ? `if(_cpLen(${v})<${schema.minLength})return false` : `if(typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength})return false`)
1181
+ if (schema.maxLength !== undefined) lines.push(isStr ? `if(_cpLen(${v})>${schema.maxLength})return false` : `if(typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength})return false`)
1164
1182
 
1165
1183
  // array size — skip guard if known array
1166
1184
  if (schema.minItems !== undefined) lines.push(isArr ? `if(${v}.length<${schema.minItems})return false` : `if(Array.isArray(${v})&&${v}.length<${schema.minItems})return false`)
@@ -1176,8 +1194,13 @@ function genCode(schema, v, lines, ctx, knownType) {
1176
1194
  if (inlineCheck) {
1177
1195
  lines.push(isStr ? `if(!(${inlineCheck}))return false` : `if(typeof ${v}==='string'&&!(${inlineCheck}))return false`)
1178
1196
  } else {
1179
- const ri = ctx.varCounter++
1180
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(schema.pattern)})`)
1197
+ const pattern = JSON.stringify(schema.pattern);
1198
+ if (!ctx.regExpMap.has(pattern)) {
1199
+ const ri = ctx.varCounter++
1200
+ ctx.regExpMap.set(pattern, ri)
1201
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
1202
+ }
1203
+ const ri = ctx.regExpMap.get(pattern);
1181
1204
  lines.push(isStr ? `if(!_re${ri}.test(${v}))return false` : `if(typeof ${v}==='string'&&!_re${ri}.test(${v}))return false`)
1182
1205
  }
1183
1206
  }
@@ -2264,6 +2287,7 @@ function compileToJSCodegenWithErrors(schema, schemaMap) {
2264
2287
  }
2265
2288
 
2266
2289
  const ctx = { varCounter: 0, helperCode: [], rootDefs: eRootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors: eAnchors, rootSchema: schema }
2290
+ ctx.helperCode.push('const _cpLen=s=>{let n=0;for(const _ of s)n++;return n}')
2267
2291
  const lines = []
2268
2292
  genCodeE(schema, 'd', '', lines, ctx, '#')
2269
2293
  if (lines.length === 0) return (d) => ({ valid: true, errors: [] })
@@ -2296,7 +2320,9 @@ function compileToJSCodegenWithErrors(schema, schemaMap) {
2296
2320
  function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2297
2321
  if (!schemaPrefix) schemaPrefix = '#'
2298
2322
  if (typeof schema !== 'object' || schema === null) return
2299
-
2323
+ if (!ctx.regExpMap) {
2324
+ ctx.regExpMap = new Map();
2325
+ }
2300
2326
  // $ref — resolve local and cross-schema refs
2301
2327
  if (schema.$ref) {
2302
2328
  // Self-reference "#" — no-op (permissive) to avoid infinite recursion
@@ -2440,11 +2466,11 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2440
2466
 
2441
2467
  // string
2442
2468
  if (schema.minLength !== undefined) {
2443
- const c = isStr ? `${v}.length<${schema.minLength}` : `typeof ${v}==='string'&&${v}.length<${schema.minLength}`
2469
+ const c = isStr ? `_cpLen(${v})<${schema.minLength}` : `typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength}`
2444
2470
  lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`)
2445
2471
  }
2446
2472
  if (schema.maxLength !== undefined) {
2447
- const c = isStr ? `${v}.length>${schema.maxLength}` : `typeof ${v}==='string'&&${v}.length>${schema.maxLength}`
2473
+ const c = isStr ? `_cpLen(${v})>${schema.maxLength}` : `typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength}`
2448
2474
  lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`)
2449
2475
  }
2450
2476
  if (schema.pattern) {
@@ -2453,8 +2479,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2453
2479
  const c = isStr ? `!(${inlineCheck})` : `typeof ${v}==='string'&&!(${inlineCheck})`
2454
2480
  lines.push(`if(${c}){${fail('pattern', 'pattern', `{pattern:${JSON.stringify(schema.pattern)}}`, `'must match pattern "${schema.pattern}"'`)}}`)
2455
2481
  } else {
2456
- const ri = ctx.varCounter++
2457
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(schema.pattern)})`)
2482
+ const pattern = JSON.stringify(schema.pattern);
2483
+ if (!ctx.regExpMap.has(pattern)) {
2484
+ const ri = ctx.varCounter++
2485
+ ctx.regExpMap.set(pattern, ri)
2486
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`)
2487
+ }
2488
+ const ri = ctx.regExpMap.get(pattern);
2458
2489
  const c = isStr ? `!_re${ri}.test(${v})` : `typeof ${v}==='string'&&!_re${ri}.test(${v})`
2459
2490
  lines.push(`if(${c}){${fail('pattern', 'pattern', `{pattern:${JSON.stringify(schema.pattern)}}`, `'must match pattern "${schema.pattern}"'`)}}`)
2460
2491
  }
@@ -2539,8 +2570,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2539
2570
  // patternProperties
2540
2571
  if (schema.patternProperties) {
2541
2572
  for (const [pat, sub] of Object.entries(schema.patternProperties)) {
2542
- const ri = ctx.varCounter++
2543
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(pat)})`)
2573
+ const pattern = JSON.stringify(pat);
2574
+ if (!ctx.regExpMap.has(pattern)) {
2575
+ const ri = ctx.varCounter++
2576
+ ctx.regExpMap.set(pattern, ri)
2577
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
2578
+ }
2579
+ const ri = ctx.regExpMap.get(pattern);
2544
2580
  const ki = ctx.varCounter++
2545
2581
  lines.push(`if(typeof ${v}==='object'&&${v}!==null&&!Array.isArray(${v})){for(const _k${ki} in ${v}){if(_re${ri}.test(_k${ki})){`)
2546
2582
  const p = pathExpr ? `${pathExpr}+'/'+_k${ki}` : `'/'+_k${ki}`
@@ -2570,8 +2606,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2570
2606
  lines.push(`if(_k${ki}.length>${pn.maxLength}){${fail('maxLength', 'propertyNames/maxLength', `{limit:${pn.maxLength}}`, `'must NOT have more than ${pn.maxLength} characters'`)}}`)
2571
2607
  }
2572
2608
  if (pn.pattern) {
2573
- const ri = ctx.varCounter++
2574
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(pn.pattern)})`)
2609
+ const pattern = JSON.stringify(pn.pattern);
2610
+ if (!ctx.regExpMap.has(pattern)) {
2611
+ const ri = ctx.varCounter++
2612
+ ctx.regExpMap.set(pattern, ri)
2613
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
2614
+ }
2615
+ const ri = ctx.regExpMap.get(pattern);
2575
2616
  lines.push(`if(!_re${ri}.test(_k${ki})){${fail('pattern', 'propertyNames/pattern', `{pattern:${JSON.stringify(pn.pattern)}}`, `'must match pattern "${pn.pattern}"'`)}}`)
2576
2617
  }
2577
2618
  if (pn.const !== undefined) {
@@ -2746,7 +2787,7 @@ function compileToJSCombined(schema, VALID_RESULT, schemaMap) {
2746
2787
  }
2747
2788
  }
2748
2789
 
2749
- const ctx = { varCounter: 0, helperCode: [], closureVars: [], closureVals: [],
2790
+ const ctx = { varCounter: 0, helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen],
2750
2791
  rootDefs: cRootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors: cAnchors, rootSchema: schema }
2751
2792
  const lines = []
2752
2793
  genCodeC(schema, 'd', '', lines, ctx, '#')
@@ -2963,8 +3004,8 @@ function genCodeC(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2963
3004
  }
2964
3005
 
2965
3006
  // string — skip guard if known
2966
- if (schema.minLength !== undefined) { const c = isStr ? `${v}.length<${schema.minLength}` : `typeof ${v}==='string'&&${v}.length<${schema.minLength}`; lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`) }
2967
- if (schema.maxLength !== undefined) { const c = isStr ? `${v}.length>${schema.maxLength}` : `typeof ${v}==='string'&&${v}.length>${schema.maxLength}`; lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`) }
3007
+ if (schema.minLength !== undefined) { const c = isStr ? `_cpLen(${v})<${schema.minLength}` : `typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength}`; lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`) }
3008
+ if (schema.maxLength !== undefined) { const c = isStr ? `_cpLen(${v})>${schema.maxLength}` : `typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength}`; lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`) }
2968
3009
  if (schema.pattern) {
2969
3010
  const inlineCheck = compilePatternInline(schema.pattern, v)
2970
3011
  if (inlineCheck) {
@@ -0,0 +1,223 @@
1
+ 'use strict';
2
+
3
+ // Schema-compiled JSON parser: combines parse + validate in one pass.
4
+ // For known-shape schemas, skips JSON.parse entirely and builds typed
5
+ // JS objects directly from the JSON string.
6
+ //
7
+ // Returns null on invalid JSON or validation failure.
8
+ // compileSchemaParser() returns null for unsupported schemas so the caller can fall back to JSON.parse.
9
+
10
// Compile `schema` into a combined parse + validate function.
//
// Supported shapes:
//   - { type: 'object', properties: {...} }: delegated to compileObjectParser
//   - { type: 'array', items: { type: 'object', ... } }: array of such objects
//
// Returns null when the schema is not an object or its shape is not supported.
// The returned function takes the JSON text and returns the parsed value, or
// null when the text is not valid for the schema.
function compileSchemaParser(schema) {
  if (!schema || typeof schema !== 'object') return null;

  // Array of objects
  if (schema.type === 'array' && schema.items && schema.items.type === 'object') {
    const itemParser = compileSchemaParser(schema.items);
    if (!itemParser) return null;
    return function parseArray(str) {
      const len = str.length;
      let pos = 0;
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      if (pos >= len || str.charCodeAt(pos) !== 91) return null; // [
      pos++;
      const arr = [];
      let first = true;
      while (pos < len) {
        while (pos < len && str.charCodeAt(pos) <= 32) pos++;
        if (pos >= len) return null;
        if (str.charCodeAt(pos) === 93) { // ]
          // Only whitespace may follow the closing bracket.
          pos++;
          while (pos < len && str.charCodeAt(pos) <= 32) pos++;
          return pos === len ? arr : null;
        }
        if (!first) {
          if (str.charCodeAt(pos) !== 44) return null; // ,
          pos++;
          while (pos < len && str.charCodeAt(pos) <= 32) pos++;
        }
        first = false;
        // Every item must be an object. This also rejects a trailing comma.
        if (pos >= len || str.charCodeAt(pos) !== 123) return null; // {
        // The item parser works on a whole string, so locate the end of this
        // object first and hand it exactly that slice.
        const end = skipValue(str, pos);
        // Guard against a skip that ran off the end of an unterminated item.
        if (end < 0 || end > len || str.charCodeAt(end - 1) !== 125) return null;
        const item = itemParser(str.substring(pos, end));
        if (item === null) return null;
        arr.push(item);
        pos = end;
      }
      return null; // input ended before the closing ]
    };
  }

  // Plain object with declared properties
  if (schema.type === 'object' && schema.properties) {
    return compileObjectParser(schema);
  }

  return null;
}
59
+
60
// Compile a flat object schema into a parse + validate function.
//
// Returns null (schema unsupported) when `schema.properties` is missing, or
// when any property lacks a string `type` or is itself an object or array.
//
// The returned function takes the JSON text of one object and returns a plain
// object holding the declared properties, or null when the text is malformed,
// a value fails its property schema, an undeclared property appears while
// additionalProperties is false, or a required property is missing.
// Undeclared properties that are allowed are skipped and not copied into the
// result.
function compileObjectParser(schema) {
  if (!schema.properties) return null;
  const props = schema.properties;
  const keys = Object.keys(props);
  const required = new Set(schema.required || []);
  const additionalAllowed = schema.additionalProperties !== false;
  const hasOwn = Object.prototype.hasOwnProperty;

  // Key -> property schema map. It has no prototype, so keys such as
  // "toString" or "constructor" in the input are not mistaken for declared
  // properties.
  const keyTypes = Object.create(null);
  for (const k of keys) {
    // Assigning "__proto__" on a plain result object does not create an own
    // property, so treat such schemas as unsupported.
    if (k === '__proto__') return null;
    const p = props[k];
    if (!p || typeof p.type !== 'string') return null; // bail on complex
    if (p.type === 'object' || p.type === 'array') return null; // flat only for now
    keyTypes[k] = p;
  }

  // The compiled parser function
  return function parseAndValidate(str) {
    const len = str.length;
    let pos = 0;

    // Skip whitespace
    while (pos < len && str.charCodeAt(pos) <= 32) pos++;
    if (pos >= len || str.charCodeAt(pos) !== 123) return null; // {
    pos++;

    const result = {};
    let first = true;
    let closed = false;

    while (pos < len) {
      // Skip whitespace
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      if (pos >= len) break;
      if (str.charCodeAt(pos) === 125) { // }
        pos++;
        closed = true;
        break;
      }

      // Comma between entries
      if (!first) {
        if (str.charCodeAt(pos) !== 44) return null; // ,
        pos++;
        while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      }
      first = false;

      // Key
      if (pos >= len || str.charCodeAt(pos) !== 34) return null; // "
      pos++;
      const keyStart = pos;
      let escaped = false;
      while (pos < len && str.charCodeAt(pos) !== 34) {
        if (str.charCodeAt(pos) === 92) { // backslash: skip the escaped char
          escaped = true;
          pos++;
        }
        pos++;
      }
      if (pos >= len) return null; // unterminated key
      let key = str.substring(keyStart, pos);
      if (escaped) {
        // Decode escapes so the key compares equal to the schema's key.
        try {
          key = JSON.parse('"' + key + '"');
        } catch (e) {
          return null;
        }
      }
      pos++; // closing "

      // Colon
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;
      if (str.charCodeAt(pos) !== 58) return null; // :
      pos++;
      while (pos < len && str.charCodeAt(pos) <= 32) pos++;

      // Value: parse based on schema type if known key
      const propSchema = keyTypes[key];
      if (propSchema !== undefined) {
        const parsed = parseValue(str, pos, propSchema);
        if (parsed === null) return null; // validation fail
        result[key] = parsed.value;
        pos = parsed.pos;
      } else if (!additionalAllowed) {
        return null; // additional property not allowed
      } else {
        // Skip unknown value
        const skipped = skipValue(str, pos);
        if (skipped < 0) return null;
        pos = skipped;
      }
    }

    // The object must be closed and followed only by whitespace.
    if (!closed) return null;
    while (pos < len && str.charCodeAt(pos) <= 32) pos++;
    if (pos !== len) return null;

    // Check required (own properties only, never inherited ones)
    for (const r of required) {
      if (!hasOwn.call(result, r)) return null;
    }

    return result;
  };
}
146
+
147
// Parse one primitive JSON value at `pos` in `str` according to
// `schema.type`, and apply the constraints handled here: minLength and
// maxLength for strings, minimum and maximum for numbers.
//
// Returns { value, pos }, where `pos` is the index just past the value, or
// null when the text at `pos` is not a well-formed value of that type or a
// constraint fails. Types other than string, integer, number, boolean and
// null always yield null.
function parseValue(str, pos, schema) {
  const len = str.length;
  const type = schema.type;

  if (type === 'string') {
    if (str.charCodeAt(pos) !== 34) return null; // must be "
    const start = pos + 1;
    let end = start;
    let needsDecode = false;
    while (end < len) {
      const c = str.charCodeAt(end);
      if (c === 34) break;
      if (c === 92) { // backslash: skip the escaped char
        needsDecode = true;
        end += 2;
        continue;
      }
      if (c < 32) return null; // raw control characters are not valid JSON
      end++;
    }
    if (end >= len) return null; // unterminated string

    let val;
    if (needsDecode) {
      // Let JSON.parse decode the escapes; it also rejects malformed ones.
      try {
        val = JSON.parse(str.substring(pos, end + 1));
      } catch (e) {
        return null;
      }
    } else {
      val = str.substring(start, end);
    }

    const minLen = schema.minLength;
    const maxLen = schema.maxLength;
    if (minLen !== undefined || maxLen !== undefined) {
      // Length limits count code points, so each surrogate pair counts once.
      let cp = val.length;
      for (let i = 0; i + 1 < val.length; i++) {
        const hi = val.charCodeAt(i);
        if (hi >= 0xD800 && hi <= 0xDBFF) {
          const lo = val.charCodeAt(i + 1);
          if (lo >= 0xDC00 && lo <= 0xDFFF) {
            cp--;
            i++;
          }
        }
      }
      if (minLen !== undefined && cp < minLen) return null;
      if (maxLen !== undefined && cp > maxLen) return null;
    }
    return { value: val, pos: end + 1 };
  }

  if (type === 'integer' || type === 'number') {
    const start = pos;
    if (str.charCodeAt(pos) === 45) pos++; // -
    const intStart = pos;
    while (pos < len && str.charCodeAt(pos) >= 48 && str.charCodeAt(pos) <= 57) pos++;
    if (pos === intStart) return null; // no digits: not a number
    if (pos - intStart > 1 && str.charCodeAt(intStart) === 48) return null; // leading zero
    if (str.charCodeAt(pos) === 46) { // fraction
      pos++;
      const fracStart = pos;
      while (pos < len && str.charCodeAt(pos) >= 48 && str.charCodeAt(pos) <= 57) pos++;
      if (pos === fracStart) return null;
    }
    const e = str.charCodeAt(pos);
    if (e === 101 || e === 69) { // exponent
      pos++;
      const sign = str.charCodeAt(pos);
      if (sign === 43 || sign === 45) pos++;
      const expStart = pos;
      while (pos < len && str.charCodeAt(pos) >= 48 && str.charCodeAt(pos) <= 57) pos++;
      if (pos === expStart) return null;
    }
    const val = +str.substring(start, pos);
    // An integer is any number with a zero fractional part, so 1.0 and 1e2 pass.
    if (type === 'integer' && !Number.isInteger(val)) return null;
    if (schema.minimum !== undefined && val < schema.minimum) return null;
    if (schema.maximum !== undefined && val > schema.maximum) return null;
    return { value: val, pos };
  }

  if (type === 'boolean') {
    if (str.startsWith('true', pos)) return { value: true, pos: pos + 4 };
    if (str.startsWith('false', pos)) return { value: false, pos: pos + 5 };
    return null;
  }

  if (type === 'null') {
    if (str.startsWith('null', pos)) return { value: null, pos: pos + 4 };
    return null;
  }

  return null;
}
189
+
190
// JSON number grammar, used to check scalar tokens that are being skipped.
const SKIPPED_NUMBER_RE = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/;

// Skip over one JSON value starting at `pos` without building it.
//
// Returns the index just past the value, or -1 when the value is malformed:
// an unterminated string, an unterminated object or array, or a scalar token
// that is not true, false, null or a JSON number.
//
// Containers are matched by counting only brackets of the same kind as the
// opening one and ignoring anything inside strings; their contents are not
// otherwise validated.
function skipValue(str, pos) {
  const len = str.length;
  const ch = str.charCodeAt(pos);

  if (ch === 34) { // string
    pos++;
    while (pos < len && str.charCodeAt(pos) !== 34) {
      if (str.charCodeAt(pos) === 92) pos++; // skip the escaped char
      pos++;
    }
    return pos < len ? pos + 1 : -1;
  }

  if (ch === 123 || ch === 91) { // { or [
    const open = ch;
    const close = ch === 123 ? 125 : 93;
    let depth = 1;
    pos++;
    while (pos < len) {
      const c = str.charCodeAt(pos);
      if (c === 34) {
        // Brackets inside strings must not affect the depth.
        pos++;
        while (pos < len && str.charCodeAt(pos) !== 34) {
          if (str.charCodeAt(pos) === 92) pos++;
          pos++;
        }
        if (pos >= len) return -1; // unterminated string
      } else if (c === open) {
        depth++;
      } else if (c === close) {
        depth--;
        if (depth === 0) return pos + 1;
      }
      pos++;
    }
    return -1; // input ended before the container closed
  }

  // number, true, false, null
  const start = pos;
  while (pos < len) {
    const c = str.charCodeAt(pos);
    if (c === 44 || c === 125 || c === 93 || c <= 32) break;
    pos++;
  }
  const token = str.substring(start, pos);
  if (token === 'true' || token === 'false' || token === 'null') return pos;
  return SKIPPED_NUMBER_RE.test(token) ? pos : -1;
}
222
+
223
+ module.exports = { compileSchemaParser };
@@ -24,7 +24,7 @@ const TIER0_PRIMITIVE_ALLOWED = new Set([
24
24
  'type', 'enum', 'const',
25
25
  'minLength', 'maxLength',
26
26
  'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum',
27
- 'multipleOf', 'format',
27
+ 'multipleOf',
28
28
  ...META_KEYS,
29
29
  ]);
30
30
 
package/lib/tier0.js CHANGED
@@ -17,6 +17,19 @@ const T_NUMBER = TYPE_MASK.number;
17
17
  const T_INTEGER = TYPE_MASK.integer;
18
18
  const T_BOOLEAN = TYPE_MASK.boolean;
19
19
 
20
// Number of Unicode code points in `s`, as opposed to UTF-16 code units.
// minLength/maxLength in JSON Schema are measured in characters, so a
// surrogate pair has to count as one.
// A string with no high surrogate needs no code-point iteration: .length is
// returned as is.
function codePointLength(s) {
  const units = s.length;
  for (let i = 0; i < units; i++) {
    const cu = s.charCodeAt(i);
    if (cu < 0xD800 || cu > 0xDBFF) continue;
    // First high surrogate found: count by iterating code points.
    let total = 0;
    for (const _cp of s) total++;
    return total;
  }
  return units;
}
32
+
20
33
  // Numeric constraint flags, packed into constraint.numFlags.
21
34
  // Using bit flags means the validator does a cheap bitwise-and instead of
22
35
  // five Number.isNaN() calls per numeric property when only one bound is set.
@@ -94,8 +107,8 @@ function checkPrimitive(c, v) {
94
107
  if (typeof v !== 'string') return false;
95
108
  const minLen = c.minLen;
96
109
  const maxLen = c.maxLen;
97
- if (minLen >= 0 && v.length < minLen) return false;
98
- if (maxLen >= 0 && v.length > maxLen) return false;
110
+ if (minLen >= 0) { const l = v.length; if (l < minLen) return false; if (l < minLen * 2 && codePointLength(v) < minLen) return false; }
111
+ if (maxLen >= 0) { const l = v.length; if (l > maxLen && codePointLength(v) > maxLen) return false; }
99
112
  } else if (m === T_INTEGER) {
100
113
  if (typeof v !== 'number' || !Number.isInteger(v)) return false;
101
114
  const f = c.numFlags;
@@ -148,8 +161,8 @@ function tier0ValidateObject(plan, data) {
148
161
  if (typeof v !== 'string') return false;
149
162
  const minLen = c.minLen;
150
163
  const maxLen = c.maxLen;
151
- if (minLen >= 0 && v.length < minLen) return false;
152
- if (maxLen >= 0 && v.length > maxLen) return false;
164
+ if (minLen >= 0) { const l = v.length; if (l < minLen) return false; if (l < minLen * 2 && codePointLength(v) < minLen) return false; }
165
+ if (maxLen >= 0) { const l = v.length; if (l > maxLen && codePointLength(v) > maxLen) return false; }
153
166
  } else if (m === T_INTEGER) {
154
167
  if (typeof v !== 'number' || !Number.isInteger(v)) return false;
155
168
  const f = c.numFlags;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ata-validator",
3
- "version": "0.10.0",
3
+ "version": "0.10.2",
4
4
  "description": "Ultra-fast JSON Schema validator. 4.7x faster validation, 1,800x faster compilation. Works without native addon. Cross-schema $ref, Draft 2020-12 + Draft 7, V8-optimized JS codegen, simdjson, RE2, multi-core. Standard Schema V1 compatible.",
5
5
  "main": "index.js",
6
6
  "module": "index.mjs",