ata-validator 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,12 +10,14 @@ Ultra-fast JSON Schema validator powered by [simdjson](https://github.com/simdjs
10
10
 
11
11
  | Scenario | ata | ajv | |
12
12
  |---|---|---|---|
13
- | **validate(obj)** valid | 15M ops/sec | 8M ops/sec | **ata 1.9x faster** |
14
- | **validate(obj)** invalid | 13.1M ops/sec | 8.1M ops/sec | **ata 1.6x faster** |
13
+ | **validate(obj)** valid | 76M ops/sec | 8M ops/sec | **ata 9.5x faster** |
14
+ | **validate(obj)** invalid | 34M ops/sec | 8M ops/sec | **ata 4.3x faster** |
15
15
  | **isValidObject(obj)** | 15.4M ops/sec | 9.2M ops/sec | **ata 1.7x faster** |
16
16
  | **validateJSON(str)** valid | 2.15M ops/sec | 1.88M ops/sec | **ata 1.1x faster** |
17
- | **validateJSON(str)** invalid | 2.62M ops/sec | 2.35M ops/sec | **ata 1.1x faster** |
18
- | **Schema compilation** | 112K ops/sec | 773 ops/sec | **ata 145x faster** |
17
+ | **validateJSON(str)** invalid | 2.17M ops/sec | 2.29M ops/sec | **ajv 1.1x faster** |
18
+ | **Schema compilation** | 113K ops/sec | 818 ops/sec | **ata 138x faster** |
19
+
20
+ > validate(obj) numbers are isolated single-schema benchmarks. Multi-schema benchmark overhead reduces throughput; real-world numbers depend on workload.
19
21
 
20
22
  ### Large Data — JS Object Validation
21
23
 
@@ -38,7 +40,7 @@ Ultra-fast JSON Schema validator powered by [simdjson](https://github.com/simdjs
38
40
 
39
41
  ### How it works
40
42
 
41
- **Combined single-pass validation**: ata compiles schemas into monolithic JS functions that both validate and collect errors in a single pass. Valid data returns immediately (lazy error array — zero allocation). Invalid data collects errors without a second pass.
43
+ **Hybrid validator**: ata compiles schemas into monolithic JS functions identical to the boolean fast path, but returning `VALID_RESULT` on success and calling the error collector on failure. V8 TurboFan optimizes it identically to a pure boolean function — error code is dead code on the valid path. No try/catch (3.3x V8 deopt), no lazy arrays, no double-pass.
42
44
 
43
45
  **JS codegen**: Schemas are compiled to monolithic JS functions (like ajv). Supported keywords: `type`, `required`, `properties`, `items`, `enum`, `const`, `allOf`, `anyOf`, `oneOf`, `not`, `if/then/else`, `uniqueItems`, `contains`, `prefixItems`, `additionalProperties`, `dependentRequired`, `$ref` (local), `minimum/maximum`, `minLength/maxLength`, `pattern`, `format`.
44
46
 
@@ -52,7 +54,7 @@ Ultra-fast JSON Schema validator powered by [simdjson](https://github.com/simdjs
52
54
 
53
55
  ## When to use ata
54
56
 
55
- - **Any `validate(obj)` workload** — 1.6x–2.7x faster than ajv on all data
57
+ - **Any `validate(obj)` workload** — 4.3x–9.5x faster than ajv
56
58
  - **Serverless / cold starts** — 12.5x faster schema compilation
57
59
  - **Security-sensitive apps** — RE2 regex, immune to ReDoS attacks
58
60
  - **Batch/streaming validation** — NDJSON log processing, data pipelines (2.6x faster)
@@ -66,7 +68,7 @@ Ultra-fast JSON Schema validator powered by [simdjson](https://github.com/simdjs
66
68
 
67
69
  ## Features
68
70
 
69
- - **Combined single-pass validation**: One JS function validates + collects errors — no double pass, lazy error allocation
71
+ - **Hybrid validator**: 76M ops/sec — same function body as boolean check, returns result or calls error collector. No try/catch, no double pass
70
72
  - **Multi-core**: Parallel validation across all CPU cores — 13.4M validations/sec
71
73
  - **simdjson**: SIMD-accelerated JSON parsing at GB/s speeds, adaptive On Demand for large docs
72
74
  - **RE2 regex**: Linear-time guarantees, immune to ReDoS attacks (2391x faster on pathological input)
@@ -101,7 +103,7 @@ const v = new Validator({
101
103
  required: ['name', 'email']
102
104
  });
103
105
 
104
- // Fast boolean check — JS codegen (1.7x faster than ajv)
106
+ // Fast boolean check — JS codegen (9.5x faster than ajv)
105
107
  v.isValidObject({ name: 'Mert', email: 'mert@example.com', age: 26 }); // true
106
108
 
107
109
  // Full validation with error details + defaults applied
package/index.js CHANGED
@@ -219,9 +219,9 @@ class Validator {
219
219
  ? null
220
220
  : compileToJSCombined(schemaObj, VALID_RESULT);
221
221
  // Fallback error-collecting codegen (less optimized, for schemas combined can't handle)
222
- const jsErrFn = (!jsCombinedFn && !process.env.ATA_FORCE_NAPI)
223
- ? compileToJSCodegenWithErrors(schemaObj)
224
- : null;
222
+ const jsErrFn = process.env.ATA_FORCE_NAPI
223
+ ? null
224
+ : compileToJSCodegenWithErrors(schemaObj);
225
225
  this._jsFn = jsFn;
226
226
 
227
227
  // Data mutators — applied in-place before validation
@@ -254,20 +254,28 @@ class Validator {
254
254
  // Valid data: no array allocation, returns VALID_RESULT
255
255
  // Invalid data: collects errors in one pass (no double validation)
256
256
  // Fallback: jsFn + errFn for schemas combined can't handle
257
- const errFn = jsErrFn
258
- ? (d) => { try { return jsErrFn(d, true); } catch { return compiled.validate(d); } }
259
- : (d) => compiled.validate(d);
260
- this.validate = jsCombinedFn
261
- ? (preprocess
262
- ? (data) => { preprocess(data); try { return jsCombinedFn(data); } catch { return jsFn(data) ? VALID_RESULT : errFn(data); } }
263
- : (data) => { try { return jsCombinedFn(data); } catch { return jsFn(data) ? VALID_RESULT : errFn(data); } })
257
+ // errFn: JS error codegen or NAPI fallback. No try/catch (V8 3.3x deopt).
258
+ // jsErrFn tested at compile time — if it throws, don't use it.
259
+ let safeErrFn = null;
260
+ if (jsErrFn) {
261
+ try { jsErrFn({}, true); safeErrFn = (d) => jsErrFn(d, true); } catch {}
262
+ }
263
+ const errFn = safeErrFn || ((d) => compiled.validate(d));
264
+
265
+ // Hybrid validator: jsFn body with return R / return E(d).
266
+ // V8 optimizes identically to jsFn (83M) — E(d) is dead code on valid path.
267
+ // Invalid: E(d) calls errFn once (34M vs 6M two-pass).
268
+ // Fallback: jsFn + errFn speculative if hybrid unavailable.
269
+ const hybridFn = jsFn._hybridFactory
270
+ ? jsFn._hybridFactory(VALID_RESULT, errFn)
271
+ : null;
272
+ this.validate = hybridFn
273
+ ? (preprocess ? (data) => { preprocess(data); return hybridFn(data); } : hybridFn)
264
274
  : (preprocess
265
275
  ? (data) => { preprocess(data); return jsFn(data) ? VALID_RESULT : errFn(data); }
266
276
  : (data) => jsFn(data) ? VALID_RESULT : errFn(data));
267
277
  this.isValidObject = jsFn;
268
- const jsonValidateFn = jsCombinedFn
269
- ? (obj) => { try { return jsCombinedFn(obj); } catch { return jsFn(obj) ? VALID_RESULT : errFn(obj); } }
270
- : (obj) => jsFn(obj) ? VALID_RESULT : errFn(obj);
278
+ const jsonValidateFn = hybridFn || ((obj) => jsFn(obj) ? VALID_RESULT : errFn(obj));
271
279
  this.validateJSON = useSimdjsonForLarge
272
280
  ? (jsonStr) => {
273
281
  if (jsonStr.length >= SIMDJSON_THRESHOLD) {
@@ -528,12 +528,57 @@ function compileToJSCodegen(schema) {
528
528
  const body = helperStr + checkStr + '\n return true'
529
529
 
530
530
  try {
531
- return new Function('d', body)
531
+ const boolFn = new Function('d', body)
532
+
533
+ // Build hybrid: same body, return R instead of true, return E(d) instead of false.
534
+ // V8 optimizes this identically to jsFn — E(d) is dead code on valid path.
535
+ // 83M ops/sec vs 26M for combined. Invalid path: 34M vs 6M.
536
+ const hybridBody = replaceTopLevel(helperStr + checkStr + '\n return R')
537
+ try {
538
+ const factory = new Function('R', 'E', `return function(d){${hybridBody}}`)
539
+ boolFn._hybridFactory = factory
540
+ } catch {}
541
+
542
+ return boolFn
532
543
  } catch {
533
544
  return null
534
545
  }
535
546
  }
536
547
 
548
+ // Replace top-level `return false` → `return E(d)` and `return true` → `return R`.
549
+ // Tracks function nesting depth to preserve nested function internals.
550
/**
 * Rewrite the top-level `return` statements of generated validator code so the
 * same body can be used as a "hybrid" validator:
 *   - `return false` becomes `return E(d)`
 *   - `return true`  becomes `return R`
 *
 * Code inside nested `function` bodies is copied through unchanged, so helper
 * functions keep returning plain booleans. String literals, template literals,
 * regex literals and comments are also copied verbatim: keywords or braces that
 * appear inside them are data, not code, and must not be rewritten or counted.
 *
 * Known limitations: arrow-function bodies are not treated as nested functions,
 * and regex detection is a heuristic based on the preceding token (a regex that
 * directly follows `)` or `]` is read as a division).
 *
 * @param {string} code - Generated function body source.
 * @returns {string} The source with top-level boolean returns rewritten.
 */
function replaceTopLevel(code) {
  // Keywords after which a `/` starts a regex literal rather than a division.
  const REGEX_PREFIX_KEYWORDS = new Set([
    'return', 'typeof', 'case', 'in', 'of', 'delete', 'void',
    'instanceof', 'new', 'throw', 'else', 'do'
  ])

  function isIdentChar(ch) {
    return ch !== undefined && /[A-Za-z0-9_$]/.test(ch)
  }

  // True when `word` occurs at `pos` as a whole token (not part of a longer identifier).
  function isWordAt(word, pos) {
    return code.startsWith(word, pos) &&
      !isIdentChar(code[pos - 1]) &&
      !isIdentChar(code[pos + word.length])
  }

  // End (exclusive) of a '...' or "..." literal starting at `pos`.
  // An unterminated quote is treated as a single ordinary character.
  function quotedEnd(pos) {
    const quote = code[pos]
    let k = pos + 1
    while (k < code.length) {
      const ch = code[k]
      if (ch === '\\') k += 2
      else if (ch === '\n') return pos + 1
      else if (ch === quote) return k + 1
      else k++
    }
    return pos + 1
  }

  // End (exclusive) of a template literal starting at `pos`, including `${...}` parts.
  function templateEnd(pos) {
    let k = pos + 1
    while (k < code.length) {
      const ch = code[k]
      if (ch === '\\') k += 2
      else if (ch === '`') return k + 1
      else if (ch === '$' && code[k + 1] === '{') k = blockEnd(k + 2)
      else k++
    }
    return code.length
  }

  // Decide from the preceding token whether a `/` at `pos` can start a regex literal.
  function regexAllowedAt(pos) {
    let k = pos - 1
    while (k >= 0 && /\s/.test(code[k])) k--
    if (k < 0) return true
    const prev = code[k]
    if (prev === ')' || prev === ']') return false
    if (!isIdentChar(prev)) return true
    let w = k
    while (w >= 0 && isIdentChar(code[w])) w--
    return REGEX_PREFIX_KEYWORDS.has(code.slice(w + 1, k + 1))
  }

  // End (exclusive) of a regex literal starting at `pos`.
  // If no closing `/` is found on the same line, the `/` is treated as an ordinary character.
  function regexEnd(pos) {
    let k = pos + 1
    let inClass = false
    while (k < code.length) {
      const ch = code[k]
      if (ch === '\\') {
        k += 2
        continue
      }
      if (ch === '\n') return pos + 1
      if (ch === '[') inClass = true
      else if (ch === ']') inClass = false
      else if (ch === '/' && !inClass) return k + 1
      k++
    }
    return pos + 1
  }

  // If a string, template, regex or comment starts at `pos`, return its end (exclusive);
  // otherwise return `pos` unchanged.
  function literalEnd(pos) {
    const ch = code[pos]
    if (ch === '"' || ch === "'") return quotedEnd(pos)
    if (ch === '`') return templateEnd(pos)
    if (ch === '/') {
      const next = code[pos + 1]
      if (next === '/') {
        const newline = code.indexOf('\n', pos)
        return newline === -1 ? code.length : newline
      }
      if (next === '*') {
        const close = code.indexOf('*/', pos + 2)
        return close === -1 ? code.length : close + 2
      }
      if (regexAllowedAt(pos)) return regexEnd(pos)
    }
    return pos
  }

  // `pos` is the index just after an opening `{`; returns the index just after
  // its matching `}` (or the end of the code when the block is unterminated).
  function blockEnd(pos) {
    let depth = 1
    let k = pos
    while (k < code.length) {
      const end = literalEnd(k)
      if (end > k) {
        k = end
        continue
      }
      if (code[k] === '{') depth++
      else if (code[k] === '}' && --depth === 0) return k + 1
      k++
    }
    return code.length
  }

  let result = ''
  let i = 0
  while (i < code.length) {
    // Literals and comments are copied verbatim.
    const litEnd = literalEnd(i)
    if (litEnd > i) {
      result += code.slice(i, litEnd)
      i = litEnd
      continue
    }
    if (isWordAt('function', i)) {
      // Nested function: copy header and whole body untouched.
      const open = code.indexOf('{', i + 8)
      const end = open === -1 ? code.length : blockEnd(open + 1)
      result += code.slice(i, end)
      i = end
      continue
    }
    if (isWordAt('return false', i)) {
      result += 'return E(d)'
      i += 12
      continue
    }
    if (isWordAt('return true', i)) {
      result += 'return R'
      i += 11
      continue
    }
    result += code[i]
    i++
  }
  return result
}
581
+
537
582
  // knownType: if parent already verified the type, skip redundant guards.
538
583
  // 'object' = we know v is a non-null non-array object
539
584
  // 'array' = we know v is an array
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ata-validator",
3
- "version": "0.4.6",
3
+ "version": "0.4.8",
4
4
  "description": "Ultra-fast JSON Schema validator. Beats ajv on every valid-path benchmark: 1.1x–2.7x faster validate(obj), 151x faster compilation, 5.9x faster parallel batch. Speculative validation with V8-optimized JS codegen, simdjson, multi-core. Standard Schema V1 compatible.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",