ata-validator 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1203,8 +1203,15 @@ static napi_value RawFastValidate(napi_env env, napi_callback_info info) {
1203
1203
  }
1204
1204
  }
1205
1205
  } else {
1206
- // String — must copy (can't pre-pad strings)
1207
- size_t len;
1206
+ napi_valuetype vtype;
1207
+ napi_typeof(env, args[1], &vtype);
1208
+ if (vtype != napi_string) {
1209
+ napi_throw_type_error(env, nullptr,
1210
+ "rawFastValidate() requires a Buffer, TypedArray, or string. For parsed objects, use validate() or isValidObject().");
1211
+ return nullptr;
1212
+ }
1213
+ // String, must copy (can't pre-pad strings)
1214
+ size_t len = 0;
1208
1215
  napi_get_value_string_utf8(env, args[1], nullptr, 0, &len);
1209
1216
  if (len <= 4096) {
1210
1217
  char buf[4097];
@@ -1267,7 +1274,14 @@ static napi_value RawBatchValidate(napi_env env, napi_callback_info info) {
1267
1274
  valid = ata::validate(g_fast_schemas[slot],
1268
1275
  std::string_view(static_cast<const char*>(data), length)).valid;
1269
1276
  } else {
1270
- size_t len;
1277
+ napi_valuetype vtype;
1278
+ napi_typeof(env, item, &vtype);
1279
+ if (vtype != napi_string) {
1280
+ napi_throw_type_error(env, nullptr,
1281
+ "rawNDJSONValidate() batch elements must be Buffer, TypedArray, or string");
1282
+ return nullptr;
1283
+ }
1284
+ size_t len = 0;
1271
1285
  napi_get_value_string_utf8(env, item, nullptr, 0, &len);
1272
1286
  std::string buf(len, '\0');
1273
1287
  napi_get_value_string_utf8(env, item, buf.data(), len + 1, &len);
package/include/ata.h CHANGED
@@ -8,16 +8,16 @@
8
8
  #include <variant>
9
9
  #include <vector>
10
10
 
11
- #define ATA_VERSION "0.10.3"
11
+ #define ATA_VERSION "0.10.4"
12
12
 
13
13
  namespace ata {
14
14
 
15
15
  inline constexpr uint32_t VERSION_MAJOR = 0;
16
16
  inline constexpr uint32_t VERSION_MINOR = 10;
17
- inline constexpr uint32_t VERSION_REVISION = 3;
17
+ inline constexpr uint32_t VERSION_REVISION = 4;
18
18
 
19
19
  inline constexpr std::string_view version() noexcept {
20
- return "0.10.3";
20
+ return "0.10.4";
21
21
  }
22
22
 
23
23
  enum class error_code : uint8_t {
@@ -55,6 +55,8 @@ struct validation_error {
55
55
  error_code code;
56
56
  std::string path;
57
57
  std::string message;
58
+ std::string expected;
59
+ std::string actual;
58
60
  };
59
61
 
60
62
  struct validation_result {
package/index.d.ts CHANGED
@@ -81,6 +81,17 @@ export class Validator {
81
81
  /** Generate a standalone JS module string for zero-compile loading. Returns null if schema can't be standalone-compiled. */
82
82
  toStandalone(): string | null;
83
83
 
84
+ /**
85
+ * Generate a self-contained module string with `validate`/`isValid` exports.
86
+ * The output has zero runtime dependency on ata-validator.
87
+ *
88
+ * - format: 'esm' (default) or 'cjs'.
89
+ * - abortEarly: if true, invalid results are a shared frozen stub (smaller output, no error details).
90
+ *
91
+ * Returns null if the schema cannot be compiled to a standalone module.
92
+ */
93
+ toStandaloneModule(options?: { format?: 'esm' | 'cjs'; abortEarly?: boolean }): string | null;
94
+
84
95
  /** Load a pre-compiled standalone module. Zero schema compilation at startup. */
85
96
  static fromStandalone(mod: StandaloneModule, schema: object | string, options?: ValidatorOptions): Validator;
86
97
 
package/index.js CHANGED
@@ -290,14 +290,41 @@ const _CP_LEN_SOURCE = `function _cpLen(s) {
290
290
  // (which must materialize the full JS object tree). Buffer.from + NAPI ~2x faster.
291
291
  const SIMDJSON_THRESHOLD = 8192;
292
292
 
// Resolve a JSON Schema path like "#/properties/name/type" to the schema object
// that *contains* the failing keyword. Used by verbose mode to populate
// `parentSchema` on validation errors.
//
// rootSchema: the root schema object the path is relative to.
// schemaPath: a "#"-prefixed, "/"-separated pointer; "~1" and "~0" escapes are
//             decoded to "/" and "~" respectively.
//
// Returns the owning schema object, the root schema for "#" / "#/", or
// undefined if the path can't be walked (not a "#"-prefixed string, or a
// missing intermediate node).
function resolveSchemaByPath(rootSchema, schemaPath) {
  if (typeof schemaPath !== 'string' || !schemaPath.startsWith('#')) {
    return undefined;
  }
  const pointer = schemaPath.slice(1);
  if (pointer === '' || pointer === '/') return rootSchema;

  // Empty segments are dropped; "~1" must be decoded before "~0" so that
  // "~01" yields "~1" instead of "/".
  const segments = pointer
    .split('/')
    .filter(Boolean)
    .map((seg) => seg.replace(/~1/g, '/').replace(/~0/g, '~'));

  // The last segment is the keyword that failed (e.g. "type"); parentSchema is
  // the schema object that owns that keyword, so walk all but the last segment.
  let node = rootSchema;
  for (let i = 0; i < segments.length - 1; i++) {
    if (node === null || typeof node !== 'object') return undefined;
    // Only follow own properties: a segment such as "constructor" or
    // "toString" must not resolve to something inherited from the prototype
    // chain and be reported as a schema.
    if (!Object.prototype.hasOwnProperty.call(node, segments[i])) return undefined;
    node = node[segments[i]];
  }
  return node;
}
313
+
293
314
  function parsePointerPath(path) {
294
315
  if (!path) return [];
295
316
  return path
296
317
  .split("/")
297
318
  .filter(Boolean)
298
- .map((seg) => ({
299
- key: seg.replace(/~1/g, "/").replace(/~0/g, "~"),
300
- }));
319
+ .map((seg) => {
320
+ const decoded = seg.replace(/~1/g, "/").replace(/~0/g, "~");
321
+ // Per Standard Schema V1: array indices should be emitted as numbers,
322
+ // object keys as strings. Treat all-digit segments as numeric indices.
323
+ if (/^(0|[1-9][0-9]*)$/.test(decoded)) {
324
+ return { key: Number(decoded) };
325
+ }
326
+ return { key: decoded };
327
+ });
301
328
  }
302
329
 
303
330
  function createPaddedBuffer(jsonStr) {
@@ -366,6 +393,15 @@ class Validator {
366
393
  // Schema map for cross-schema $ref resolution
367
394
  this._schemaMap = buildSchemaMap(options.schemas) || new Map();
368
395
 
396
+ // User-supplied format checkers: { formatName: (value) => boolean }.
397
+ // Looked up at runtime when a schema references a format the built-in
398
+ // registry does not know about.
399
+ this._userFormats = options.formats || null;
400
+
401
+ // Verbose mode: when on, errors carry parentSchema (the schema object that
402
+ // produced the error). Matches ajv's `verbose: true` behavior.
403
+ this._verbose = !!options.verbose;
404
+
369
405
  // Lazy stubs: trigger compilation on first call, then re-dispatch
370
406
  this.validate = (data) => {
371
407
  this._ensureCompiled();
@@ -464,7 +500,9 @@ class Validator {
464
500
  const mapKey = this._schemaMap.size > 0
465
501
  ? this._schemaStr + '\0' + [...this._schemaMap.keys()].sort().join('\0')
466
502
  : this._schemaStr;
467
- const cached = _compileCache.get(mapKey);
503
+ // Custom formats are JS functions: bypass the compile cache since they can
504
+ // differ between validators that share the same schema string.
505
+ const cached = this._userFormats ? null : _compileCache.get(mapKey);
468
506
  let jsFn, jsCombinedFn, jsErrFn, _isCodegen = false;
469
507
  var _forceNapi = typeof process !== 'undefined' && process.env && process.env.ATA_FORCE_NAPI;
470
508
  if (cached && !_forceNapi) {
@@ -473,12 +511,15 @@ class Validator {
473
511
  jsErrFn = cached.errFn;
474
512
  _isCodegen = !!cached.isCodegen;
475
513
  } else if (!_forceNapi) {
476
- const _cgFn = compileToJSCodegen(schemaObj, sm);
514
+ const uf = this._userFormats;
515
+ const _cgFn = compileToJSCodegen(schemaObj, sm, uf);
477
516
  jsFn = _cgFn || compileToJS(schemaObj, null, sm);
478
- jsCombinedFn = compileToJSCombined(schemaObj, VALID_RESULT, sm);
479
- jsErrFn = compileToJSCodegenWithErrors(schemaObj, sm);
517
+ jsCombinedFn = compileToJSCombined(schemaObj, VALID_RESULT, sm, uf);
518
+ jsErrFn = compileToJSCodegenWithErrors(schemaObj, sm, uf);
480
519
  _isCodegen = !!_cgFn;
481
- _compileCache.set(mapKey, { jsFn, combined: jsCombinedFn, errFn: jsErrFn, isCodegen: _isCodegen });
520
+ if (!uf) {
521
+ _compileCache.set(mapKey, { jsFn, combined: jsCombinedFn, errFn: jsErrFn, isCodegen: _isCodegen });
522
+ }
482
523
  } else {
483
524
  jsFn = null; jsCombinedFn = null; jsErrFn = null;
484
525
  }
@@ -617,6 +658,24 @@ class Validator {
617
658
  }
618
659
  : (data) => (jsFn(data) ? VALID_RESULT : errFn(data));
619
660
  }
661
+ // Verbose mode: populate parentSchema on each error.
662
+ // Errors may be frozen, so clone them with the extra field.
663
+ if (this._verbose) {
664
+ const inner = this.validate;
665
+ const root = this._schemaObj;
666
+ this.validate = (data) => {
667
+ const result = inner(data);
668
+ if (result && !result.valid && result.errors) {
669
+ const enriched = result.errors.map((err) =>
670
+ err && err.parentSchema === undefined
671
+ ? { ...err, parentSchema: resolveSchemaByPath(root, err.schemaPath) }
672
+ : err
673
+ );
674
+ return { valid: false, errors: enriched };
675
+ }
676
+ return result;
677
+ };
678
+ }
620
679
  this.isValidObject = jsFn;
621
680
  const hybridFn = jsFn._hybridFactory
622
681
  ? jsFn._hybridFactory(VALID_RESULT, errFn)
@@ -693,19 +752,37 @@ class Validator {
693
752
  this.isValid = (buf) => {
694
753
  self._ensureNative();
695
754
  const slot = self._fastSlot;
696
- self.isValid = (b) => { if (typeof b === 'string') b = Buffer.from(b); return native.rawFastValidate(slot, b); };
755
+ self.isValid = (b) => {
756
+ if (typeof b === 'string') b = Buffer.from(b);
757
+ else if (!(b instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
758
+ return native.rawFastValidate(slot, b);
759
+ };
697
760
  return self.isValid(buf);
698
761
  };
699
762
  this.countValid = (ndjsonBuf) => {
700
763
  self._ensureNative();
701
764
  const slot = self._fastSlot;
702
- self.countValid = (b) => { if (typeof b === 'string') b = Buffer.from(b); const r = native.rawNDJSONValidate(slot, b); let c = 0; for (let i = 0; i < r.length; i++) if (r[i]) c++; return c; };
765
+ self.countValid = (b) => {
766
+ if (typeof b === 'string') b = Buffer.from(b);
767
+ else if (!(b instanceof Uint8Array)) throw new TypeError('countValid() requires a Buffer, Uint8Array, or string');
768
+ const r = native.rawNDJSONValidate(slot, b);
769
+ let c = 0;
770
+ for (let i = 0; i < r.length; i++) if (r[i]) c++;
771
+ return c;
772
+ };
703
773
  return self.countValid(ndjsonBuf);
704
774
  };
705
775
  this.batchIsValid = (buffers) => {
706
776
  self._ensureNative();
707
777
  const slot = self._fastSlot;
708
- self.batchIsValid = (bufs) => { let v = 0; for (const b of bufs) if (native.rawFastValidate(slot, b)) v++; return v; };
778
+ self.batchIsValid = (bufs) => {
779
+ let v = 0;
780
+ for (const b of bufs) {
781
+ if (!(b instanceof Uint8Array)) throw new TypeError('batchIsValid() requires Buffer or Uint8Array elements');
782
+ if (native.rawFastValidate(slot, b)) v++;
783
+ }
784
+ return v;
785
+ };
709
786
  return self.batchIsValid(buffers);
710
787
  };
711
788
  }
@@ -732,6 +809,7 @@ class Validator {
732
809
  const slot = this._fastSlot;
733
810
  this.isValid = (buf) => {
734
811
  if (typeof buf === 'string') buf = Buffer.from(buf);
812
+ else if (!(buf instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
735
813
  return native.rawFastValidate(slot, buf);
736
814
  };
737
815
  }
@@ -739,6 +817,7 @@ class Validator {
739
817
  const slot = this._fastSlot;
740
818
  this.countValid = (ndjsonBuf) => {
741
819
  if (typeof ndjsonBuf === 'string') ndjsonBuf = Buffer.from(ndjsonBuf);
820
+ else if (!(ndjsonBuf instanceof Uint8Array)) throw new TypeError('countValid() requires a Buffer, Uint8Array, or string');
742
821
  const results = native.rawNDJSONValidate(slot, ndjsonBuf);
743
822
  let count = 0;
744
823
  for (let i = 0; i < results.length; i++) if (results[i]) count++;
@@ -750,6 +829,7 @@ class Validator {
750
829
  this.batchIsValid = (buffers) => {
751
830
  let valid = 0;
752
831
  for (const buf of buffers) {
832
+ if (!(buf instanceof Uint8Array)) throw new TypeError('batchIsValid() requires Buffer or Uint8Array elements');
753
833
  if (native.rawFastValidate(slot, buf)) valid++;
754
834
  }
755
835
  return valid;
@@ -800,19 +880,24 @@ class Validator {
800
880
  const mapKey = this._schemaMap.size > 0
801
881
  ? this._schemaStr + '\0' + [...this._schemaMap.keys()].sort().join('\0')
802
882
  : this._schemaStr;
803
- const cached = _compileCache.get(mapKey);
883
+ // Custom formats are JS functions: skip the shared cache so different
884
+ // validators with the same schema string but different formats don't collide.
885
+ const cached = this._userFormats ? null : _compileCache.get(mapKey);
804
886
  if (cached && cached.jsFn) {
805
887
  this._jsFn = cached.jsFn;
806
888
  this.isValidObject = cached.jsFn;
807
889
  return;
808
890
  }
809
- const jsFn = compileToJSCodegen(this._schemaObj, sm) || compileToJS(this._schemaObj, null, sm);
891
+ const uf = this._userFormats;
892
+ const jsFn = compileToJSCodegen(this._schemaObj, sm, uf) || compileToJS(this._schemaObj, null, sm);
810
893
  this._jsFn = jsFn;
811
894
  if (jsFn) {
812
895
  this.isValidObject = jsFn;
813
896
  // seed cache with codegen, combined/errFn filled later by _ensureCompiled
814
- if (!cached) _compileCache.set(mapKey, { jsFn, combined: null, errFn: null });
815
- else cached.jsFn = jsFn;
897
+ if (!uf) {
898
+ if (!cached) _compileCache.set(mapKey, { jsFn, combined: null, errFn: null });
899
+ else cached.jsFn = jsFn;
900
+ }
816
901
  }
817
902
  }
818
903
 
@@ -825,6 +910,7 @@ class Validator {
825
910
  if (!jsFn || !jsFn._source) return null;
826
911
  const src = jsFn._source;
827
912
  const hybridSrc = jsFn._hybridSource || "";
913
+ const preambleSrc = jsFn._preambleSource || "";
828
914
 
829
915
  // Also capture error function source for zero-compile standalone load
830
916
  const jsErrFn = compileToJSCodegenWithErrors(
@@ -835,6 +921,7 @@ class Validator {
835
921
  return `// Auto-generated by ata-validator — do not edit
836
922
  'use strict';
837
923
  ${_CP_LEN_SOURCE}
924
+ ${preambleSrc}
838
925
  const boolFn = function(d) {
839
926
  ${src}
840
927
  };
@@ -1025,6 +1112,8 @@ ${exports}`;
1025
1112
  // Raw NAPI fast path for Buffer/Uint8Array
1026
1113
  isValid(input) {
1027
1114
  if (!native) throw new Error('Native addon required for isValid() — install build tools or use validate() instead');
1115
+ if (typeof input === 'string') input = Buffer.from(input);
1116
+ else if (!(input instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
1028
1117
  this._ensureNative();
1029
1118
  return native.rawFastValidate(this._fastSlot, input);
1030
1119
  }
@@ -1097,15 +1186,21 @@ Validator.bundle = function (schemas, opts) {
1097
1186
  };
1098
1187
 
1099
1188
  // Zero-dependency self-contained bundle — no require('ata-validator') needed at runtime.
1189
+ // opts.format: 'cjs' (default) or 'esm'.
1100
1190
  Validator.bundleStandalone = function (schemas, opts) {
1191
+ // Cross-schema $ref resolution: every Validator in the bundle needs to know
1192
+ // about the others so $ref to a sibling $id can resolve at compile time.
1193
+ const bundleOpts = { ...(opts || {}), schemas };
1194
+ const format = (opts && opts.format) || 'cjs';
1101
1195
  const R = "Object.freeze({valid:true,errors:Object.freeze([])})";
1102
1196
  const fns = schemas.map((schema) => {
1103
- const v = new Validator(schema, opts);
1197
+ const v = new Validator(schema, bundleOpts);
1104
1198
  v._ensureCompiled();
1105
1199
  const jsFn = v._jsFn;
1106
1200
  if (!jsFn || !jsFn._hybridSource) return "null";
1107
1201
  const jsErrFn = compileToJSCodegenWithErrors(
1108
1202
  typeof schema === "string" ? JSON.parse(schema) : schema,
1203
+ v._schemaMap,
1109
1204
  );
1110
1205
  const errBody =
1111
1206
  jsErrFn && jsErrFn._errSource
@@ -1113,6 +1208,10 @@ Validator.bundleStandalone = function (schemas, opts) {
1113
1208
  : "return{valid:false,errors:[{code:'error',path:'',message:'validation failed'}]}";
1114
1209
  return `(function(R){var E=function(d){var _all=true;${errBody}};return function(d){${jsFn._hybridSource}}})(R)`;
1115
1210
  });
1211
+ const arr = `[${fns.join(",")}]`;
1212
+ if (format === 'esm') {
1213
+ return `// Auto-generated by ata-validator — do not edit\nconst R=${R};\nconst validators=${arr};\nexport default validators;\nexport { validators };\n`;
1214
+ }
1116
1215
  return `'use strict';\nvar R=${R};\nmodule.exports=[${fns.join(",")}];\n`;
1117
1216
  };
1118
1217
 
@@ -768,7 +768,7 @@ function hasAdditionalPropertiesSchema(schema) {
768
768
 
769
769
  // --- Codegen mode: generates a single Function (NOT CSP-safe) ---
770
770
  // This matches ajv's approach: one monolithic function, V8 JIT fully inlines it
771
- function compileToJSCodegen(schema, schemaMap) {
771
+ function compileToJSCodegen(schema, schemaMap, userFormats) {
772
772
  if (typeof schema === 'boolean') return schema ? () => true : () => false
773
773
  if (typeof schema !== 'object' || schema === null) return null
774
774
 
@@ -827,7 +827,7 @@ function compileToJSCodegen(schema, schemaMap) {
827
827
  }
828
828
  }
829
829
 
830
- const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
830
+ const ctx = { varCounter: 0, helpers: [], helperCode: [], preamble: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema, userFormats: userFormats || null }
831
831
  const lines = []
832
832
  genCode(schema, 'd', lines, ctx)
833
833
 
@@ -864,24 +864,30 @@ function compileToJSCodegen(schema, schemaMap) {
864
864
  hybridBody = replaceTopLevel(checkStr + '\n return R')
865
865
  }
866
866
 
867
+ const preambleStr = ctx.preamble && ctx.preamble.length ? ctx.preamble.join('\n ') + '\n ' : ''
868
+
867
869
  try {
868
870
  let boolFn
869
871
  if (closureNames.length > 0) {
870
- const factory = new Function(...closureNames, `return function(d){${body}}`)
872
+ const factory = new Function(...closureNames, `${preambleStr}return function(d){${body}}`)
871
873
  boolFn = factory(...closureValues)
874
+ } else if (preambleStr) {
875
+ const factory = new Function(`${preambleStr}return function(d){${body}}`)
876
+ boolFn = factory()
872
877
  } else {
873
878
  boolFn = new Function('d', body)
874
879
  }
875
880
 
876
881
  // Build hybrid: same body, return R instead of true, return E(d) instead of false.
877
882
  try {
878
- const hybridFactory = new Function(...closureNames, 'R', 'E', `return function(d){${hybridBody}}`)
883
+ const hybridFactory = new Function(...closureNames, 'R', 'E', `${preambleStr}return function(d){${hybridBody}}`)
879
884
  boolFn._hybridFactory = (R, E) => hybridFactory(...closureValues, R, E)
880
885
  } catch {}
881
886
 
882
887
  // Store source for standalone compilation (includes regex inline for file output)
883
888
  const helperStr = ctx.helperCode.length ? ctx.helperCode.join('\n ') + '\n ' : ''
884
889
  boolFn._source = helperStr + body
890
+ boolFn._preambleSource = preambleStr
885
891
  boolFn._hybridSource = helperStr + hybridBody
886
892
 
887
893
  return boolFn
@@ -984,15 +990,21 @@ function tryGenCombined(schema, access, ctx) {
984
990
  return `{${prelude}if(typeof ${v2}!=='string')return false;const _lv=${v2}.length;if(_lv<${M}||_lv>${X * 2})return false;if(_lv<${M * 2}||_lv>${X}){const _cp=_cpLen(${v2});if(_cp<${M}||_cp>${X})return false}}`
985
991
  }
986
992
  const conds = [`typeof _v!=='string'`]
987
- if (schema.minLength !== undefined) {
993
+ if (schema.minLength !== undefined && schema.minLength > 0) {
988
994
  const M = schema.minLength
989
995
  conds.push(`_v.length<${M}`)
990
- conds.push(`_v.length<${M * 2}&&_cpLen(_v)<${M}`)
996
+ // For M=1, length<1 already catches all failures (any non-empty string has cpLen>=1).
997
+ if (M > 1) conds.push(`_v.length<${M * 2}&&_cpLen(_v)<${M}`)
991
998
  }
992
999
  if (schema.maxLength !== undefined) {
993
1000
  const X = schema.maxLength
994
- conds.push(`_v.length>${X * 2}`)
995
- conds.push(`_v.length>${X}&&_cpLen(_v)>${X}`)
1001
+ // For X=0, only empty string passes, length>0 fails — no cpLen needed.
1002
+ if (X === 0) {
1003
+ conds.push(`_v.length>0`)
1004
+ } else {
1005
+ conds.push(`_v.length>${X * 2}`)
1006
+ conds.push(`_v.length>${X}&&_cpLen(_v)>${X}`)
1007
+ }
996
1008
  }
997
1009
  if (conds.length < 2) return null
998
1010
  return bind(conds)
@@ -1263,14 +1275,20 @@ function genCode(schema, v, lines, ctx, knownType) {
1263
1275
  const body = `{const ${lv}=${v}.length;if(${lv}<${M}||${lv}>${X * 2})return false;if(${lv}<${M * 2}||${lv}>${X}){const _cp=_cpLen(${v});if(_cp<${M}||_cp>${X})return false}}`
1264
1276
  lines.push(isStr ? body : `if(typeof ${v}==='string')${body}`)
1265
1277
  } else {
1266
- if (schema.minLength !== undefined) {
1278
+ if (schema.minLength !== undefined && schema.minLength > 0) {
1267
1279
  const M = schema.minLength
1268
- const body = `if(${v}.length<${M})return false;if(${v}.length<${M * 2}&&_cpLen(${v})<${M})return false`
1280
+ // M==1: length<1 already catches empty strings (any non-empty string has cpLen>=1).
1281
+ const body = M === 1
1282
+ ? `if(${v}.length<1)return false`
1283
+ : `if(${v}.length<${M})return false;if(${v}.length<${M * 2}&&_cpLen(${v})<${M})return false`
1269
1284
  lines.push(isStr ? body : `if(typeof ${v}==='string'){${body}}`)
1270
1285
  }
1271
1286
  if (schema.maxLength !== undefined) {
1272
1287
  const X = schema.maxLength
1273
- const body = `if(${v}.length>${X * 2})return false;if(${v}.length>${X}&&_cpLen(${v})>${X})return false`
1288
+ // X==0: only empty string passes, no cpLen needed.
1289
+ const body = X === 0
1290
+ ? `if(${v}.length>0)return false`
1291
+ : `if(${v}.length>${X * 2})return false;if(${v}.length>${X}&&_cpLen(${v})>${X})return false`
1274
1292
  lines.push(isStr ? body : `if(typeof ${v}==='string'){${body}}`)
1275
1293
  }
1276
1294
  }
@@ -1302,7 +1320,19 @@ function genCode(schema, v, lines, ctx, knownType) {
1302
1320
 
1303
1321
  if (schema.format) {
1304
1322
  const fc = FORMAT_CODEGEN[schema.format]
1305
- if (fc) lines.push(fc(v, isStr))
1323
+ if (fc) {
1324
+ lines.push(fc(v, isStr))
1325
+ } else if (ctx.userFormats && typeof ctx.userFormats[schema.format] === 'function') {
1326
+ // User-supplied format checker: thread the function via closure and call at runtime.
1327
+ const safeName = schema.format.replace(/[^a-zA-Z0-9_]/g, '_')
1328
+ const closureName = `_uf_${safeName}`
1329
+ if (!ctx.closureVars.includes(closureName)) {
1330
+ ctx.closureVars.push(closureName)
1331
+ ctx.closureVals.push(ctx.userFormats[schema.format])
1332
+ }
1333
+ const guard = isStr ? '' : `typeof ${v}==='string'&&`
1334
+ lines.push(`if(${guard}!${closureName}(${v}))return false`)
1335
+ }
1306
1336
  }
1307
1337
 
1308
1338
  // uniqueItems — tiered strategy based on expected array size
@@ -1603,37 +1633,59 @@ function genCode(schema, v, lines, ctx, knownType) {
1603
1633
  }
1604
1634
  }
1605
1635
 
1606
- // anyOf — need function wrappers since genCode uses return false
1636
+ // anyOf — branch fns hoisted when safe, inline fallback when recursive.
1607
1637
  // Skip standard anyOf if unevaluatedProperties will handle it (single-pass optimization)
1608
1638
  if (schema.anyOf && schema.unevaluatedProperties === undefined) {
1609
- const fns = []
1639
+ const fi = ctx.varCounter++
1640
+ const branchBodies = []
1641
+ let canHoist = !!ctx.preamble
1610
1642
  for (let i = 0; i < schema.anyOf.length; i++) {
1611
1643
  const subLines = []
1612
1644
  genCode(schema.anyOf[i], '_av', subLines, ctx)
1613
- if (subLines.length === 0) {
1614
- fns.push(`function(_av){return true}`)
1615
- } else {
1616
- fns.push(`function(_av){${subLines.join(';')};return true}`)
1617
- }
1645
+ const body = subLines.length === 0 ? 'return true' : `${subLines.join(';')};return true`
1646
+ if (/\b_validate\b/.test(body)) canHoist = false
1647
+ branchBodies.push(body)
1648
+ }
1649
+ if (canHoist) {
1650
+ const names = branchBodies.map((body, i) => {
1651
+ const name = `_af${fi}_b${i}`
1652
+ ctx.preamble.push(`function ${name}(_av){${body}}`)
1653
+ return name
1654
+ })
1655
+ const checks = names.map(n => `${n}(${v})`).join('||')
1656
+ lines.push(`if(!(${checks}))return false`)
1657
+ } else {
1658
+ const fns = branchBodies.map(body => `function(_av){${body}}`)
1659
+ lines.push(`{const _af${fi}=[${fns.join(',')}];let _am${fi}=false;for(let _ai=0;_ai<_af${fi}.length;_ai++){if(_af${fi}[_ai](${v})){_am${fi}=true;break}}if(!_am${fi})return false}`)
1618
1660
  }
1619
- const fi = ctx.varCounter++
1620
- lines.push(`{const _af${fi}=[${fns.join(',')}];let _am${fi}=false;for(let _ai=0;_ai<_af${fi}.length;_ai++){if(_af${fi}[_ai](${v})){_am${fi}=true;break}}if(!_am${fi})return false}`)
1621
1661
  }
1622
1662
 
1623
- // oneOf
1663
+ // oneOf — branch fns hoisted to factory scope when safe (no recursion/ref).
1664
+ // Falls back to inline closures if any branch touches recursive validation.
1624
1665
  if (schema.oneOf) {
1625
- const fns = []
1666
+ const fi = ctx.varCounter++
1667
+ const branchBodies = []
1668
+ let canHoist = !!ctx.preamble
1626
1669
  for (let i = 0; i < schema.oneOf.length; i++) {
1627
1670
  const subLines = []
1628
1671
  genCode(schema.oneOf[i], '_ov', subLines, ctx)
1629
- if (subLines.length === 0) {
1630
- fns.push(`function(_ov){return true}`)
1631
- } else {
1632
- fns.push(`function(_ov){${subLines.join(';')};return true}`)
1633
- }
1672
+ const body = subLines.length === 0 ? 'return true' : `${subLines.join(';')};return true`
1673
+ // _validate is the recursive entry — hoisting branches above it breaks scope.
1674
+ if (/\b_validate\b/.test(body)) canHoist = false
1675
+ branchBodies.push(body)
1676
+ }
1677
+ if (canHoist) {
1678
+ const names = branchBodies.map((body, i) => {
1679
+ const name = `_of${fi}_b${i}`
1680
+ ctx.preamble.push(`function ${name}(_ov){${body}}`)
1681
+ return name
1682
+ })
1683
+ const calls = names.map(n => `if(${n}(${v})){_oc${fi}++;if(_oc${fi}>1)return false}`).join(';')
1684
+ lines.push(`{let _oc${fi}=0;${calls};if(_oc${fi}!==1)return false}`)
1685
+ } else {
1686
+ const fns = branchBodies.map(body => `function(_ov){${body}}`)
1687
+ lines.push(`{const _of${fi}=[${fns.join(',')}];let _oc${fi}=0;for(let _oi=0;_oi<_of${fi}.length;_oi++){if(_of${fi}[_oi](${v}))_oc${fi}++;if(_oc${fi}>1)return false}if(_oc${fi}!==1)return false}`)
1634
1688
  }
1635
- const fi = ctx.varCounter++
1636
- lines.push(`{const _of${fi}=[${fns.join(',')}];let _oc${fi}=0;for(let _oi=0;_oi<_of${fi}.length;_oi++){if(_of${fi}[_oi](${v}))_oc${fi}++;if(_oc${fi}>1)return false}if(_oc${fi}!==1)return false}`)
1637
1689
  }
1638
1690
 
1639
1691
  // not
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ata-validator",
3
- "version": "0.12.1",
3
+ "version": "0.12.3",
4
4
  "description": "Ultra-fast JSON Schema validator. 5x faster validation, 159,000x faster compilation. Works without native addon. Cross-schema $ref, Draft 2020-12 + Draft 7, V8-optimized JS codegen, simdjson, RE2, multi-core. Standard Schema V1 compatible.",
5
5
  "main": "index.js",
6
6
  "module": "index.mjs",
package/src/ata.cpp CHANGED
@@ -391,13 +391,29 @@ struct od_plan {
391
391
  #ifndef ATA_NO_RE2
392
392
  re2::RE2* pattern = nullptr; // borrowed pointer from schema_node
393
393
  #endif
394
+ // Inline digit pattern: ^[0-9]+$ / ^[0-9]{N}$ / ^[0-9]{N,M}$ / ^\d+$ etc.
395
+ // When set, supersedes the RE2 pattern call.
396
+ struct digit_pattern_t { uint32_t min_len; uint32_t max_len; };
397
+ std::optional<digit_pattern_t> digit_pattern;
394
398
  uint8_t format_id = 255; // 255 = no format check
395
399
 
396
400
  // Object — single iterate with merged required+property lookup
401
+ // fast_kind allows the obj iterator to skip the recursive od_exec_plan call
402
+ // (and its inner type().get() + switch) for primitive sub-plans, dispatching
403
+ // straight to inline constraint code.
404
+ enum class fast_kind : uint8_t {
405
+ OTHER = 0, // recurse via od_exec_plan
406
+ INTEGER, // value.get(int64) + numeric range/multipleOf
407
+ STRING, // value.get(string) + length/pattern/format/enum
408
+ BOOLEAN, // value.get(bool)
409
+ OBJECT, // value.get(object) + nested obj iteration (skips type detect)
410
+ ARRAY, // value.get(array) + element iteration (skips type detect)
411
+ };
397
412
  struct prop_entry {
398
413
  std::string key;
399
414
  int required_idx = -1; // bit index for required tracking, or -1
400
415
  std::shared_ptr<od_plan> sub; // property sub-plan, or nullptr
416
+ fast_kind fk = fast_kind::OTHER; // inline dispatch hint, set at compile time
401
417
  };
402
418
  struct obj_plan {
403
419
  std::vector<prop_entry> entries; // merged required + properties — single scan
@@ -414,6 +430,17 @@ struct od_plan {
414
430
  };
415
431
  std::shared_ptr<arr_plan> array;
416
432
 
433
+ // Enum: primitive-only set membership. Complex (object/array) enums fall back.
434
+ struct enum_constraint {
435
+ std::vector<std::string> strings; // unescaped string values
436
+ std::vector<int64_t> integers;
437
+ std::vector<double> doubles;
438
+ bool has_null = false;
439
+ bool has_true = false;
440
+ bool has_false = false;
441
+ };
442
+ std::shared_ptr<enum_constraint> enum_check;
443
+
417
444
  // If false, schema uses unsupported features — must fall back to DOM path.
418
445
  bool supported = true;
419
446
  };
@@ -1360,8 +1387,10 @@ static void validate_node(const schema_node_ptr& node,
1360
1387
  expected += json_type_name(static_cast<json_type>(b));
1361
1388
  }
1362
1389
  }
1390
+ std::string actual = std::string(type_of_sv(value));
1363
1391
  errors.push_back({error_code::type_mismatch, path,
1364
- "expected type " + expected + ", got " + std::string(type_of_sv(value))});
1392
+ "expected type " + expected + ", got " + actual,
1393
+ expected, actual});
1365
1394
  ATA_CHECK_EARLY();
1366
1395
  }
1367
1396
  }
@@ -2399,6 +2428,59 @@ static simdjson::padded_string_view get_free_padded_view(
2399
2428
  return simdjson::padded_string_view(data, length, length + REQUIRED_PADDING);
2400
2429
  }
2401
2430
 
// Recognize common digit-only regex patterns so they can be checked inline
// instead of through the regex engine:
//   ^[0-9]+$   ^[0-9]*$   ^[0-9]{N}$   ^[0-9]{N,}$   ^[0-9]{N,M}$
// and the same forms written with \d instead of [0-9].
//
// p      the pattern source, including the ^ and $ anchors.
// min_n  out: minimum number of digits required.
// max_n  out: maximum number of digits allowed; UINT32_MAX means unbounded.
//
// Returns true if the pattern was recognized. On false, min_n and max_n are
// left untouched, so callers can keep whatever they had before the call.
static bool parse_digit_pattern(const std::string& p,
                                uint32_t& min_n, uint32_t& max_n) {
  // The shortest recognized pattern is "^\d+$" / "^\d*$": five characters.
  if (p.size() < 5) return false;
  if (p.front() != '^' || p.back() != '$') return false;
  const std::string_view body(p.data() + 1, p.size() - 2);

  // Character class: either the literal "[0-9]" or the "\d" shorthand.
  size_t pos = 0;
  if (body.substr(0, 5) == "[0-9]") {
    pos = 5;
  } else if (body.substr(0, 2) == "\\d") {
    pos = 2;
  } else {
    return false;
  }
  // A bare class with no quantifier is not handled here.
  if (pos >= body.size()) return false;

  const char q = body[pos];
  const bool quantifier_is_last = (pos + 1 == body.size());
  if (q == '+' && quantifier_is_last) {
    min_n = 1;
    max_n = UINT32_MAX;
    return true;
  }
  if (q == '*' && quantifier_is_last) {
    min_n = 0;
    max_n = UINT32_MAX;
    return true;
  }
  if (q != '{') return false;

  // The closing brace must be the very last character before '$'.
  const size_t close = body.find('}', pos);
  if (close == std::string_view::npos || close + 1 != body.size()) return false;
  const std::string_view inner = body.substr(pos + 1, close - pos - 1);
  if (inner.empty()) return false;

  // Parse a non-empty run of ASCII digits into a uint32_t. The accumulator is
  // 64-bit and checked after every digit, so it cannot wrap.
  auto parse_num = [](std::string_view s, uint32_t& out) -> bool {
    if (s.empty()) return false;
    uint64_t v = 0;
    for (const char c : s) {
      if (c < '0' || c > '9') return false;
      v = v * 10 + static_cast<uint64_t>(c - '0');
      if (v > UINT32_MAX) return false;
    }
    out = static_cast<uint32_t>(v);
    return true;
  };

  // Parse into locals so the out-params are only written on success.
  uint32_t lo = 0;
  uint32_t hi = 0;
  const size_t comma = inner.find(',');
  if (comma == std::string_view::npos) {
    // {N}: exactly N digits.
    if (!parse_num(inner, lo)) return false;
    hi = lo;
  } else {
    // {N,} or {N,M}.
    if (!parse_num(inner.substr(0, comma), lo)) return false;
    const std::string_view upper = inner.substr(comma + 1);
    if (upper.empty()) {
      hi = UINT32_MAX;
    } else if (!parse_num(upper, hi)) {
      return false;
    }
  }
  // A reversed range such as {5,3} is a malformed quantifier; decline it so
  // the regular regex path deals with the pattern.
  if (lo > hi) return false;

  min_n = lo;
  max_n = hi;
  return true;
}
+ }
2483
+
2402
2484
  // Build an od_plan from a schema_node tree.
2403
2485
  static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
2404
2486
  if (!node) return nullptr;
@@ -2412,7 +2494,6 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
2412
2494
 
2413
2495
  // Unsupported features → fall back to DOM
2414
2496
  if (!node->ref.empty() ||
2415
- !node->enum_values_minified.empty() ||
2416
2497
  node->const_value_raw.has_value() ||
2417
2498
  node->unique_items ||
2418
2499
  !node->all_of.empty() ||
@@ -2431,6 +2512,47 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
2431
2512
  return plan;
2432
2513
  }
2433
2514
 
2515
+ // Enum: handle primitive enums on the on-demand path. Complex enum values
2516
+ // (objects, arrays, escaped strings) fall back to DOM.
2517
+ if (!node->enum_values_minified.empty()) {
2518
+ auto ec = std::make_shared<od_plan::enum_constraint>();
2519
+ for (const auto& ev : node->enum_values_minified) {
2520
+ if (ev.empty()) { plan->supported = false; return plan; }
2521
+ char c = ev[0];
2522
+ if (c == '"') {
2523
+ if (ev.size() < 2 || ev.back() != '"') { plan->supported = false; return plan; }
2524
+ bool has_escape = false;
2525
+ for (size_t i = 1; i + 1 < ev.size(); i++) {
2526
+ if (ev[i] == '\\') { has_escape = true; break; }
2527
+ }
2528
+ if (has_escape) { plan->supported = false; return plan; }
2529
+ ec->strings.push_back(ev.substr(1, ev.size() - 2));
2530
+ } else if (c == '-' || (c >= '0' && c <= '9')) {
2531
+ bool is_int = true;
2532
+ for (size_t i = (c == '-' ? 1 : 0); i < ev.size(); i++) {
2533
+ if (ev[i] < '0' || ev[i] > '9') { is_int = false; break; }
2534
+ }
2535
+ if (is_int) {
2536
+ try { ec->integers.push_back(std::stoll(ev)); }
2537
+ catch (...) { plan->supported = false; return plan; }
2538
+ } else {
2539
+ try { ec->doubles.push_back(std::stod(ev)); }
2540
+ catch (...) { plan->supported = false; return plan; }
2541
+ }
2542
+ } else if (ev == "true") {
2543
+ ec->has_true = true;
2544
+ } else if (ev == "false") {
2545
+ ec->has_false = true;
2546
+ } else if (ev == "null") {
2547
+ ec->has_null = true;
2548
+ } else {
2549
+ plan->supported = false;
2550
+ return plan;
2551
+ }
2552
+ }
2553
+ plan->enum_check = std::move(ec);
2554
+ }
2555
+
2434
2556
  plan->type_mask = node->type_mask;
2435
2557
  if (node->minimum) { plan->num_flags |= od_plan::HAS_MIN; plan->num_min = *node->minimum; }
2436
2558
  if (node->maximum) { plan->num_flags |= od_plan::HAS_MAX; plan->num_max = *node->maximum; }
@@ -2442,6 +2564,16 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
2442
2564
  #ifndef ATA_NO_RE2
2443
2565
  plan->pattern = node->compiled_pattern.get();
2444
2566
  #endif
2567
+ // Try to recognize the pattern as an inlined digit check; if so, skip RE2.
2568
+ if (node->pattern.has_value()) {
2569
+ uint32_t min_n, max_n;
2570
+ if (parse_digit_pattern(*node->pattern, min_n, max_n)) {
2571
+ plan->digit_pattern = od_plan::digit_pattern_t{min_n, max_n};
2572
+ #ifndef ATA_NO_RE2
2573
+ plan->pattern = nullptr;
2574
+ #endif
2575
+ }
2576
+ }
2445
2577
  plan->format_id = node->format_id;
2446
2578
 
2447
2579
  // Object plan — build hash lookup for O(1) per-field dispatch
@@ -2477,7 +2609,46 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
2477
2609
  op->entries[it->second].sub = std::move(sub);
2478
2610
  } else {
2479
2611
  key_to_idx[key] = op->entries.size();
2480
- op->entries.push_back({key, -1, std::move(sub)});
2612
+ op->entries.push_back({key, -1, std::move(sub), od_plan::fast_kind::OTHER});
2613
+ }
2614
+ }
2615
+ // Compute fast_kind for each entry post-hoc — lets the obj iterator
2616
+ // skip the recursive od_exec_plan call (and its type().get + switch)
2617
+ // for primitive properties, and also skip type detection for nested
2618
+ // objects / primitive arrays.
2619
+ for (auto& e : op->entries) {
2620
+ if (!e.sub) continue;
2621
+ // Nested object: dispatch directly to value.get(obj) + iteration helper.
2622
+ if (e.sub->object && !e.sub->array) {
2623
+ e.fk = od_plan::fast_kind::OBJECT;
2624
+ continue;
2625
+ }
2626
+ // Array: dispatch directly to value.get(arr) + element iteration helper.
2627
+ if (e.sub->array && !e.sub->object) {
2628
+ e.fk = od_plan::fast_kind::ARRAY;
2629
+ continue;
2630
+ }
2631
+ if (e.sub->object || e.sub->array) continue; // unusual: both, skip
2632
+ uint8_t m = e.sub->type_mask;
2633
+ uint8_t s_bit = json_type_bit(json_type::string);
2634
+ uint8_t i_bit = json_type_bit(json_type::integer);
2635
+ uint8_t n_bit = json_type_bit(json_type::number);
2636
+ uint8_t b_bit = json_type_bit(json_type::boolean);
2637
+ if (m == s_bit) e.fk = od_plan::fast_kind::STRING;
2638
+ else if (m == i_bit || m == (i_bit | n_bit)) e.fk = od_plan::fast_kind::INTEGER;
2639
+ else if (m == b_bit) e.fk = od_plan::fast_kind::BOOLEAN;
2640
+ else if (m == 0 && e.sub->enum_check) {
2641
+ // Untyped enum: infer from enum value shapes
2642
+ auto& ec = *e.sub->enum_check;
2643
+ bool only_str = !ec.strings.empty() && ec.integers.empty() && ec.doubles.empty()
2644
+ && !ec.has_null && !ec.has_true && !ec.has_false;
2645
+ bool only_int = ec.strings.empty() && !ec.integers.empty() && ec.doubles.empty()
2646
+ && !ec.has_null && !ec.has_true && !ec.has_false;
2647
+ bool only_bool = ec.strings.empty() && ec.integers.empty() && ec.doubles.empty()
2648
+ && !ec.has_null && (ec.has_true || ec.has_false);
2649
+ if (only_str) e.fk = od_plan::fast_kind::STRING;
2650
+ else if (only_int) e.fk = od_plan::fast_kind::INTEGER;
2651
+ else if (only_bool) e.fk = od_plan::fast_kind::BOOLEAN;
2481
2652
  }
2482
2653
  }
2483
2654
  plan->object = std::move(op);
@@ -2517,11 +2688,18 @@ static inline uint64_t utf8_length_fast(std::string_view s) {
2517
2688
 
2518
2689
  // Execute an od_plan against a simdjson On-Demand value.
2519
2690
  // Each value consumed exactly once. Uses simdjson types directly — no od_type() overhead.
2520
- static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2521
- // Use simdjson type directly skip od_type() conversion + get_number_type()
2691
+ // `pre_type` is an optional caller-supplied type hint; when set, the value.type().get
2692
+ // simdjson call is skipped (saves ~3-5 ns per recursion). Used by fast_kind::OBJECT
2693
+ // / ARRAY dispatch where the type is already established at compile time.
2694
+ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value,
2695
+ std::optional<simdjson::ondemand::json_type> pre_type = std::nullopt) {
2522
2696
  using sjt = simdjson::ondemand::json_type;
2523
2697
  sjt st;
2524
- if (value.type().get(st) != SUCCESS) return false;
2698
+ if (pre_type) {
2699
+ st = *pre_type;
2700
+ } else {
2701
+ if (value.type().get(st) != SUCCESS) return false;
2702
+ }
2525
2703
 
2526
2704
  // Type check using simdjson type directly
2527
2705
  if (plan.type_mask) {
@@ -2552,11 +2730,12 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2552
2730
 
2553
2731
  switch (st) {
2554
2732
  case sjt::number: {
2555
- if (!plan.num_flags) break; // No numeric constraints
2733
+ bool need_value = plan.num_flags || plan.enum_check;
2734
+ if (!need_value) break;
2556
2735
  double v;
2557
- // Try integer first (more common), fall back to double
2558
2736
  int64_t iv;
2559
- if (value.get(iv) == SUCCESS) {
2737
+ bool got_int = (value.get(iv) == SUCCESS);
2738
+ if (got_int) {
2560
2739
  v = static_cast<double>(iv);
2561
2740
  } else if (value.get(v) != SUCCESS) {
2562
2741
  return false;
@@ -2570,6 +2749,20 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2570
2749
  double r = std::fmod(v, plan.num_mul);
2571
2750
  if (std::abs(r) > 1e-8 && std::abs(r - plan.num_mul) > 1e-8) return false;
2572
2751
  }
2752
+ if (plan.enum_check) {
2753
+ auto& ec = *plan.enum_check;
2754
+ bool match = false;
2755
+ if (got_int) {
2756
+ for (auto i : ec.integers) if (i == iv) { match = true; break; }
2757
+ }
2758
+ if (!match) {
2759
+ for (auto d : ec.doubles) if (d == v) { match = true; break; }
2760
+ }
2761
+ if (!match && got_int) {
2762
+ for (auto d : ec.doubles) if (d == v) { match = true; break; }
2763
+ }
2764
+ if (!match) return false;
2765
+ }
2573
2766
  break;
2574
2767
  }
2575
2768
  case sjt::string: {
@@ -2580,8 +2773,16 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2580
2773
  if (plan.min_length && len < *plan.min_length) return false;
2581
2774
  if (plan.max_length && len > *plan.max_length) return false;
2582
2775
  }
2776
+ if (plan.digit_pattern) {
2777
+ auto& dp = *plan.digit_pattern;
2778
+ if (sv.size() < dp.min_len || sv.size() > dp.max_len) return false;
2779
+ const uint8_t* p = reinterpret_cast<const uint8_t*>(sv.data());
2780
+ for (size_t i = 0, n = sv.size(); i < n; i++) {
2781
+ if (p[i] < '0' || p[i] > '9') return false;
2782
+ }
2783
+ }
2583
2784
  #ifndef ATA_NO_RE2
2584
- if (plan.pattern) {
2785
+ else if (plan.pattern) {
2585
2786
  if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *plan.pattern))
2586
2787
  return false;
2587
2788
  }
@@ -2589,6 +2790,17 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2589
2790
  if (plan.format_id != 255) {
2590
2791
  if (!check_format_by_id(sv, plan.format_id)) return false;
2591
2792
  }
2793
+ if (plan.enum_check) {
2794
+ auto& ec = *plan.enum_check;
2795
+ bool match = false;
2796
+ for (auto& s : ec.strings) {
2797
+ if (sv.size() == s.size() && std::memcmp(sv.data(), s.data(), s.size()) == 0) {
2798
+ match = true;
2799
+ break;
2800
+ }
2801
+ }
2802
+ if (!match) return false;
2803
+ }
2592
2804
  break;
2593
2805
  }
2594
2806
  case sjt::object: {
@@ -2601,25 +2813,140 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2601
2813
  uint64_t prop_count = 0;
2602
2814
 
2603
2815
  for (auto field : obj) {
2604
- std::string_view key;
2605
- if (field.unescaped_key().get(key)) continue;
2816
+ // Fast key compare: use raw_json_string against schema keys directly,
2817
+ // skipping the unescape pass. Schema keys are assumed to have no
2818
+ // unescaped quotes (true for any well-formed JSON Schema).
2819
+ simdjson::ondemand::raw_json_string raw_key;
2820
+ if (field.key().get(raw_key) != SUCCESS) continue;
2606
2821
  prop_count++;
2607
2822
 
2608
- // Single merged scan: required + property in one pass
2609
2823
  bool matched = false;
2610
2824
  for (auto& e : op.entries) {
2611
- if (key == e.key) {
2825
+ if (raw_key.unsafe_is_equal(e.key)) {
2612
2826
  if (e.required_idx >= 0)
2613
2827
  required_found |= (1ULL << e.required_idx);
2614
2828
  if (e.sub) {
2615
- simdjson::ondemand::value fv;
2616
- if (field.value().get(fv) != SUCCESS) return false;
2617
- if (!od_exec_plan(*e.sub, fv)) return false;
2829
+ // Fast dispatch: skip recursive od_exec_plan + type().get + switch
2830
+ // for primitive sub-plans. Mirrors the constraint code in od_exec_plan.
2831
+ switch (e.fk) {
2832
+ case od_plan::fast_kind::INTEGER: {
2833
+ int64_t iv;
2834
+ if (field.value().get(iv) != SUCCESS) return false;
2835
+ auto& sub = *e.sub;
2836
+ uint8_t f = sub.num_flags;
2837
+ double v = static_cast<double>(iv);
2838
+ if ((f & od_plan::HAS_MIN) && v < sub.num_min) return false;
2839
+ if ((f & od_plan::HAS_MAX) && v > sub.num_max) return false;
2840
+ if ((f & od_plan::HAS_EX_MIN) && v <= sub.num_ex_min) return false;
2841
+ if ((f & od_plan::HAS_EX_MAX) && v >= sub.num_ex_max) return false;
2842
+ if (f & od_plan::HAS_MUL) {
2843
+ double r = std::fmod(v, sub.num_mul);
2844
+ if (std::abs(r) > 1e-8 && std::abs(r - sub.num_mul) > 1e-8) return false;
2845
+ }
2846
+ if (sub.enum_check) {
2847
+ bool em = false;
2848
+ for (auto i2 : sub.enum_check->integers) if (i2 == iv) { em = true; break; }
2849
+ if (!em) for (auto d : sub.enum_check->doubles) if (d == v) { em = true; break; }
2850
+ if (!em) return false;
2851
+ }
2852
+ break;
2853
+ }
2854
+ case od_plan::fast_kind::STRING: {
2855
+ std::string_view sv;
2856
+ if (field.value().get(sv) != SUCCESS) return false;
2857
+ auto& sub = *e.sub;
2858
+ if (sub.min_length || sub.max_length) {
2859
+ uint64_t len = utf8_length_fast(sv);
2860
+ if (sub.min_length && len < *sub.min_length) return false;
2861
+ if (sub.max_length && len > *sub.max_length) return false;
2862
+ }
2863
+ if (sub.digit_pattern) {
2864
+ auto& dp = *sub.digit_pattern;
2865
+ if (sv.size() < dp.min_len || sv.size() > dp.max_len) return false;
2866
+ const uint8_t* sp = reinterpret_cast<const uint8_t*>(sv.data());
2867
+ for (size_t i = 0, n = sv.size(); i < n; i++) {
2868
+ if (sp[i] < '0' || sp[i] > '9') return false;
2869
+ }
2870
+ }
2871
+ #ifndef ATA_NO_RE2
2872
+ else if (sub.pattern) {
2873
+ if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *sub.pattern))
2874
+ return false;
2875
+ }
2876
+ #endif
2877
+ if (sub.format_id != 255) {
2878
+ if (!check_format_by_id(sv, sub.format_id)) return false;
2879
+ }
2880
+ if (sub.enum_check) {
2881
+ bool em = false;
2882
+ for (auto& s : sub.enum_check->strings) {
2883
+ if (sv.size() == s.size() && std::memcmp(sv.data(), s.data(), s.size()) == 0) {
2884
+ em = true;
2885
+ break;
2886
+ }
2887
+ }
2888
+ if (!em) return false;
2889
+ }
2890
+ break;
2891
+ }
2892
+ case od_plan::fast_kind::BOOLEAN: {
2893
+ bool bv;
2894
+ if (field.value().get(bv) != SUCCESS) return false;
2895
+ if (e.sub->enum_check) {
2896
+ if (bv ? !e.sub->enum_check->has_true : !e.sub->enum_check->has_false) return false;
2897
+ }
2898
+ break;
2899
+ }
2900
+ case od_plan::fast_kind::OBJECT: {
2901
+ simdjson::ondemand::value fv;
2902
+ if (field.value().get(fv) != SUCCESS) return false;
2903
+ if (!od_exec_plan(*e.sub, fv,
2904
+ simdjson::ondemand::json_type::object)) return false;
2905
+ break;
2906
+ }
2907
+ case od_plan::fast_kind::ARRAY: {
2908
+ simdjson::ondemand::value fv;
2909
+ if (field.value().get(fv) != SUCCESS) return false;
2910
+ if (!od_exec_plan(*e.sub, fv,
2911
+ simdjson::ondemand::json_type::array)) return false;
2912
+ break;
2913
+ }
2914
+ case od_plan::fast_kind::OTHER:
2915
+ default: {
2916
+ simdjson::ondemand::value fv;
2917
+ if (field.value().get(fv) != SUCCESS) return false;
2918
+ if (!od_exec_plan(*e.sub, fv)) return false;
2919
+ }
2920
+ }
2618
2921
  }
2619
2922
  matched = true;
2620
2923
  break;
2621
2924
  }
2622
2925
  }
2926
+ // Safety net: if no match via raw byte compare, the JSON key may be
2927
+ // escaped (e.g., "\u0061name" for "aname"). Fall back to a properly-unescaped
2928
+ // compare so escaped keys still match the unescaped schema keys.
2929
+ if (!matched) {
2930
+ std::string_view raw_view = field.escaped_key();
2931
+ if (raw_view.find('\\') != std::string_view::npos) {
2932
+ std::string_view ukey;
2933
+ if (field.unescaped_key().get(ukey) == SUCCESS) {
2934
+ for (auto& e : op.entries) {
2935
+ if (ukey == e.key) {
2936
+ if (e.required_idx >= 0)
2937
+ required_found |= (1ULL << e.required_idx);
2938
+ if (e.sub) {
2939
+ simdjson::ondemand::value fv;
2940
+ if (field.value().get(fv) != SUCCESS) return false;
2941
+ if (!od_exec_plan(*e.sub, fv)) return false;
2942
+ }
2943
+ matched = true;
2944
+ break;
2945
+ }
2946
+ }
2947
+ }
2948
+ }
2949
+ }
2623
2950
  if (!matched && op.no_additional) return false;
2624
2951
  }
2625
2952
 
@@ -2647,6 +2974,18 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
2647
2974
  if (ap.max_items && count > *ap.max_items) return false;
2648
2975
  break;
2649
2976
  }
2977
+ case sjt::boolean: {
2978
+ if (plan.enum_check) {
2979
+ bool b;
2980
+ if (value.get(b) != SUCCESS) return false;
2981
+ if (b ? !plan.enum_check->has_true : !plan.enum_check->has_false) return false;
2982
+ }
2983
+ break;
2984
+ }
2985
+ case sjt::null: {
2986
+ if (plan.enum_check && !plan.enum_check->has_null) return false;
2987
+ break;
2988
+ }
2650
2989
  default:
2651
2990
  break;
2652
2991
  }