ata-validator 0.12.1 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding/ata_napi.cpp +17 -3
- package/include/ata.h +5 -3
- package/index.d.ts +11 -0
- package/index.js +115 -16
- package/lib/js-compiler.js +82 -30
- package/package.json +1 -1
- package/prebuilds/ata-darwin-arm64/node-napi-v10.node +0 -0
- package/src/ata.cpp +356 -17
package/binding/ata_napi.cpp
CHANGED
|
@@ -1203,8 +1203,15 @@ static napi_value RawFastValidate(napi_env env, napi_callback_info info) {
|
|
|
1203
1203
|
}
|
|
1204
1204
|
}
|
|
1205
1205
|
} else {
|
|
1206
|
-
|
|
1207
|
-
|
|
1206
|
+
napi_valuetype vtype;
|
|
1207
|
+
napi_typeof(env, args[1], &vtype);
|
|
1208
|
+
if (vtype != napi_string) {
|
|
1209
|
+
napi_throw_type_error(env, nullptr,
|
|
1210
|
+
"rawFastValidate() requires a Buffer, TypedArray, or string. For parsed objects, use validate() or isValidObject().");
|
|
1211
|
+
return nullptr;
|
|
1212
|
+
}
|
|
1213
|
+
// String, must copy (can't pre-pad strings)
|
|
1214
|
+
size_t len = 0;
|
|
1208
1215
|
napi_get_value_string_utf8(env, args[1], nullptr, 0, &len);
|
|
1209
1216
|
if (len <= 4096) {
|
|
1210
1217
|
char buf[4097];
|
|
@@ -1267,7 +1274,14 @@ static napi_value RawBatchValidate(napi_env env, napi_callback_info info) {
|
|
|
1267
1274
|
valid = ata::validate(g_fast_schemas[slot],
|
|
1268
1275
|
std::string_view(static_cast<const char*>(data), length)).valid;
|
|
1269
1276
|
} else {
|
|
1270
|
-
|
|
1277
|
+
napi_valuetype vtype;
|
|
1278
|
+
napi_typeof(env, item, &vtype);
|
|
1279
|
+
if (vtype != napi_string) {
|
|
1280
|
+
napi_throw_type_error(env, nullptr,
|
|
1281
|
+
"rawNDJSONValidate() batch elements must be Buffer, TypedArray, or string");
|
|
1282
|
+
return nullptr;
|
|
1283
|
+
}
|
|
1284
|
+
size_t len = 0;
|
|
1271
1285
|
napi_get_value_string_utf8(env, item, nullptr, 0, &len);
|
|
1272
1286
|
std::string buf(len, '\0');
|
|
1273
1287
|
napi_get_value_string_utf8(env, item, buf.data(), len + 1, &len);
|
package/include/ata.h
CHANGED
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
#include <variant>
|
|
9
9
|
#include <vector>
|
|
10
10
|
|
|
11
|
-
#define ATA_VERSION "0.10.
|
|
11
|
+
#define ATA_VERSION "0.10.4"
|
|
12
12
|
|
|
13
13
|
namespace ata {
|
|
14
14
|
|
|
15
15
|
inline constexpr uint32_t VERSION_MAJOR = 0;
|
|
16
16
|
inline constexpr uint32_t VERSION_MINOR = 10;
|
|
17
|
-
inline constexpr uint32_t VERSION_REVISION =
|
|
17
|
+
inline constexpr uint32_t VERSION_REVISION = 4;
|
|
18
18
|
|
|
19
19
|
inline constexpr std::string_view version() noexcept {
|
|
20
|
-
return "0.10.
|
|
20
|
+
return "0.10.4";
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
enum class error_code : uint8_t {
|
|
@@ -55,6 +55,8 @@ struct validation_error {
|
|
|
55
55
|
error_code code;
|
|
56
56
|
std::string path;
|
|
57
57
|
std::string message;
|
|
58
|
+
std::string expected;
|
|
59
|
+
std::string actual;
|
|
58
60
|
};
|
|
59
61
|
|
|
60
62
|
struct validation_result {
|
package/index.d.ts
CHANGED
|
@@ -81,6 +81,17 @@ export class Validator {
|
|
|
81
81
|
/** Generate a standalone JS module string for zero-compile loading. Returns null if schema can't be standalone-compiled. */
|
|
82
82
|
toStandalone(): string | null;
|
|
83
83
|
|
|
84
|
+
/**
|
|
85
|
+
* Generate a self-contained module string with `validate`/`isValid` exports.
|
|
86
|
+
* The output has zero runtime dependency on ata-validator.
|
|
87
|
+
*
|
|
88
|
+
* - format: 'esm' (default) or 'cjs'.
|
|
89
|
+
* - abortEarly: if true, invalid results are a shared frozen stub (smaller output, no error details).
|
|
90
|
+
*
|
|
91
|
+
* Returns null if the schema cannot be compiled to a standalone module.
|
|
92
|
+
*/
|
|
93
|
+
toStandaloneModule(options?: { format?: 'esm' | 'cjs'; abortEarly?: boolean }): string | null;
|
|
94
|
+
|
|
84
95
|
/** Load a pre-compiled standalone module. Zero schema compilation at startup. */
|
|
85
96
|
static fromStandalone(mod: StandaloneModule, schema: object | string, options?: ValidatorOptions): Validator;
|
|
86
97
|
|
package/index.js
CHANGED
|
@@ -290,14 +290,41 @@ const _CP_LEN_SOURCE = `function _cpLen(s) {
|
|
|
290
290
|
// (which must materialize the full JS object tree). Buffer.from + NAPI ~2x faster.
|
|
291
291
|
const SIMDJSON_THRESHOLD = 8192;
|
|
292
292
|
|
|
293
|
+
// Resolve a JSON Schema path like "#/properties/name/type" to the schema object
|
|
294
|
+
// that *contains* the failing keyword. Used by verbose mode to populate
|
|
295
|
+
// `parentSchema` on validation errors. Returns undefined if the path can't be
|
|
296
|
+
// walked (malformed pointer or missing intermediate node).
|
|
297
|
+
/**
 * Resolve the schema object that owns the keyword named by a schema path.
 *
 * @param {*} rootSchema  Root schema the pointer is relative to.
 * @param {string} schemaPath  Fragment-style JSON Pointer such as
 *   "#/properties/name/type".
 * @returns {*} The node reached by walking every segment except the last one;
 *   the root schema itself for "#" and "#/"; undefined when schemaPath is not
 *   a string starting with "#" or when an intermediate segment is missing.
 */
function resolveSchemaByPath(rootSchema, schemaPath) {
  if (typeof schemaPath !== 'string' || !schemaPath.startsWith('#')) {
    return undefined;
  }
  const pointer = schemaPath.slice(1);
  if (pointer === '' || pointer === '/') return rootSchema;

  // Empty segments are dropped. "~1" is decoded before "~0" so that the
  // escaped form "~01" correctly becomes "~1" rather than "/".
  const segments = pointer
    .split('/')
    .filter(Boolean)
    .map((seg) => seg.replace(/~1/g, '/').replace(/~0/g, '~'));

  const hasOwn = Object.prototype.hasOwnProperty;
  let node = rootSchema;
  // The final segment is the failing keyword itself (e.g. "type"); the parent
  // schema is the object that owns it, so stop one segment short.
  for (let i = 0; i < segments.length - 1; i++) {
    if (node == null || typeof node !== 'object') return undefined;
    // Follow own properties only: a missing key such as "constructor" or
    // "__proto__" must resolve to undefined, not to a member inherited from
    // the prototype chain.
    if (!hasOwn.call(node, segments[i])) return undefined;
    node = node[segments[i]];
  }
  return node;
}
|
|
313
|
+
|
|
293
314
|
/**
 * Split a JSON Pointer-style path into a list of `{ key }` path segments.
 *
 * @param {string} path  Pointer such as "/items/0/name"; a falsy value yields
 *   an empty list.
 * @returns {Array<{key: string|number}>} One entry per non-empty segment, with
 *   "~1" and "~0" escapes decoded.
 */
function parsePointerPath(path) {
  if (!path) return [];
  return path
    .split("/")
    .filter(Boolean)
    .map((seg) => {
      const decoded = seg.replace(/~1/g, "/").replace(/~0/g, "~");
      // Per Standard Schema V1: array indices should be emitted as numbers,
      // object keys as strings. Treat canonical all-digit segments as indices.
      if (/^(0|[1-9][0-9]*)$/.test(decoded)) {
        const index = Number(decoded);
        // Digit strings beyond the safe-integer range cannot be array indices
        // and would be rounded by Number(); keep them as string keys so the
        // reported path still names the real property.
        if (Number.isSafeInteger(index)) return { key: index };
      }
      return { key: decoded };
    });
}
|
|
302
329
|
|
|
303
330
|
function createPaddedBuffer(jsonStr) {
|
|
@@ -366,6 +393,15 @@ class Validator {
|
|
|
366
393
|
// Schema map for cross-schema $ref resolution
|
|
367
394
|
this._schemaMap = buildSchemaMap(options.schemas) || new Map();
|
|
368
395
|
|
|
396
|
+
// User-supplied format checkers: { formatName: (value) => boolean }.
|
|
397
|
+
// Looked up at runtime when a schema references a format the built-in
|
|
398
|
+
// registry does not know about.
|
|
399
|
+
this._userFormats = options.formats || null;
|
|
400
|
+
|
|
401
|
+
// Verbose mode: when on, errors carry parentSchema (the schema object that
|
|
402
|
+
// produced the error). Matches ajv's `verbose: true` behavior.
|
|
403
|
+
this._verbose = !!options.verbose;
|
|
404
|
+
|
|
369
405
|
// Lazy stubs: trigger compilation on first call, then re-dispatch
|
|
370
406
|
this.validate = (data) => {
|
|
371
407
|
this._ensureCompiled();
|
|
@@ -464,7 +500,9 @@ class Validator {
|
|
|
464
500
|
const mapKey = this._schemaMap.size > 0
|
|
465
501
|
? this._schemaStr + '\0' + [...this._schemaMap.keys()].sort().join('\0')
|
|
466
502
|
: this._schemaStr;
|
|
467
|
-
|
|
503
|
+
// Custom formats are JS functions: bypass the compile cache since they can
|
|
504
|
+
// differ between validators that share the same schema string.
|
|
505
|
+
const cached = this._userFormats ? null : _compileCache.get(mapKey);
|
|
468
506
|
let jsFn, jsCombinedFn, jsErrFn, _isCodegen = false;
|
|
469
507
|
var _forceNapi = typeof process !== 'undefined' && process.env && process.env.ATA_FORCE_NAPI;
|
|
470
508
|
if (cached && !_forceNapi) {
|
|
@@ -473,12 +511,15 @@ class Validator {
|
|
|
473
511
|
jsErrFn = cached.errFn;
|
|
474
512
|
_isCodegen = !!cached.isCodegen;
|
|
475
513
|
} else if (!_forceNapi) {
|
|
476
|
-
const
|
|
514
|
+
const uf = this._userFormats;
|
|
515
|
+
const _cgFn = compileToJSCodegen(schemaObj, sm, uf);
|
|
477
516
|
jsFn = _cgFn || compileToJS(schemaObj, null, sm);
|
|
478
|
-
jsCombinedFn = compileToJSCombined(schemaObj, VALID_RESULT, sm);
|
|
479
|
-
jsErrFn = compileToJSCodegenWithErrors(schemaObj, sm);
|
|
517
|
+
jsCombinedFn = compileToJSCombined(schemaObj, VALID_RESULT, sm, uf);
|
|
518
|
+
jsErrFn = compileToJSCodegenWithErrors(schemaObj, sm, uf);
|
|
480
519
|
_isCodegen = !!_cgFn;
|
|
481
|
-
|
|
520
|
+
if (!uf) {
|
|
521
|
+
_compileCache.set(mapKey, { jsFn, combined: jsCombinedFn, errFn: jsErrFn, isCodegen: _isCodegen });
|
|
522
|
+
}
|
|
482
523
|
} else {
|
|
483
524
|
jsFn = null; jsCombinedFn = null; jsErrFn = null;
|
|
484
525
|
}
|
|
@@ -617,6 +658,24 @@ class Validator {
|
|
|
617
658
|
}
|
|
618
659
|
: (data) => (jsFn(data) ? VALID_RESULT : errFn(data));
|
|
619
660
|
}
|
|
661
|
+
// Verbose mode: populate parentSchema on each error.
|
|
662
|
+
// Errors may be frozen, so clone them with the extra field.
|
|
663
|
+
if (this._verbose) {
|
|
664
|
+
const inner = this.validate;
|
|
665
|
+
const root = this._schemaObj;
|
|
666
|
+
this.validate = (data) => {
|
|
667
|
+
const result = inner(data);
|
|
668
|
+
if (result && !result.valid && result.errors) {
|
|
669
|
+
const enriched = result.errors.map((err) =>
|
|
670
|
+
err && err.parentSchema === undefined
|
|
671
|
+
? { ...err, parentSchema: resolveSchemaByPath(root, err.schemaPath) }
|
|
672
|
+
: err
|
|
673
|
+
);
|
|
674
|
+
return { valid: false, errors: enriched };
|
|
675
|
+
}
|
|
676
|
+
return result;
|
|
677
|
+
};
|
|
678
|
+
}
|
|
620
679
|
this.isValidObject = jsFn;
|
|
621
680
|
const hybridFn = jsFn._hybridFactory
|
|
622
681
|
? jsFn._hybridFactory(VALID_RESULT, errFn)
|
|
@@ -693,19 +752,37 @@ class Validator {
|
|
|
693
752
|
this.isValid = (buf) => {
|
|
694
753
|
self._ensureNative();
|
|
695
754
|
const slot = self._fastSlot;
|
|
696
|
-
self.isValid = (b) => {
|
|
755
|
+
self.isValid = (b) => {
|
|
756
|
+
if (typeof b === 'string') b = Buffer.from(b);
|
|
757
|
+
else if (!(b instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
|
|
758
|
+
return native.rawFastValidate(slot, b);
|
|
759
|
+
};
|
|
697
760
|
return self.isValid(buf);
|
|
698
761
|
};
|
|
699
762
|
this.countValid = (ndjsonBuf) => {
|
|
700
763
|
self._ensureNative();
|
|
701
764
|
const slot = self._fastSlot;
|
|
702
|
-
self.countValid = (b) => {
|
|
765
|
+
self.countValid = (b) => {
|
|
766
|
+
if (typeof b === 'string') b = Buffer.from(b);
|
|
767
|
+
else if (!(b instanceof Uint8Array)) throw new TypeError('countValid() requires a Buffer, Uint8Array, or string');
|
|
768
|
+
const r = native.rawNDJSONValidate(slot, b);
|
|
769
|
+
let c = 0;
|
|
770
|
+
for (let i = 0; i < r.length; i++) if (r[i]) c++;
|
|
771
|
+
return c;
|
|
772
|
+
};
|
|
703
773
|
return self.countValid(ndjsonBuf);
|
|
704
774
|
};
|
|
705
775
|
this.batchIsValid = (buffers) => {
|
|
706
776
|
self._ensureNative();
|
|
707
777
|
const slot = self._fastSlot;
|
|
708
|
-
self.batchIsValid = (bufs) => {
|
|
778
|
+
self.batchIsValid = (bufs) => {
|
|
779
|
+
let v = 0;
|
|
780
|
+
for (const b of bufs) {
|
|
781
|
+
if (!(b instanceof Uint8Array)) throw new TypeError('batchIsValid() requires Buffer or Uint8Array elements');
|
|
782
|
+
if (native.rawFastValidate(slot, b)) v++;
|
|
783
|
+
}
|
|
784
|
+
return v;
|
|
785
|
+
};
|
|
709
786
|
return self.batchIsValid(buffers);
|
|
710
787
|
};
|
|
711
788
|
}
|
|
@@ -732,6 +809,7 @@ class Validator {
|
|
|
732
809
|
const slot = this._fastSlot;
|
|
733
810
|
this.isValid = (buf) => {
|
|
734
811
|
if (typeof buf === 'string') buf = Buffer.from(buf);
|
|
812
|
+
else if (!(buf instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
|
|
735
813
|
return native.rawFastValidate(slot, buf);
|
|
736
814
|
};
|
|
737
815
|
}
|
|
@@ -739,6 +817,7 @@ class Validator {
|
|
|
739
817
|
const slot = this._fastSlot;
|
|
740
818
|
this.countValid = (ndjsonBuf) => {
|
|
741
819
|
if (typeof ndjsonBuf === 'string') ndjsonBuf = Buffer.from(ndjsonBuf);
|
|
820
|
+
else if (!(ndjsonBuf instanceof Uint8Array)) throw new TypeError('countValid() requires a Buffer, Uint8Array, or string');
|
|
742
821
|
const results = native.rawNDJSONValidate(slot, ndjsonBuf);
|
|
743
822
|
let count = 0;
|
|
744
823
|
for (let i = 0; i < results.length; i++) if (results[i]) count++;
|
|
@@ -750,6 +829,7 @@ class Validator {
|
|
|
750
829
|
this.batchIsValid = (buffers) => {
|
|
751
830
|
let valid = 0;
|
|
752
831
|
for (const buf of buffers) {
|
|
832
|
+
if (!(buf instanceof Uint8Array)) throw new TypeError('batchIsValid() requires Buffer or Uint8Array elements');
|
|
753
833
|
if (native.rawFastValidate(slot, buf)) valid++;
|
|
754
834
|
}
|
|
755
835
|
return valid;
|
|
@@ -800,19 +880,24 @@ class Validator {
|
|
|
800
880
|
const mapKey = this._schemaMap.size > 0
|
|
801
881
|
? this._schemaStr + '\0' + [...this._schemaMap.keys()].sort().join('\0')
|
|
802
882
|
: this._schemaStr;
|
|
803
|
-
|
|
883
|
+
// Custom formats are JS functions: skip the shared cache so different
|
|
884
|
+
// validators with the same schema string but different formats don't collide.
|
|
885
|
+
const cached = this._userFormats ? null : _compileCache.get(mapKey);
|
|
804
886
|
if (cached && cached.jsFn) {
|
|
805
887
|
this._jsFn = cached.jsFn;
|
|
806
888
|
this.isValidObject = cached.jsFn;
|
|
807
889
|
return;
|
|
808
890
|
}
|
|
809
|
-
const
|
|
891
|
+
const uf = this._userFormats;
|
|
892
|
+
const jsFn = compileToJSCodegen(this._schemaObj, sm, uf) || compileToJS(this._schemaObj, null, sm);
|
|
810
893
|
this._jsFn = jsFn;
|
|
811
894
|
if (jsFn) {
|
|
812
895
|
this.isValidObject = jsFn;
|
|
813
896
|
// seed cache with codegen, combined/errFn filled later by _ensureCompiled
|
|
814
|
-
if (!
|
|
815
|
-
|
|
897
|
+
if (!uf) {
|
|
898
|
+
if (!cached) _compileCache.set(mapKey, { jsFn, combined: null, errFn: null });
|
|
899
|
+
else cached.jsFn = jsFn;
|
|
900
|
+
}
|
|
816
901
|
}
|
|
817
902
|
}
|
|
818
903
|
|
|
@@ -825,6 +910,7 @@ class Validator {
|
|
|
825
910
|
if (!jsFn || !jsFn._source) return null;
|
|
826
911
|
const src = jsFn._source;
|
|
827
912
|
const hybridSrc = jsFn._hybridSource || "";
|
|
913
|
+
const preambleSrc = jsFn._preambleSource || "";
|
|
828
914
|
|
|
829
915
|
// Also capture error function source for zero-compile standalone load
|
|
830
916
|
const jsErrFn = compileToJSCodegenWithErrors(
|
|
@@ -835,6 +921,7 @@ class Validator {
|
|
|
835
921
|
return `// Auto-generated by ata-validator — do not edit
|
|
836
922
|
'use strict';
|
|
837
923
|
${_CP_LEN_SOURCE}
|
|
924
|
+
${preambleSrc}
|
|
838
925
|
const boolFn = function(d) {
|
|
839
926
|
${src}
|
|
840
927
|
};
|
|
@@ -1025,6 +1112,8 @@ ${exports}`;
|
|
|
1025
1112
|
// Raw NAPI fast path for Buffer/Uint8Array
|
|
1026
1113
|
isValid(input) {
|
|
1027
1114
|
if (!native) throw new Error('Native addon required for isValid() — install build tools or use validate() instead');
|
|
1115
|
+
if (typeof input === 'string') input = Buffer.from(input);
|
|
1116
|
+
else if (!(input instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
|
|
1028
1117
|
this._ensureNative();
|
|
1029
1118
|
return native.rawFastValidate(this._fastSlot, input);
|
|
1030
1119
|
}
|
|
@@ -1097,15 +1186,21 @@ Validator.bundle = function (schemas, opts) {
|
|
|
1097
1186
|
};
|
|
1098
1187
|
|
|
1099
1188
|
// Zero-dependency self-contained bundle — no require('ata-validator') needed at runtime.
|
|
1189
|
+
// opts.format: 'cjs' (default) or 'esm'.
|
|
1100
1190
|
Validator.bundleStandalone = function (schemas, opts) {
|
|
1191
|
+
// Cross-schema $ref resolution: every Validator in the bundle needs to know
|
|
1192
|
+
// about the others so $ref to a sibling $id can resolve at compile time.
|
|
1193
|
+
const bundleOpts = { ...(opts || {}), schemas };
|
|
1194
|
+
const format = (opts && opts.format) || 'cjs';
|
|
1101
1195
|
const R = "Object.freeze({valid:true,errors:Object.freeze([])})";
|
|
1102
1196
|
const fns = schemas.map((schema) => {
|
|
1103
|
-
const v = new Validator(schema,
|
|
1197
|
+
const v = new Validator(schema, bundleOpts);
|
|
1104
1198
|
v._ensureCompiled();
|
|
1105
1199
|
const jsFn = v._jsFn;
|
|
1106
1200
|
if (!jsFn || !jsFn._hybridSource) return "null";
|
|
1107
1201
|
const jsErrFn = compileToJSCodegenWithErrors(
|
|
1108
1202
|
typeof schema === "string" ? JSON.parse(schema) : schema,
|
|
1203
|
+
v._schemaMap,
|
|
1109
1204
|
);
|
|
1110
1205
|
const errBody =
|
|
1111
1206
|
jsErrFn && jsErrFn._errSource
|
|
@@ -1113,6 +1208,10 @@ Validator.bundleStandalone = function (schemas, opts) {
|
|
|
1113
1208
|
: "return{valid:false,errors:[{code:'error',path:'',message:'validation failed'}]}";
|
|
1114
1209
|
return `(function(R){var E=function(d){var _all=true;${errBody}};return function(d){${jsFn._hybridSource}}})(R)`;
|
|
1115
1210
|
});
|
|
1211
|
+
const arr = `[${fns.join(",")}]`;
|
|
1212
|
+
if (format === 'esm') {
|
|
1213
|
+
return `// Auto-generated by ata-validator — do not edit\nconst R=${R};\nconst validators=${arr};\nexport default validators;\nexport { validators };\n`;
|
|
1214
|
+
}
|
|
1116
1215
|
return `'use strict';\nvar R=${R};\nmodule.exports=[${fns.join(",")}];\n`;
|
|
1117
1216
|
};
|
|
1118
1217
|
|
package/lib/js-compiler.js
CHANGED
|
@@ -768,7 +768,7 @@ function hasAdditionalPropertiesSchema(schema) {
|
|
|
768
768
|
|
|
769
769
|
// --- Codegen mode: generates a single Function (NOT CSP-safe) ---
|
|
770
770
|
// This matches ajv's approach: one monolithic function, V8 JIT fully inlines it
|
|
771
|
-
function compileToJSCodegen(schema, schemaMap) {
|
|
771
|
+
function compileToJSCodegen(schema, schemaMap, userFormats) {
|
|
772
772
|
if (typeof schema === 'boolean') return schema ? () => true : () => false
|
|
773
773
|
if (typeof schema !== 'object' || schema === null) return null
|
|
774
774
|
|
|
@@ -827,7 +827,7 @@ function compileToJSCodegen(schema, schemaMap) {
|
|
|
827
827
|
}
|
|
828
828
|
}
|
|
829
829
|
|
|
830
|
-
const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
|
|
830
|
+
const ctx = { varCounter: 0, helpers: [], helperCode: [], preamble: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema, userFormats: userFormats || null }
|
|
831
831
|
const lines = []
|
|
832
832
|
genCode(schema, 'd', lines, ctx)
|
|
833
833
|
|
|
@@ -864,24 +864,30 @@ function compileToJSCodegen(schema, schemaMap) {
|
|
|
864
864
|
hybridBody = replaceTopLevel(checkStr + '\n return R')
|
|
865
865
|
}
|
|
866
866
|
|
|
867
|
+
const preambleStr = ctx.preamble && ctx.preamble.length ? ctx.preamble.join('\n ') + '\n ' : ''
|
|
868
|
+
|
|
867
869
|
try {
|
|
868
870
|
let boolFn
|
|
869
871
|
if (closureNames.length > 0) {
|
|
870
|
-
const factory = new Function(...closureNames,
|
|
872
|
+
const factory = new Function(...closureNames, `${preambleStr}return function(d){${body}}`)
|
|
871
873
|
boolFn = factory(...closureValues)
|
|
874
|
+
} else if (preambleStr) {
|
|
875
|
+
const factory = new Function(`${preambleStr}return function(d){${body}}`)
|
|
876
|
+
boolFn = factory()
|
|
872
877
|
} else {
|
|
873
878
|
boolFn = new Function('d', body)
|
|
874
879
|
}
|
|
875
880
|
|
|
876
881
|
// Build hybrid: same body, return R instead of true, return E(d) instead of false.
|
|
877
882
|
try {
|
|
878
|
-
const hybridFactory = new Function(...closureNames, 'R', 'E',
|
|
883
|
+
const hybridFactory = new Function(...closureNames, 'R', 'E', `${preambleStr}return function(d){${hybridBody}}`)
|
|
879
884
|
boolFn._hybridFactory = (R, E) => hybridFactory(...closureValues, R, E)
|
|
880
885
|
} catch {}
|
|
881
886
|
|
|
882
887
|
// Store source for standalone compilation (includes regex inline for file output)
|
|
883
888
|
const helperStr = ctx.helperCode.length ? ctx.helperCode.join('\n ') + '\n ' : ''
|
|
884
889
|
boolFn._source = helperStr + body
|
|
890
|
+
boolFn._preambleSource = preambleStr
|
|
885
891
|
boolFn._hybridSource = helperStr + hybridBody
|
|
886
892
|
|
|
887
893
|
return boolFn
|
|
@@ -984,15 +990,21 @@ function tryGenCombined(schema, access, ctx) {
|
|
|
984
990
|
return `{${prelude}if(typeof ${v2}!=='string')return false;const _lv=${v2}.length;if(_lv<${M}||_lv>${X * 2})return false;if(_lv<${M * 2}||_lv>${X}){const _cp=_cpLen(${v2});if(_cp<${M}||_cp>${X})return false}}`
|
|
985
991
|
}
|
|
986
992
|
const conds = [`typeof _v!=='string'`]
|
|
987
|
-
if (schema.minLength !== undefined) {
|
|
993
|
+
if (schema.minLength !== undefined && schema.minLength > 0) {
|
|
988
994
|
const M = schema.minLength
|
|
989
995
|
conds.push(`_v.length<${M}`)
|
|
990
|
-
|
|
996
|
+
// For M=1, length<1 already catches all failures (any non-empty string has cpLen>=1).
|
|
997
|
+
if (M > 1) conds.push(`_v.length<${M * 2}&&_cpLen(_v)<${M}`)
|
|
991
998
|
}
|
|
992
999
|
if (schema.maxLength !== undefined) {
|
|
993
1000
|
const X = schema.maxLength
|
|
994
|
-
|
|
995
|
-
|
|
1001
|
+
// For X=0, only empty string passes, length>0 fails — no cpLen needed.
|
|
1002
|
+
if (X === 0) {
|
|
1003
|
+
conds.push(`_v.length>0`)
|
|
1004
|
+
} else {
|
|
1005
|
+
conds.push(`_v.length>${X * 2}`)
|
|
1006
|
+
conds.push(`_v.length>${X}&&_cpLen(_v)>${X}`)
|
|
1007
|
+
}
|
|
996
1008
|
}
|
|
997
1009
|
if (conds.length < 2) return null
|
|
998
1010
|
return bind(conds)
|
|
@@ -1263,14 +1275,20 @@ function genCode(schema, v, lines, ctx, knownType) {
|
|
|
1263
1275
|
const body = `{const ${lv}=${v}.length;if(${lv}<${M}||${lv}>${X * 2})return false;if(${lv}<${M * 2}||${lv}>${X}){const _cp=_cpLen(${v});if(_cp<${M}||_cp>${X})return false}}`
|
|
1264
1276
|
lines.push(isStr ? body : `if(typeof ${v}==='string')${body}`)
|
|
1265
1277
|
} else {
|
|
1266
|
-
if (schema.minLength !== undefined) {
|
|
1278
|
+
if (schema.minLength !== undefined && schema.minLength > 0) {
|
|
1267
1279
|
const M = schema.minLength
|
|
1268
|
-
|
|
1280
|
+
// M==1: length<1 already catches empty strings (any non-empty string has cpLen>=1).
|
|
1281
|
+
const body = M === 1
|
|
1282
|
+
? `if(${v}.length<1)return false`
|
|
1283
|
+
: `if(${v}.length<${M})return false;if(${v}.length<${M * 2}&&_cpLen(${v})<${M})return false`
|
|
1269
1284
|
lines.push(isStr ? body : `if(typeof ${v}==='string'){${body}}`)
|
|
1270
1285
|
}
|
|
1271
1286
|
if (schema.maxLength !== undefined) {
|
|
1272
1287
|
const X = schema.maxLength
|
|
1273
|
-
|
|
1288
|
+
// X==0: only empty string passes, no cpLen needed.
|
|
1289
|
+
const body = X === 0
|
|
1290
|
+
? `if(${v}.length>0)return false`
|
|
1291
|
+
: `if(${v}.length>${X * 2})return false;if(${v}.length>${X}&&_cpLen(${v})>${X})return false`
|
|
1274
1292
|
lines.push(isStr ? body : `if(typeof ${v}==='string'){${body}}`)
|
|
1275
1293
|
}
|
|
1276
1294
|
}
|
|
@@ -1302,7 +1320,19 @@ function genCode(schema, v, lines, ctx, knownType) {
|
|
|
1302
1320
|
|
|
1303
1321
|
if (schema.format) {
|
|
1304
1322
|
const fc = FORMAT_CODEGEN[schema.format]
|
|
1305
|
-
if (fc)
|
|
1323
|
+
if (fc) {
|
|
1324
|
+
lines.push(fc(v, isStr))
|
|
1325
|
+
} else if (ctx.userFormats && typeof ctx.userFormats[schema.format] === 'function') {
|
|
1326
|
+
// User-supplied format checker: thread the function via closure and call at runtime.
|
|
1327
|
+
const safeName = schema.format.replace(/[^a-zA-Z0-9_]/g, '_')
|
|
1328
|
+
const closureName = `_uf_${safeName}`
|
|
1329
|
+
if (!ctx.closureVars.includes(closureName)) {
|
|
1330
|
+
ctx.closureVars.push(closureName)
|
|
1331
|
+
ctx.closureVals.push(ctx.userFormats[schema.format])
|
|
1332
|
+
}
|
|
1333
|
+
const guard = isStr ? '' : `typeof ${v}==='string'&&`
|
|
1334
|
+
lines.push(`if(${guard}!${closureName}(${v}))return false`)
|
|
1335
|
+
}
|
|
1306
1336
|
}
|
|
1307
1337
|
|
|
1308
1338
|
// uniqueItems — tiered strategy based on expected array size
|
|
@@ -1603,37 +1633,59 @@ function genCode(schema, v, lines, ctx, knownType) {
|
|
|
1603
1633
|
}
|
|
1604
1634
|
}
|
|
1605
1635
|
|
|
1606
|
-
// anyOf —
|
|
1636
|
+
// anyOf — branch fns hoisted when safe, inline fallback when recursive.
|
|
1607
1637
|
// Skip standard anyOf if unevaluatedProperties will handle it (single-pass optimization)
|
|
1608
1638
|
if (schema.anyOf && schema.unevaluatedProperties === undefined) {
|
|
1609
|
-
const
|
|
1639
|
+
const fi = ctx.varCounter++
|
|
1640
|
+
const branchBodies = []
|
|
1641
|
+
let canHoist = !!ctx.preamble
|
|
1610
1642
|
for (let i = 0; i < schema.anyOf.length; i++) {
|
|
1611
1643
|
const subLines = []
|
|
1612
1644
|
genCode(schema.anyOf[i], '_av', subLines, ctx)
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1645
|
+
const body = subLines.length === 0 ? 'return true' : `${subLines.join(';')};return true`
|
|
1646
|
+
if (/\b_validate\b/.test(body)) canHoist = false
|
|
1647
|
+
branchBodies.push(body)
|
|
1648
|
+
}
|
|
1649
|
+
if (canHoist) {
|
|
1650
|
+
const names = branchBodies.map((body, i) => {
|
|
1651
|
+
const name = `_af${fi}_b${i}`
|
|
1652
|
+
ctx.preamble.push(`function ${name}(_av){${body}}`)
|
|
1653
|
+
return name
|
|
1654
|
+
})
|
|
1655
|
+
const checks = names.map(n => `${n}(${v})`).join('||')
|
|
1656
|
+
lines.push(`if(!(${checks}))return false`)
|
|
1657
|
+
} else {
|
|
1658
|
+
const fns = branchBodies.map(body => `function(_av){${body}}`)
|
|
1659
|
+
lines.push(`{const _af${fi}=[${fns.join(',')}];let _am${fi}=false;for(let _ai=0;_ai<_af${fi}.length;_ai++){if(_af${fi}[_ai](${v})){_am${fi}=true;break}}if(!_am${fi})return false}`)
|
|
1618
1660
|
}
|
|
1619
|
-
const fi = ctx.varCounter++
|
|
1620
|
-
lines.push(`{const _af${fi}=[${fns.join(',')}];let _am${fi}=false;for(let _ai=0;_ai<_af${fi}.length;_ai++){if(_af${fi}[_ai](${v})){_am${fi}=true;break}}if(!_am${fi})return false}`)
|
|
1621
1661
|
}
|
|
1622
1662
|
|
|
1623
|
-
// oneOf
|
|
1663
|
+
// oneOf — branch fns hoisted to factory scope when safe (no recursion/ref).
|
|
1664
|
+
// Falls back to inline closures if any branch touches recursive validation.
|
|
1624
1665
|
if (schema.oneOf) {
|
|
1625
|
-
const
|
|
1666
|
+
const fi = ctx.varCounter++
|
|
1667
|
+
const branchBodies = []
|
|
1668
|
+
let canHoist = !!ctx.preamble
|
|
1626
1669
|
for (let i = 0; i < schema.oneOf.length; i++) {
|
|
1627
1670
|
const subLines = []
|
|
1628
1671
|
genCode(schema.oneOf[i], '_ov', subLines, ctx)
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1672
|
+
const body = subLines.length === 0 ? 'return true' : `${subLines.join(';')};return true`
|
|
1673
|
+
// _validate is the recursive entry — hoisting branches above it breaks scope.
|
|
1674
|
+
if (/\b_validate\b/.test(body)) canHoist = false
|
|
1675
|
+
branchBodies.push(body)
|
|
1676
|
+
}
|
|
1677
|
+
if (canHoist) {
|
|
1678
|
+
const names = branchBodies.map((body, i) => {
|
|
1679
|
+
const name = `_of${fi}_b${i}`
|
|
1680
|
+
ctx.preamble.push(`function ${name}(_ov){${body}}`)
|
|
1681
|
+
return name
|
|
1682
|
+
})
|
|
1683
|
+
const calls = names.map(n => `if(${n}(${v})){_oc${fi}++;if(_oc${fi}>1)return false}`).join(';')
|
|
1684
|
+
lines.push(`{let _oc${fi}=0;${calls};if(_oc${fi}!==1)return false}`)
|
|
1685
|
+
} else {
|
|
1686
|
+
const fns = branchBodies.map(body => `function(_ov){${body}}`)
|
|
1687
|
+
lines.push(`{const _of${fi}=[${fns.join(',')}];let _oc${fi}=0;for(let _oi=0;_oi<_of${fi}.length;_oi++){if(_of${fi}[_oi](${v}))_oc${fi}++;if(_oc${fi}>1)return false}if(_oc${fi}!==1)return false}`)
|
|
1634
1688
|
}
|
|
1635
|
-
const fi = ctx.varCounter++
|
|
1636
|
-
lines.push(`{const _of${fi}=[${fns.join(',')}];let _oc${fi}=0;for(let _oi=0;_oi<_of${fi}.length;_oi++){if(_of${fi}[_oi](${v}))_oc${fi}++;if(_oc${fi}>1)return false}if(_oc${fi}!==1)return false}`)
|
|
1637
1689
|
}
|
|
1638
1690
|
|
|
1639
1691
|
// not
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ata-validator",
|
|
3
|
-
"version": "0.12.
|
|
3
|
+
"version": "0.12.3",
|
|
4
4
|
"description": "Ultra-fast JSON Schema validator. 5x faster validation, 159,000x faster compilation. Works without native addon. Cross-schema $ref, Draft 2020-12 + Draft 7, V8-optimized JS codegen, simdjson, RE2, multi-core. Standard Schema V1 compatible.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"module": "index.mjs",
|
|
Binary file
|
package/src/ata.cpp
CHANGED
|
@@ -391,13 +391,29 @@ struct od_plan {
|
|
|
391
391
|
#ifndef ATA_NO_RE2
|
|
392
392
|
re2::RE2* pattern = nullptr; // borrowed pointer from schema_node
|
|
393
393
|
#endif
|
|
394
|
+
// Inline digit pattern: ^[0-9]+$ / ^[0-9]{N}$ / ^[0-9]{N,M}$ / ^\d+$ etc.
|
|
395
|
+
// When set, supersedes the RE2 pattern call.
|
|
396
|
+
struct digit_pattern_t { uint32_t min_len; uint32_t max_len; };
|
|
397
|
+
std::optional<digit_pattern_t> digit_pattern;
|
|
394
398
|
uint8_t format_id = 255; // 255 = no format check
|
|
395
399
|
|
|
396
400
|
// Object — single iterate with merged required+property lookup
|
|
401
|
+
// fast_kind allows the obj iterator to skip the recursive od_exec_plan call
|
|
402
|
+
// (and its inner type().get() + switch) for primitive sub-plans, dispatching
|
|
403
|
+
// straight to inline constraint code.
|
|
404
|
+
// Inline-dispatch hint attached to a property entry. OTHER means "recurse via
// od_exec_plan"; every other enumerator names the typed getter used instead.
enum class fast_kind : uint8_t {
  // Recurse via od_exec_plan.
  OTHER = 0,
  // value.get(int64) + numeric range/multipleOf.
  INTEGER = 1,
  // value.get(string) + length/pattern/format/enum.
  STRING = 2,
  // value.get(bool).
  BOOLEAN = 3,
  // value.get(object) + nested obj iteration (skips type detect).
  OBJECT = 4,
  // value.get(array) + element iteration (skips type detect).
  ARRAY = 5,
};
|
|
397
412
|
struct prop_entry {
|
|
398
413
|
std::string key;
|
|
399
414
|
int required_idx = -1; // bit index for required tracking, or -1
|
|
400
415
|
std::shared_ptr<od_plan> sub; // property sub-plan, or nullptr
|
|
416
|
+
fast_kind fk = fast_kind::OTHER; // inline dispatch hint, set at compile time
|
|
401
417
|
};
|
|
402
418
|
struct obj_plan {
|
|
403
419
|
std::vector<prop_entry> entries; // merged required + properties — single scan
|
|
@@ -414,6 +430,17 @@ struct od_plan {
|
|
|
414
430
|
};
|
|
415
431
|
std::shared_ptr<arr_plan> array;
|
|
416
432
|
|
|
433
|
+
// Enum: primitive-only set membership. Complex (object/array) enums fall back.
|
|
434
|
+
struct enum_constraint {
  // Unescaped string values.
  std::vector<std::string> strings;
  // Integer values.
  std::vector<int64_t> integers;
  // Floating-point values.
  std::vector<double> doubles;
  // Flags for the null / true / false literals; all start cleared.
  bool has_null{false};
  bool has_true{false};
  bool has_false{false};
};
|
|
442
|
+
std::shared_ptr<enum_constraint> enum_check;
|
|
443
|
+
|
|
417
444
|
// If false, schema uses unsupported features — must fall back to DOM path.
|
|
418
445
|
bool supported = true;
|
|
419
446
|
};
|
|
@@ -1360,8 +1387,10 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1360
1387
|
expected += json_type_name(static_cast<json_type>(b));
|
|
1361
1388
|
}
|
|
1362
1389
|
}
|
|
1390
|
+
std::string actual = std::string(type_of_sv(value));
|
|
1363
1391
|
errors.push_back({error_code::type_mismatch, path,
|
|
1364
|
-
"expected type " + expected + ", got " +
|
|
1392
|
+
"expected type " + expected + ", got " + actual,
|
|
1393
|
+
expected, actual});
|
|
1365
1394
|
ATA_CHECK_EARLY();
|
|
1366
1395
|
}
|
|
1367
1396
|
}
|
|
@@ -2399,6 +2428,59 @@ static simdjson::padded_string_view get_free_padded_view(
|
|
|
2399
2428
|
return simdjson::padded_string_view(data, length, length + REQUIRED_PADDING);
|
|
2400
2429
|
}
|
|
2401
2430
|
|
|
2431
|
+
// Recognize anchored digit-only regex patterns so callers can replace the
// regex match with a simple length + byte-range check.
//
// Accepted forms, where D is either `[0-9]` or `\d`:
//   ^D+$      -> min_n = 1, max_n = UINT32_MAX
//   ^D*$      -> min_n = 0, max_n = UINT32_MAX
//   ^D{N}$    -> min_n = max_n = N
//   ^D{N,}$   -> min_n = N, max_n = UINT32_MAX
//   ^D{N,M}$  -> min_n = N, max_n = M
//
// Returns true and fills min_n/max_n when `p` has one of these shapes, false
// otherwise. On a false return min_n/max_n may already have been modified.
// N and M must fit in uint32_t; larger counts are rejected.
static bool parse_digit_pattern(const std::string& p,
                                uint32_t& min_n, uint32_t& max_n) {
  // The shortest recognizable pattern is "^\d+$" (5 characters). The previous
  // bound of 6 silently excluded the `\d+` / `\d*` forms.
  if (p.size() < 5) return false;
  if (p.front() != '^' || p.back() != '$') return false;

  // Everything between the anchors.
  std::string_view body(p.data() + 1, p.size() - 2);

  // Consume the digit class and remember where the quantifier starts.
  size_t pos = 0;
  if (body.size() >= 5 && body.compare(0, 5, "[0-9]") == 0) {
    pos = 5;
  } else if (body.size() >= 2 && body[0] == '\\' && body[1] == 'd') {
    pos = 2;
  } else {
    return false;
  }

  // A bare digit class without a quantifier is not handled here.
  if (pos >= body.size()) return false;

  char q = body[pos];
  if (q == '+' && pos + 1 == body.size()) {
    min_n = 1; max_n = UINT32_MAX; return true;
  }
  if (q == '*' && pos + 1 == body.size()) {
    min_n = 0; max_n = UINT32_MAX; return true;
  }
  if (q != '{') return false;

  // The closing brace must be the last character before the `$` anchor.
  size_t close = body.find('}', pos);
  if (close == std::string_view::npos || close + 1 != body.size()) return false;
  auto inner = body.substr(pos + 1, close - pos - 1);
  if (inner.empty()) return false;

  // Strict decimal parser: digits only, non-empty, must fit in uint32_t.
  auto parse_num = [](std::string_view s, uint32_t& out) -> bool {
    if (s.empty()) return false;
    uint64_t v = 0;
    for (char c : s) {
      if (c < '0' || c > '9') return false;
      v = v * 10 + static_cast<uint64_t>(c - '0');
      if (v > UINT32_MAX) return false;
    }
    out = static_cast<uint32_t>(v);
    return true;
  };

  size_t comma = inner.find(',');
  if (comma == std::string_view::npos) {
    // {N}: exact repetition count.
    if (!parse_num(inner, min_n)) return false;
    max_n = min_n;
  } else {
    // {N,} or {N,M}: an empty upper bound means "unbounded".
    auto a = inner.substr(0, comma);
    auto b = inner.substr(comma + 1);
    if (!parse_num(a, min_n)) return false;
    if (b.empty()) max_n = UINT32_MAX;
    else if (!parse_num(b, max_n)) return false;
  }
  return true;
}
|
|
2483
|
+
|
|
2402
2484
|
// Build an od_plan from a schema_node tree.
|
|
2403
2485
|
static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
2404
2486
|
if (!node) return nullptr;
|
|
@@ -2412,7 +2494,6 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2412
2494
|
|
|
2413
2495
|
// Unsupported features → fall back to DOM
|
|
2414
2496
|
if (!node->ref.empty() ||
|
|
2415
|
-
!node->enum_values_minified.empty() ||
|
|
2416
2497
|
node->const_value_raw.has_value() ||
|
|
2417
2498
|
node->unique_items ||
|
|
2418
2499
|
!node->all_of.empty() ||
|
|
@@ -2431,6 +2512,47 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2431
2512
|
return plan;
|
|
2432
2513
|
}
|
|
2433
2514
|
|
|
2515
|
+
// Enum: handle primitive enums on the on-demand path. Complex enum values
|
|
2516
|
+
// (objects, arrays, escaped strings) fall back to DOM.
|
|
2517
|
+
if (!node->enum_values_minified.empty()) {
|
|
2518
|
+
auto ec = std::make_shared<od_plan::enum_constraint>();
|
|
2519
|
+
for (const auto& ev : node->enum_values_minified) {
|
|
2520
|
+
if (ev.empty()) { plan->supported = false; return plan; }
|
|
2521
|
+
char c = ev[0];
|
|
2522
|
+
if (c == '"') {
|
|
2523
|
+
if (ev.size() < 2 || ev.back() != '"') { plan->supported = false; return plan; }
|
|
2524
|
+
bool has_escape = false;
|
|
2525
|
+
for (size_t i = 1; i + 1 < ev.size(); i++) {
|
|
2526
|
+
if (ev[i] == '\\') { has_escape = true; break; }
|
|
2527
|
+
}
|
|
2528
|
+
if (has_escape) { plan->supported = false; return plan; }
|
|
2529
|
+
ec->strings.push_back(ev.substr(1, ev.size() - 2));
|
|
2530
|
+
} else if (c == '-' || (c >= '0' && c <= '9')) {
|
|
2531
|
+
bool is_int = true;
|
|
2532
|
+
for (size_t i = (c == '-' ? 1 : 0); i < ev.size(); i++) {
|
|
2533
|
+
if (ev[i] < '0' || ev[i] > '9') { is_int = false; break; }
|
|
2534
|
+
}
|
|
2535
|
+
if (is_int) {
|
|
2536
|
+
try { ec->integers.push_back(std::stoll(ev)); }
|
|
2537
|
+
catch (...) { plan->supported = false; return plan; }
|
|
2538
|
+
} else {
|
|
2539
|
+
try { ec->doubles.push_back(std::stod(ev)); }
|
|
2540
|
+
catch (...) { plan->supported = false; return plan; }
|
|
2541
|
+
}
|
|
2542
|
+
} else if (ev == "true") {
|
|
2543
|
+
ec->has_true = true;
|
|
2544
|
+
} else if (ev == "false") {
|
|
2545
|
+
ec->has_false = true;
|
|
2546
|
+
} else if (ev == "null") {
|
|
2547
|
+
ec->has_null = true;
|
|
2548
|
+
} else {
|
|
2549
|
+
plan->supported = false;
|
|
2550
|
+
return plan;
|
|
2551
|
+
}
|
|
2552
|
+
}
|
|
2553
|
+
plan->enum_check = std::move(ec);
|
|
2554
|
+
}
|
|
2555
|
+
|
|
2434
2556
|
plan->type_mask = node->type_mask;
|
|
2435
2557
|
if (node->minimum) { plan->num_flags |= od_plan::HAS_MIN; plan->num_min = *node->minimum; }
|
|
2436
2558
|
if (node->maximum) { plan->num_flags |= od_plan::HAS_MAX; plan->num_max = *node->maximum; }
|
|
@@ -2442,6 +2564,16 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2442
2564
|
#ifndef ATA_NO_RE2
|
|
2443
2565
|
plan->pattern = node->compiled_pattern.get();
|
|
2444
2566
|
#endif
|
|
2567
|
+
// Try to recognize the pattern as an inlined digit check; if so, skip RE2.
|
|
2568
|
+
if (node->pattern.has_value()) {
|
|
2569
|
+
uint32_t min_n, max_n;
|
|
2570
|
+
if (parse_digit_pattern(*node->pattern, min_n, max_n)) {
|
|
2571
|
+
plan->digit_pattern = od_plan::digit_pattern_t{min_n, max_n};
|
|
2572
|
+
#ifndef ATA_NO_RE2
|
|
2573
|
+
plan->pattern = nullptr;
|
|
2574
|
+
#endif
|
|
2575
|
+
}
|
|
2576
|
+
}
|
|
2445
2577
|
plan->format_id = node->format_id;
|
|
2446
2578
|
|
|
2447
2579
|
// Object plan — build hash lookup for O(1) per-field dispatch
|
|
@@ -2477,7 +2609,46 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2477
2609
|
op->entries[it->second].sub = std::move(sub);
|
|
2478
2610
|
} else {
|
|
2479
2611
|
key_to_idx[key] = op->entries.size();
|
|
2480
|
-
op->entries.push_back({key, -1, std::move(sub)});
|
|
2612
|
+
op->entries.push_back({key, -1, std::move(sub), od_plan::fast_kind::OTHER});
|
|
2613
|
+
}
|
|
2614
|
+
}
|
|
2615
|
+
// Compute fast_kind for each entry post-hoc — lets the obj iterator
|
|
2616
|
+
// skip the recursive od_exec_plan call (and its type().get + switch)
|
|
2617
|
+
// for primitive properties, and also skip type detection for nested
|
|
2618
|
+
// objects / primitive arrays.
|
|
2619
|
+
for (auto& e : op->entries) {
|
|
2620
|
+
if (!e.sub) continue;
|
|
2621
|
+
// Nested object: dispatch directly to value.get(obj) + iteration helper.
|
|
2622
|
+
if (e.sub->object && !e.sub->array) {
|
|
2623
|
+
e.fk = od_plan::fast_kind::OBJECT;
|
|
2624
|
+
continue;
|
|
2625
|
+
}
|
|
2626
|
+
// Array: dispatch directly to value.get(arr) + element iteration helper.
|
|
2627
|
+
if (e.sub->array && !e.sub->object) {
|
|
2628
|
+
e.fk = od_plan::fast_kind::ARRAY;
|
|
2629
|
+
continue;
|
|
2630
|
+
}
|
|
2631
|
+
if (e.sub->object || e.sub->array) continue; // unusual: both, skip
|
|
2632
|
+
uint8_t m = e.sub->type_mask;
|
|
2633
|
+
uint8_t s_bit = json_type_bit(json_type::string);
|
|
2634
|
+
uint8_t i_bit = json_type_bit(json_type::integer);
|
|
2635
|
+
uint8_t n_bit = json_type_bit(json_type::number);
|
|
2636
|
+
uint8_t b_bit = json_type_bit(json_type::boolean);
|
|
2637
|
+
if (m == s_bit) e.fk = od_plan::fast_kind::STRING;
|
|
2638
|
+
else if (m == i_bit || m == (i_bit | n_bit)) e.fk = od_plan::fast_kind::INTEGER;
|
|
2639
|
+
else if (m == b_bit) e.fk = od_plan::fast_kind::BOOLEAN;
|
|
2640
|
+
else if (m == 0 && e.sub->enum_check) {
|
|
2641
|
+
// Untyped enum: infer from enum value shapes
|
|
2642
|
+
auto& ec = *e.sub->enum_check;
|
|
2643
|
+
bool only_str = !ec.strings.empty() && ec.integers.empty() && ec.doubles.empty()
|
|
2644
|
+
&& !ec.has_null && !ec.has_true && !ec.has_false;
|
|
2645
|
+
bool only_int = ec.strings.empty() && !ec.integers.empty() && ec.doubles.empty()
|
|
2646
|
+
&& !ec.has_null && !ec.has_true && !ec.has_false;
|
|
2647
|
+
bool only_bool = ec.strings.empty() && ec.integers.empty() && ec.doubles.empty()
|
|
2648
|
+
&& !ec.has_null && (ec.has_true || ec.has_false);
|
|
2649
|
+
if (only_str) e.fk = od_plan::fast_kind::STRING;
|
|
2650
|
+
else if (only_int) e.fk = od_plan::fast_kind::INTEGER;
|
|
2651
|
+
else if (only_bool) e.fk = od_plan::fast_kind::BOOLEAN;
|
|
2481
2652
|
}
|
|
2482
2653
|
}
|
|
2483
2654
|
plan->object = std::move(op);
|
|
@@ -2517,11 +2688,18 @@ static inline uint64_t utf8_length_fast(std::string_view s) {
|
|
|
2517
2688
|
|
|
2518
2689
|
// Execute an od_plan against a simdjson On-Demand value.
|
|
2519
2690
|
// Each value consumed exactly once. Uses simdjson types directly — no od_type() overhead.
|
|
2520
|
-
|
|
2521
|
-
|
|
2691
|
+
// `pre_type` is an optional caller-supplied type hint; when set, the value.type().get
|
|
2692
|
+
// simdjson call is skipped (saves ~3-5 ns per recursion). Used by fast_kind::OBJECT
|
|
2693
|
+
// / ARRAY dispatch where the type is already established at compile time.
|
|
2694
|
+
static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value,
|
|
2695
|
+
std::optional<simdjson::ondemand::json_type> pre_type = std::nullopt) {
|
|
2522
2696
|
using sjt = simdjson::ondemand::json_type;
|
|
2523
2697
|
sjt st;
|
|
2524
|
-
if (
|
|
2698
|
+
if (pre_type) {
|
|
2699
|
+
st = *pre_type;
|
|
2700
|
+
} else {
|
|
2701
|
+
if (value.type().get(st) != SUCCESS) return false;
|
|
2702
|
+
}
|
|
2525
2703
|
|
|
2526
2704
|
// Type check using simdjson type directly
|
|
2527
2705
|
if (plan.type_mask) {
|
|
@@ -2552,11 +2730,12 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2552
2730
|
|
|
2553
2731
|
switch (st) {
|
|
2554
2732
|
case sjt::number: {
|
|
2555
|
-
|
|
2733
|
+
bool need_value = plan.num_flags || plan.enum_check;
|
|
2734
|
+
if (!need_value) break;
|
|
2556
2735
|
double v;
|
|
2557
|
-
// Try integer first (more common), fall back to double
|
|
2558
2736
|
int64_t iv;
|
|
2559
|
-
|
|
2737
|
+
bool got_int = (value.get(iv) == SUCCESS);
|
|
2738
|
+
if (got_int) {
|
|
2560
2739
|
v = static_cast<double>(iv);
|
|
2561
2740
|
} else if (value.get(v) != SUCCESS) {
|
|
2562
2741
|
return false;
|
|
@@ -2570,6 +2749,20 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2570
2749
|
double r = std::fmod(v, plan.num_mul);
|
|
2571
2750
|
if (std::abs(r) > 1e-8 && std::abs(r - plan.num_mul) > 1e-8) return false;
|
|
2572
2751
|
}
|
|
2752
|
+
if (plan.enum_check) {
|
|
2753
|
+
auto& ec = *plan.enum_check;
|
|
2754
|
+
bool match = false;
|
|
2755
|
+
if (got_int) {
|
|
2756
|
+
for (auto i : ec.integers) if (i == iv) { match = true; break; }
|
|
2757
|
+
}
|
|
2758
|
+
if (!match) {
|
|
2759
|
+
for (auto d : ec.doubles) if (d == v) { match = true; break; }
|
|
2760
|
+
}
|
|
2761
|
+
if (!match && got_int) {
|
|
2762
|
+
for (auto d : ec.doubles) if (d == v) { match = true; break; }
|
|
2763
|
+
}
|
|
2764
|
+
if (!match) return false;
|
|
2765
|
+
}
|
|
2573
2766
|
break;
|
|
2574
2767
|
}
|
|
2575
2768
|
case sjt::string: {
|
|
@@ -2580,8 +2773,16 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2580
2773
|
if (plan.min_length && len < *plan.min_length) return false;
|
|
2581
2774
|
if (plan.max_length && len > *plan.max_length) return false;
|
|
2582
2775
|
}
|
|
2776
|
+
if (plan.digit_pattern) {
|
|
2777
|
+
auto& dp = *plan.digit_pattern;
|
|
2778
|
+
if (sv.size() < dp.min_len || sv.size() > dp.max_len) return false;
|
|
2779
|
+
const uint8_t* p = reinterpret_cast<const uint8_t*>(sv.data());
|
|
2780
|
+
for (size_t i = 0, n = sv.size(); i < n; i++) {
|
|
2781
|
+
if (p[i] < '0' || p[i] > '9') return false;
|
|
2782
|
+
}
|
|
2783
|
+
}
|
|
2583
2784
|
#ifndef ATA_NO_RE2
|
|
2584
|
-
if (plan.pattern) {
|
|
2785
|
+
else if (plan.pattern) {
|
|
2585
2786
|
if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *plan.pattern))
|
|
2586
2787
|
return false;
|
|
2587
2788
|
}
|
|
@@ -2589,6 +2790,17 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2589
2790
|
if (plan.format_id != 255) {
|
|
2590
2791
|
if (!check_format_by_id(sv, plan.format_id)) return false;
|
|
2591
2792
|
}
|
|
2793
|
+
if (plan.enum_check) {
|
|
2794
|
+
auto& ec = *plan.enum_check;
|
|
2795
|
+
bool match = false;
|
|
2796
|
+
for (auto& s : ec.strings) {
|
|
2797
|
+
if (sv.size() == s.size() && std::memcmp(sv.data(), s.data(), s.size()) == 0) {
|
|
2798
|
+
match = true;
|
|
2799
|
+
break;
|
|
2800
|
+
}
|
|
2801
|
+
}
|
|
2802
|
+
if (!match) return false;
|
|
2803
|
+
}
|
|
2592
2804
|
break;
|
|
2593
2805
|
}
|
|
2594
2806
|
case sjt::object: {
|
|
@@ -2601,25 +2813,140 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2601
2813
|
uint64_t prop_count = 0;
|
|
2602
2814
|
|
|
2603
2815
|
for (auto field : obj) {
|
|
2604
|
-
|
|
2605
|
-
|
|
2816
|
+
// Fast key compare: use raw_json_string against schema keys directly,
|
|
2817
|
+
// skipping the unescape pass. Schema keys are assumed to have no
|
|
2818
|
+
// unescaped quotes (true for any well-formed JSON Schema).
|
|
2819
|
+
simdjson::ondemand::raw_json_string raw_key;
|
|
2820
|
+
if (field.key().get(raw_key) != SUCCESS) continue;
|
|
2606
2821
|
prop_count++;
|
|
2607
2822
|
|
|
2608
|
-
// Single merged scan: required + property in one pass
|
|
2609
2823
|
bool matched = false;
|
|
2610
2824
|
for (auto& e : op.entries) {
|
|
2611
|
-
if (
|
|
2825
|
+
if (raw_key.unsafe_is_equal(e.key)) {
|
|
2612
2826
|
if (e.required_idx >= 0)
|
|
2613
2827
|
required_found |= (1ULL << e.required_idx);
|
|
2614
2828
|
if (e.sub) {
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2829
|
+
// Fast dispatch: skip recursive od_exec_plan + type().get + switch
|
|
2830
|
+
// for primitive sub-plans. Mirrors the constraint code in od_exec_plan.
|
|
2831
|
+
switch (e.fk) {
|
|
2832
|
+
case od_plan::fast_kind::INTEGER: {
|
|
2833
|
+
int64_t iv;
|
|
2834
|
+
if (field.value().get(iv) != SUCCESS) return false;
|
|
2835
|
+
auto& sub = *e.sub;
|
|
2836
|
+
uint8_t f = sub.num_flags;
|
|
2837
|
+
double v = static_cast<double>(iv);
|
|
2838
|
+
if ((f & od_plan::HAS_MIN) && v < sub.num_min) return false;
|
|
2839
|
+
if ((f & od_plan::HAS_MAX) && v > sub.num_max) return false;
|
|
2840
|
+
if ((f & od_plan::HAS_EX_MIN) && v <= sub.num_ex_min) return false;
|
|
2841
|
+
if ((f & od_plan::HAS_EX_MAX) && v >= sub.num_ex_max) return false;
|
|
2842
|
+
if (f & od_plan::HAS_MUL) {
|
|
2843
|
+
double r = std::fmod(v, sub.num_mul);
|
|
2844
|
+
if (std::abs(r) > 1e-8 && std::abs(r - sub.num_mul) > 1e-8) return false;
|
|
2845
|
+
}
|
|
2846
|
+
if (sub.enum_check) {
|
|
2847
|
+
bool em = false;
|
|
2848
|
+
for (auto i2 : sub.enum_check->integers) if (i2 == iv) { em = true; break; }
|
|
2849
|
+
if (!em) for (auto d : sub.enum_check->doubles) if (d == v) { em = true; break; }
|
|
2850
|
+
if (!em) return false;
|
|
2851
|
+
}
|
|
2852
|
+
break;
|
|
2853
|
+
}
|
|
2854
|
+
case od_plan::fast_kind::STRING: {
|
|
2855
|
+
std::string_view sv;
|
|
2856
|
+
if (field.value().get(sv) != SUCCESS) return false;
|
|
2857
|
+
auto& sub = *e.sub;
|
|
2858
|
+
if (sub.min_length || sub.max_length) {
|
|
2859
|
+
uint64_t len = utf8_length_fast(sv);
|
|
2860
|
+
if (sub.min_length && len < *sub.min_length) return false;
|
|
2861
|
+
if (sub.max_length && len > *sub.max_length) return false;
|
|
2862
|
+
}
|
|
2863
|
+
if (sub.digit_pattern) {
|
|
2864
|
+
auto& dp = *sub.digit_pattern;
|
|
2865
|
+
if (sv.size() < dp.min_len || sv.size() > dp.max_len) return false;
|
|
2866
|
+
const uint8_t* sp = reinterpret_cast<const uint8_t*>(sv.data());
|
|
2867
|
+
for (size_t i = 0, n = sv.size(); i < n; i++) {
|
|
2868
|
+
if (sp[i] < '0' || sp[i] > '9') return false;
|
|
2869
|
+
}
|
|
2870
|
+
}
|
|
2871
|
+
#ifndef ATA_NO_RE2
|
|
2872
|
+
else if (sub.pattern) {
|
|
2873
|
+
if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *sub.pattern))
|
|
2874
|
+
return false;
|
|
2875
|
+
}
|
|
2876
|
+
#endif
|
|
2877
|
+
if (sub.format_id != 255) {
|
|
2878
|
+
if (!check_format_by_id(sv, sub.format_id)) return false;
|
|
2879
|
+
}
|
|
2880
|
+
if (sub.enum_check) {
|
|
2881
|
+
bool em = false;
|
|
2882
|
+
for (auto& s : sub.enum_check->strings) {
|
|
2883
|
+
if (sv.size() == s.size() && std::memcmp(sv.data(), s.data(), s.size()) == 0) {
|
|
2884
|
+
em = true;
|
|
2885
|
+
break;
|
|
2886
|
+
}
|
|
2887
|
+
}
|
|
2888
|
+
if (!em) return false;
|
|
2889
|
+
}
|
|
2890
|
+
break;
|
|
2891
|
+
}
|
|
2892
|
+
case od_plan::fast_kind::BOOLEAN: {
|
|
2893
|
+
bool bv;
|
|
2894
|
+
if (field.value().get(bv) != SUCCESS) return false;
|
|
2895
|
+
if (e.sub->enum_check) {
|
|
2896
|
+
if (bv ? !e.sub->enum_check->has_true : !e.sub->enum_check->has_false) return false;
|
|
2897
|
+
}
|
|
2898
|
+
break;
|
|
2899
|
+
}
|
|
2900
|
+
case od_plan::fast_kind::OBJECT: {
|
|
2901
|
+
simdjson::ondemand::value fv;
|
|
2902
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2903
|
+
if (!od_exec_plan(*e.sub, fv,
|
|
2904
|
+
simdjson::ondemand::json_type::object)) return false;
|
|
2905
|
+
break;
|
|
2906
|
+
}
|
|
2907
|
+
case od_plan::fast_kind::ARRAY: {
|
|
2908
|
+
simdjson::ondemand::value fv;
|
|
2909
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2910
|
+
if (!od_exec_plan(*e.sub, fv,
|
|
2911
|
+
simdjson::ondemand::json_type::array)) return false;
|
|
2912
|
+
break;
|
|
2913
|
+
}
|
|
2914
|
+
case od_plan::fast_kind::OTHER:
|
|
2915
|
+
default: {
|
|
2916
|
+
simdjson::ondemand::value fv;
|
|
2917
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2918
|
+
if (!od_exec_plan(*e.sub, fv)) return false;
|
|
2919
|
+
}
|
|
2920
|
+
}
|
|
2618
2921
|
}
|
|
2619
2922
|
matched = true;
|
|
2620
2923
|
break;
|
|
2621
2924
|
}
|
|
2622
2925
|
}
|
|
2926
|
+
// Safety net: if no match via raw byte compare, the JSON key may be
|
|
2927
|
+
// escaped (e.g., "\u0061name"). Fall back to a properly-unescaped
|
|
2928
|
+
// compare so escaped keys still match the unescaped schema keys.
|
|
2929
|
+
if (!matched) {
|
|
2930
|
+
std::string_view raw_view = field.escaped_key();
|
|
2931
|
+
if (raw_view.find('\\') != std::string_view::npos) {
|
|
2932
|
+
std::string_view ukey;
|
|
2933
|
+
if (field.unescaped_key().get(ukey) == SUCCESS) {
|
|
2934
|
+
for (auto& e : op.entries) {
|
|
2935
|
+
if (ukey == e.key) {
|
|
2936
|
+
if (e.required_idx >= 0)
|
|
2937
|
+
required_found |= (1ULL << e.required_idx);
|
|
2938
|
+
if (e.sub) {
|
|
2939
|
+
simdjson::ondemand::value fv;
|
|
2940
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2941
|
+
if (!od_exec_plan(*e.sub, fv)) return false;
|
|
2942
|
+
}
|
|
2943
|
+
matched = true;
|
|
2944
|
+
break;
|
|
2945
|
+
}
|
|
2946
|
+
}
|
|
2947
|
+
}
|
|
2948
|
+
}
|
|
2949
|
+
}
|
|
2623
2950
|
if (!matched && op.no_additional) return false;
|
|
2624
2951
|
}
|
|
2625
2952
|
|
|
@@ -2647,6 +2974,18 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2647
2974
|
if (ap.max_items && count > *ap.max_items) return false;
|
|
2648
2975
|
break;
|
|
2649
2976
|
}
|
|
2977
|
+
case sjt::boolean: {
|
|
2978
|
+
if (plan.enum_check) {
|
|
2979
|
+
bool b;
|
|
2980
|
+
if (value.get(b) != SUCCESS) return false;
|
|
2981
|
+
if (b ? !plan.enum_check->has_true : !plan.enum_check->has_false) return false;
|
|
2982
|
+
}
|
|
2983
|
+
break;
|
|
2984
|
+
}
|
|
2985
|
+
case sjt::null: {
|
|
2986
|
+
if (plan.enum_check && !plan.enum_check->has_null) return false;
|
|
2987
|
+
break;
|
|
2988
|
+
}
|
|
2650
2989
|
default:
|
|
2651
2990
|
break;
|
|
2652
2991
|
}
|