ata-validator 0.4.8 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding/ata_napi.cpp +27 -34
- package/binding.gyp +2 -2
- package/index.js +204 -0
- package/lib/js-compiler.js +7 -3
- package/package.json +1 -1
- package/src/ata.cpp +334 -202
package/binding/ata_napi.cpp
CHANGED
|
@@ -28,7 +28,7 @@ using schema_node_ptr = std::shared_ptr<schema_node>;
|
|
|
28
28
|
|
|
29
29
|
// MUST match layout in src/ata.cpp exactly (reinterpret_cast)
|
|
30
30
|
struct schema_node {
|
|
31
|
-
|
|
31
|
+
uint8_t type_mask = 0;
|
|
32
32
|
|
|
33
33
|
std::optional<double> minimum;
|
|
34
34
|
std::optional<double> maximum;
|
|
@@ -67,11 +67,11 @@ struct schema_node {
|
|
|
67
67
|
};
|
|
68
68
|
std::vector<pattern_prop> pattern_properties;
|
|
69
69
|
|
|
70
|
-
std::optional<std::string> enum_values_raw;
|
|
71
70
|
std::vector<std::string> enum_values_minified;
|
|
72
71
|
std::optional<std::string> const_value_raw;
|
|
73
72
|
|
|
74
73
|
std::optional<std::string> format;
|
|
74
|
+
uint8_t format_id = 255;
|
|
75
75
|
|
|
76
76
|
std::vector<schema_node_ptr> all_of;
|
|
77
77
|
std::vector<schema_node_ptr> any_of;
|
|
@@ -413,46 +413,39 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
413
413
|
|
|
414
414
|
auto actual_type = napi_type_of(value);
|
|
415
415
|
|
|
416
|
-
// type
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
if (
|
|
416
|
+
// type — uses bitmask matching ata.cpp json_type enum order:
|
|
417
|
+
// 0=string, 1=number, 2=integer, 3=boolean, 4=null_value, 5=object, 6=array
|
|
418
|
+
if (node->type_mask) {
|
|
419
|
+
uint8_t val_bits = 0;
|
|
420
|
+
if (actual_type == "string") val_bits = 1u << 0;
|
|
421
|
+
else if (actual_type == "number") val_bits = 1u << 1;
|
|
422
|
+
else if (actual_type == "integer") val_bits = (1u << 2) | (1u << 1); // integer matches number
|
|
423
|
+
else if (actual_type == "boolean") val_bits = 1u << 3;
|
|
424
|
+
else if (actual_type == "null") val_bits = 1u << 4;
|
|
425
|
+
else if (actual_type == "object") val_bits = 1u << 5;
|
|
426
|
+
else if (actual_type == "array") val_bits = 1u << 6;
|
|
427
|
+
if (!(val_bits & node->type_mask)) {
|
|
428
|
+
static const char* type_names[] = {"string","number","integer","boolean","null","object","array"};
|
|
426
429
|
std::string expected;
|
|
427
|
-
for (
|
|
428
|
-
if (
|
|
429
|
-
|
|
430
|
+
for (int b = 0; b < 7; ++b) {
|
|
431
|
+
if (node->type_mask & (1u << b)) {
|
|
432
|
+
if (!expected.empty()) expected += ", ";
|
|
433
|
+
expected += type_names[b];
|
|
434
|
+
}
|
|
430
435
|
}
|
|
431
436
|
errors.push_back({ata::error_code::type_mismatch, path,
|
|
432
437
|
"expected type " + expected + ", got " + actual_type});
|
|
433
438
|
}
|
|
434
439
|
}
|
|
435
440
|
|
|
436
|
-
// enum
|
|
437
|
-
if (node->
|
|
441
|
+
// enum — compare against pre-minified canonical values
|
|
442
|
+
if (!node->enum_values_minified.empty()) {
|
|
438
443
|
std::string val_json = napi_to_json(env, value);
|
|
439
|
-
// Parse enum from raw and compare
|
|
440
444
|
bool found = false;
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
auto parse_fn = json_obj.Get("parse").As<Napi::Function>();
|
|
446
|
-
auto enum_arr = parse_fn.Call(json_obj,
|
|
447
|
-
{Napi::String::New(env, node->enum_values_raw.value())});
|
|
448
|
-
if (enum_arr.IsArray()) {
|
|
449
|
-
auto arr = enum_arr.As<Napi::Array>();
|
|
450
|
-
for (uint32_t i = 0; i < arr.Length(); ++i) {
|
|
451
|
-
std::string elem_json = napi_to_json(env, arr.Get(i));
|
|
452
|
-
if (elem_json == val_json) {
|
|
453
|
-
found = true;
|
|
454
|
-
break;
|
|
455
|
-
}
|
|
445
|
+
for (const auto& ev : node->enum_values_minified) {
|
|
446
|
+
if (ev == val_json) {
|
|
447
|
+
found = true;
|
|
448
|
+
break;
|
|
456
449
|
}
|
|
457
450
|
}
|
|
458
451
|
if (!found) {
|
|
@@ -1048,7 +1041,7 @@ static ThreadPool& pool() {
|
|
|
1048
1041
|
|
|
1049
1042
|
// --- Fast Validation Registry ---
|
|
1050
1043
|
// Global schema slots for V8 Fast API (bypasses NAPI overhead)
|
|
1051
|
-
static constexpr size_t MAX_FAST_SLOTS =
|
|
1044
|
+
static constexpr size_t MAX_FAST_SLOTS = 4096;
|
|
1052
1045
|
static ata::schema_ref g_fast_schemas[MAX_FAST_SLOTS];
|
|
1053
1046
|
static std::string g_fast_schema_jsons[MAX_FAST_SLOTS];
|
|
1054
1047
|
static uint32_t g_fast_slot_count = 0;
|
package/binding.gyp
CHANGED
|
@@ -11,10 +11,10 @@
|
|
|
11
11
|
"<!@(node -p \"require('node-addon-api').include\")",
|
|
12
12
|
"include",
|
|
13
13
|
"deps/simdjson",
|
|
14
|
-
"<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){console.log(a==='arm64'?'/opt/homebrew/opt/re2/include':'/usr/local/opt/re2/include');console.log(a==='arm64'?'/opt/homebrew/opt/abseil/include':'/usr/local/opt/abseil/include')}else{console.log('/usr/include')}\")"
|
|
14
|
+
"<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){console.log(a==='arm64'?'/opt/homebrew/opt/re2/include':'/usr/local/opt/re2/include');console.log(a==='arm64'?'/opt/homebrew/opt/abseil/include':'/usr/local/opt/abseil/include');console.log(a==='arm64'?'/opt/homebrew/opt/mimalloc/include':'/usr/local/opt/mimalloc/include')}else{console.log('/usr/include')}\")"
|
|
15
15
|
],
|
|
16
16
|
"libraries": [
|
|
17
|
-
"<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){var pre=a==='arm64'?'/opt/homebrew/opt/re2':'/usr/local/opt/re2';console.log('-L'+pre+'/lib -lre2')}else{console.log('-lre2')}\")"
|
|
17
|
+
"<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){var pre=a==='arm64'?'/opt/homebrew/opt/re2':'/usr/local/opt/re2';var mi=a==='arm64'?'/opt/homebrew/opt/mimalloc':'/usr/local/opt/mimalloc';console.log('-L'+pre+'/lib -lre2 -L'+mi+'/lib -lmimalloc')}else{console.log('-lre2')}\")"
|
|
18
18
|
],
|
|
19
19
|
"dependencies": [
|
|
20
20
|
"<!(node -p \"require('node-addon-api').gyp\")"
|
package/index.js
CHANGED
|
@@ -211,6 +211,7 @@ class Validator {
|
|
|
211
211
|
// Pure JS fast path — no NAPI, runs in V8 JIT
|
|
212
212
|
// Set ATA_FORCE_NAPI=1 to disable JS codegen (for correctness testing)
|
|
213
213
|
const schemaObj = typeof schema === "string" ? JSON.parse(schema) : schema;
|
|
214
|
+
this._schemaObj = schemaObj;
|
|
214
215
|
const jsFn = process.env.ATA_FORCE_NAPI
|
|
215
216
|
? null
|
|
216
217
|
: (compileToJSCodegen(schemaObj) || compileToJS(schemaObj));
|
|
@@ -330,6 +331,107 @@ class Validator {
|
|
|
330
331
|
});
|
|
331
332
|
}
|
|
332
333
|
|
|
334
|
+
// --- Standalone pre-compilation ---
|
|
335
|
+
// Generate a JS module string that can be written to a file.
|
|
336
|
+
// On next startup, load with Validator.fromStandalone() — zero compile time.
|
|
337
|
+
toStandalone() {
|
|
338
|
+
const jsFn = this._jsFn;
|
|
339
|
+
if (!jsFn || !jsFn._source) return null;
|
|
340
|
+
const src = jsFn._source;
|
|
341
|
+
const hybridSrc = jsFn._hybridSource || '';
|
|
342
|
+
|
|
343
|
+
// Also capture error function source for zero-compile standalone load
|
|
344
|
+
const jsErrFn = compileToJSCodegenWithErrors(
|
|
345
|
+
typeof this._schemaObj === 'object' ? this._schemaObj : {}
|
|
346
|
+
);
|
|
347
|
+
const errSrc = jsErrFn && jsErrFn._errSource ? jsErrFn._errSource : '';
|
|
348
|
+
|
|
349
|
+
return `// Auto-generated by ata-validator — do not edit
|
|
350
|
+
'use strict';
|
|
351
|
+
const boolFn = function(d) {
|
|
352
|
+
${src}
|
|
353
|
+
};
|
|
354
|
+
const hybridFactory = function(R, E) {
|
|
355
|
+
return function(d) {
|
|
356
|
+
${hybridSrc}
|
|
357
|
+
};
|
|
358
|
+
};
|
|
359
|
+
${errSrc ? `const errFn = function(d, _all) {\n ${errSrc}\n};` : 'const errFn = null;'}
|
|
360
|
+
module.exports = { boolFn, hybridFactory, errFn };
|
|
361
|
+
`;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// Load a pre-compiled standalone module. Zero schema compilation.
|
|
365
|
+
// No NAPI, no native compile — pure JS. Startup in microseconds.
|
|
366
|
+
// Usage: const v = Validator.fromStandalone(require('./compiled.js'), schema, opts)
|
|
367
|
+
static fromStandalone(mod, schema, opts) {
|
|
368
|
+
const options = opts || {};
|
|
369
|
+
const schemaObj = typeof schema === "string" ? JSON.parse(schema) : schema;
|
|
370
|
+
|
|
371
|
+
// Create a lightweight instance — skip NAPI compile entirely
|
|
372
|
+
const v = Object.create(Validator.prototype);
|
|
373
|
+
v._jsFn = mod.boolFn;
|
|
374
|
+
v._compiled = null;
|
|
375
|
+
v._fastSlot = -1;
|
|
376
|
+
|
|
377
|
+
// Mutators
|
|
378
|
+
const applyDefaults = buildDefaultsApplier(schemaObj);
|
|
379
|
+
const applyCoerce = options.coerceTypes ? buildCoercer(schemaObj) : null;
|
|
380
|
+
const applyRemove = options.removeAdditional ? buildRemover(schemaObj) : null;
|
|
381
|
+
const mutators = [applyRemove, applyCoerce, applyDefaults].filter(Boolean);
|
|
382
|
+
const preprocess = mutators.length === 0 ? null
|
|
383
|
+
: mutators.length === 1 ? mutators[0]
|
|
384
|
+
: (data) => { for (let i = 0; i < mutators.length; i++) mutators[i](data); };
|
|
385
|
+
v._preprocess = preprocess;
|
|
386
|
+
|
|
387
|
+
// Error function — use pre-compiled from standalone if available, else compile
|
|
388
|
+
let errFn = (d) => ({ valid: false, errors: [{ code: 'validation_failed', path: '', message: 'validation failed' }] });
|
|
389
|
+
if (mod.errFn) {
|
|
390
|
+
errFn = (d) => mod.errFn(d, true);
|
|
391
|
+
} else {
|
|
392
|
+
const jsErrFn = compileToJSCodegenWithErrors(schemaObj);
|
|
393
|
+
if (jsErrFn) {
|
|
394
|
+
try { jsErrFn({}, true); errFn = (d) => jsErrFn(d, true); } catch {}
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
// Prefer the hybrid validator (it returns result objects itself); otherwise fall back to boolFn + errFn
|
|
399
|
+
const hybridFn = mod.hybridFactory
|
|
400
|
+
? mod.hybridFactory(VALID_RESULT, errFn)
|
|
401
|
+
: null;
|
|
402
|
+
|
|
403
|
+
v.validate = hybridFn
|
|
404
|
+
? (preprocess ? (data) => { preprocess(data); return hybridFn(data); } : hybridFn)
|
|
405
|
+
: (preprocess
|
|
406
|
+
? (data) => { preprocess(data); return mod.boolFn(data) ? VALID_RESULT : errFn(data); }
|
|
407
|
+
: (data) => mod.boolFn(data) ? VALID_RESULT : errFn(data));
|
|
408
|
+
v.isValidObject = mod.boolFn;
|
|
409
|
+
v.isValidJSON = (jsonStr) => {
|
|
410
|
+
try { return mod.boolFn(JSON.parse(jsonStr)); } catch { return false; }
|
|
411
|
+
};
|
|
412
|
+
v.validateJSON = (jsonStr) => {
|
|
413
|
+
try {
|
|
414
|
+
const obj = JSON.parse(jsonStr);
|
|
415
|
+
return hybridFn ? hybridFn(obj) : (mod.boolFn(obj) ? VALID_RESULT : errFn(obj));
|
|
416
|
+
} catch { return { valid: false, errors: [{ code: 'invalid_json', path: '', message: 'invalid JSON' }] }; }
|
|
417
|
+
};
|
|
418
|
+
|
|
419
|
+
// Standard Schema V1
|
|
420
|
+
Object.defineProperty(v, "~standard", {
|
|
421
|
+
value: Object.freeze({
|
|
422
|
+
version: 1, vendor: "ata-validator",
|
|
423
|
+
validate(value) {
|
|
424
|
+
const result = v.validate(value);
|
|
425
|
+
if (result.valid) return { value };
|
|
426
|
+
return { issues: result.errors.map(e => ({ message: e.message, path: parsePointerPath(e.path) })) };
|
|
427
|
+
},
|
|
428
|
+
}),
|
|
429
|
+
writable: false, enumerable: false, configurable: false,
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
return v;
|
|
433
|
+
}
|
|
434
|
+
|
|
333
435
|
// Fallback methods — only used when JS codegen is unavailable
|
|
334
436
|
validate(data) {
|
|
335
437
|
if (this._preprocess) this._preprocess(data);
|
|
@@ -384,4 +486,106 @@ function version() {
|
|
|
384
486
|
return native.version();
|
|
385
487
|
}
|
|
386
488
|
|
|
489
|
+
// Bundle multiple validators into a single JS file for fast startup.
|
|
490
|
+
// Usage:
|
|
491
|
+
// const bundle = Validator.bundle([schema1, schema2, ...]);
|
|
492
|
+
// fs.writeFileSync('validators.js', bundle);
|
|
493
|
+
// // On startup:
|
|
494
|
+
// const validators = Validator.loadBundle(require('./validators.js'), [schema1, schema2, ...]);
|
|
495
|
+
Validator.bundle = function(schemas, opts) {
|
|
496
|
+
const parts = schemas.map(schema => {
|
|
497
|
+
const v = new Validator(schema, opts);
|
|
498
|
+
const standalone = v.toStandalone();
|
|
499
|
+
if (!standalone) return 'null';
|
|
500
|
+
return '(function(){' + standalone.replace("'use strict';", '').replace('module.exports = ', 'return ') + '})()';
|
|
501
|
+
});
|
|
502
|
+
return "'use strict';\nmodule.exports = [\n" + parts.join(',\n') + '\n];\n';
|
|
503
|
+
};
|
|
504
|
+
|
|
505
|
+
// Zero-dependency self-contained bundle — no require('ata-validator') needed at runtime.
|
|
506
|
+
Validator.bundleStandalone = function(schemas, opts) {
|
|
507
|
+
const R = "Object.freeze({valid:true,errors:Object.freeze([])})";
|
|
508
|
+
const fns = schemas.map(schema => {
|
|
509
|
+
const v = new Validator(schema, opts);
|
|
510
|
+
const jsFn = v._jsFn;
|
|
511
|
+
if (!jsFn || !jsFn._hybridSource) return 'null';
|
|
512
|
+
const jsErrFn = compileToJSCodegenWithErrors(
|
|
513
|
+
typeof schema === 'string' ? JSON.parse(schema) : schema
|
|
514
|
+
);
|
|
515
|
+
const errBody = jsErrFn && jsErrFn._errSource
|
|
516
|
+
? jsErrFn._errSource
|
|
517
|
+
: "return{valid:false,errors:[{code:'error',path:'',message:'validation failed'}]}";
|
|
518
|
+
return `(function(R){var E=function(d){var _all=true;${errBody}};return function(d){${jsFn._hybridSource}}})(R)`;
|
|
519
|
+
});
|
|
520
|
+
return `'use strict';\nvar R=${R};\nmodule.exports=[${fns.join(',')}];\n`;
|
|
521
|
+
};
|
|
522
|
+
|
|
523
|
+
// Compact bundle: deduplicated code. Shared template functions + per-schema params.
|
|
524
|
+
// Much smaller file → faster V8 parse → faster startup.
|
|
525
|
+
Validator.bundleCompact = function(schemas, opts) {
|
|
526
|
+
// Analyze schemas and group by structure
|
|
527
|
+
const entries = schemas.map(schema => {
|
|
528
|
+
const v = new Validator(schema, opts);
|
|
529
|
+
const jsFn = v._jsFn;
|
|
530
|
+
if (!jsFn || !jsFn._hybridSource) return null;
|
|
531
|
+
const jsErrFn = compileToJSCodegenWithErrors(
|
|
532
|
+
typeof schema === 'string' ? JSON.parse(schema) : schema
|
|
533
|
+
);
|
|
534
|
+
return {
|
|
535
|
+
hybrid: jsFn._hybridSource,
|
|
536
|
+
err: jsErrFn && jsErrFn._errSource ? jsErrFn._errSource : null,
|
|
537
|
+
};
|
|
538
|
+
});
|
|
539
|
+
|
|
540
|
+
// Deduplicate function bodies — many schemas produce identical or near-identical code
|
|
541
|
+
const bodyMap = new Map(); // body → index
|
|
542
|
+
const bodies = [];
|
|
543
|
+
const errMap = new Map();
|
|
544
|
+
const errBodies = [];
|
|
545
|
+
|
|
546
|
+
const indices = entries.map(e => {
|
|
547
|
+
if (!e) return [-1, -1];
|
|
548
|
+
let hi = bodyMap.get(e.hybrid);
|
|
549
|
+
if (hi === undefined) { hi = bodies.length; bodies.push(e.hybrid); bodyMap.set(e.hybrid, hi); }
|
|
550
|
+
let ei = -1;
|
|
551
|
+
if (e.err) {
|
|
552
|
+
ei = errMap.get(e.err);
|
|
553
|
+
if (ei === undefined) { ei = errBodies.length; errBodies.push(e.err); errMap.set(e.err, ei); }
|
|
554
|
+
}
|
|
555
|
+
return [hi, ei];
|
|
556
|
+
});
|
|
557
|
+
|
|
558
|
+
// Generate compact bundle
|
|
559
|
+
let out = "'use strict';\n";
|
|
560
|
+
out += "var R=Object.freeze({valid:true,errors:Object.freeze([])});\n";
|
|
561
|
+
|
|
562
|
+
// Shared hybrid factories
|
|
563
|
+
out += "var H=[\n";
|
|
564
|
+
out += bodies.map(b => `function(R,E){return function(d){${b}}}`).join(',\n');
|
|
565
|
+
out += "\n];\n";
|
|
566
|
+
|
|
567
|
+
// Shared error functions
|
|
568
|
+
out += "var EF=[\n";
|
|
569
|
+
out += errBodies.map(b => `function(d){var _all=true;${b}}`).join(',\n');
|
|
570
|
+
out += "\n];\n";
|
|
571
|
+
|
|
572
|
+
// Build validators from shared templates
|
|
573
|
+
out += "module.exports=[";
|
|
574
|
+
out += indices.map(([hi, ei]) => {
|
|
575
|
+
if (hi < 0) return 'null';
|
|
576
|
+
if (ei >= 0) return `H[${hi}](R,EF[${ei}])`;
|
|
577
|
+
return `H[${hi}](R,function(){return{valid:false,errors:[]}})`;
|
|
578
|
+
}).join(',');
|
|
579
|
+
out += "];\n";
|
|
580
|
+
|
|
581
|
+
return out;
|
|
582
|
+
};
|
|
583
|
+
|
|
584
|
+
Validator.loadBundle = function(mods, schemas, opts) {
|
|
585
|
+
return schemas.map((schema, i) => {
|
|
586
|
+
if (mods[i]) return Validator.fromStandalone(mods[i], schema, opts);
|
|
587
|
+
return new Validator(schema, opts);
|
|
588
|
+
});
|
|
589
|
+
};
|
|
590
|
+
|
|
387
591
|
module.exports = { Validator, validate, version, createPaddedBuffer, SIMDJSON_PADDING };
|
package/lib/js-compiler.js
CHANGED
|
@@ -531,14 +531,16 @@ function compileToJSCodegen(schema) {
|
|
|
531
531
|
const boolFn = new Function('d', body)
|
|
532
532
|
|
|
533
533
|
// Build hybrid: same body, return R instead of true, return E(d) instead of false.
|
|
534
|
-
// V8 optimizes this identically to jsFn — E(d) is dead code on valid path.
|
|
535
|
-
// 83M ops/sec vs 26M for combined. Invalid path: 34M vs 6M.
|
|
536
534
|
const hybridBody = replaceTopLevel(helperStr + checkStr + '\n return R')
|
|
537
535
|
try {
|
|
538
536
|
const factory = new Function('R', 'E', `return function(d){${hybridBody}}`)
|
|
539
537
|
boolFn._hybridFactory = factory
|
|
540
538
|
} catch {}
|
|
541
539
|
|
|
540
|
+
// Store source for standalone compilation (pre-build to file)
|
|
541
|
+
boolFn._source = body
|
|
542
|
+
boolFn._hybridSource = hybridBody
|
|
543
|
+
|
|
542
544
|
return boolFn
|
|
543
545
|
} catch {
|
|
544
546
|
return null
|
|
@@ -940,7 +942,9 @@ function compileToJSCodegenWithErrors(schema) {
|
|
|
940
942
|
lines.join('\n ') +
|
|
941
943
|
`\n return{valid:_e.length===0,errors:_e}`
|
|
942
944
|
try {
|
|
943
|
-
|
|
945
|
+
const fn = new Function('d', '_all', body)
|
|
946
|
+
fn._errSource = body
|
|
947
|
+
return fn
|
|
944
948
|
} catch {
|
|
945
949
|
return null
|
|
946
950
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ata-validator",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.10",
|
|
4
4
|
"description": "Ultra-fast JSON Schema validator. Beats ajv on every valid-path benchmark: 1.1x–2.7x faster validate(obj), 151x faster compilation, 5.9x faster parallel batch. Speculative validation with V8-optimized JS codegen, simdjson, multi-core. Standard Schema V1 compatible.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
package/src/ata.cpp
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
#include "ata.h"
|
|
2
2
|
|
|
3
|
+
// mimalloc: faster new/delete for small allocations.
|
|
4
|
+
#if __has_include(<mimalloc-new-delete.h>)
|
|
5
|
+
#include <mimalloc-new-delete.h>
|
|
6
|
+
#endif
|
|
7
|
+
|
|
3
8
|
#include <algorithm>
|
|
4
9
|
#include <cmath>
|
|
5
10
|
#include <re2/re2.h>
|
|
@@ -123,17 +128,20 @@ static bool fast_check_hostname(std::string_view s) {
|
|
|
123
128
|
return label_len > 0;
|
|
124
129
|
}
|
|
125
130
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
131
|
+
// Check format by pre-resolved numeric ID — no string comparisons.
|
|
132
|
+
static bool check_format_by_id(std::string_view sv, uint8_t fid) {
|
|
133
|
+
switch (fid) {
|
|
134
|
+
case 0: return fast_check_email(sv);
|
|
135
|
+
case 1: return fast_check_date(sv);
|
|
136
|
+
case 2: return fast_check_datetime(sv);
|
|
137
|
+
case 3: return fast_check_time(sv);
|
|
138
|
+
case 4: return fast_check_ipv4(sv);
|
|
139
|
+
case 5: return sv.find(':') != std::string_view::npos;
|
|
140
|
+
case 6: return fast_check_uri(sv);
|
|
141
|
+
case 7: return fast_check_uuid(sv);
|
|
142
|
+
case 8: return fast_check_hostname(sv);
|
|
143
|
+
default: return true; // unknown formats pass
|
|
144
|
+
}
|
|
137
145
|
}
|
|
138
146
|
|
|
139
147
|
namespace ata {
|
|
@@ -177,14 +185,74 @@ static std::string canonical_json(dom::element el) {
|
|
|
177
185
|
}
|
|
178
186
|
}
|
|
179
187
|
|
|
188
|
+
// JSON Schema type enum — avoids string comparisons on the hot path.
|
|
189
|
+
enum class json_type : uint8_t {
|
|
190
|
+
string, number, integer, boolean, null_value, object, array
|
|
191
|
+
};
|
|
192
|
+
|
|
193
|
+
static json_type json_type_from_sv(std::string_view s) {
|
|
194
|
+
if (s == "string") return json_type::string;
|
|
195
|
+
if (s == "number") return json_type::number;
|
|
196
|
+
if (s == "integer") return json_type::integer;
|
|
197
|
+
if (s == "boolean") return json_type::boolean;
|
|
198
|
+
if (s == "null") return json_type::null_value;
|
|
199
|
+
if (s == "object") return json_type::object;
|
|
200
|
+
if (s == "array") return json_type::array;
|
|
201
|
+
return json_type::string; // fallback
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
static const char* json_type_name(json_type t) {
|
|
205
|
+
switch (t) {
|
|
206
|
+
case json_type::string: return "string";
|
|
207
|
+
case json_type::number: return "number";
|
|
208
|
+
case json_type::integer: return "integer";
|
|
209
|
+
case json_type::boolean: return "boolean";
|
|
210
|
+
case json_type::null_value: return "null";
|
|
211
|
+
case json_type::object: return "object";
|
|
212
|
+
case json_type::array: return "array";
|
|
213
|
+
}
|
|
214
|
+
return "unknown";
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
// Bitmask for O(1) type checking: one bit per json_type value.
|
|
218
|
+
static uint8_t json_type_bit(json_type t) { return 1u << static_cast<uint8_t>(t); }
|
|
219
|
+
|
|
220
|
+
// Map dom::element_type to a json_type bitmask (integer elements also match "number"; doubles match only "number").
|
|
221
|
+
static uint8_t element_type_mask(dom::element_type t) {
|
|
222
|
+
switch (t) {
|
|
223
|
+
case dom::element_type::STRING: return json_type_bit(json_type::string);
|
|
224
|
+
case dom::element_type::INT64:
|
|
225
|
+
case dom::element_type::UINT64: return json_type_bit(json_type::integer) | json_type_bit(json_type::number);
|
|
226
|
+
case dom::element_type::DOUBLE: return json_type_bit(json_type::number);
|
|
227
|
+
case dom::element_type::BOOL: return json_type_bit(json_type::boolean);
|
|
228
|
+
case dom::element_type::NULL_VALUE: return json_type_bit(json_type::null_value);
|
|
229
|
+
case dom::element_type::ARRAY: return json_type_bit(json_type::array);
|
|
230
|
+
case dom::element_type::OBJECT: return json_type_bit(json_type::object);
|
|
231
|
+
}
|
|
232
|
+
return 0;
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// Resolve format string to numeric ID once, at schema-compile time (not C++ compile time); 255 means unknown.
|
|
236
|
+
static uint8_t format_id_from_string(const std::string& f) {
|
|
237
|
+
if (f == "email") return 0;
|
|
238
|
+
if (f == "date") return 1;
|
|
239
|
+
if (f == "date-time") return 2;
|
|
240
|
+
if (f == "time") return 3;
|
|
241
|
+
if (f == "ipv4") return 4;
|
|
242
|
+
if (f == "ipv6") return 5;
|
|
243
|
+
if (f == "uri" || f == "uri-reference") return 6;
|
|
244
|
+
if (f == "uuid") return 7;
|
|
245
|
+
if (f == "hostname") return 8;
|
|
246
|
+
return 255;
|
|
247
|
+
}
|
|
248
|
+
|
|
180
249
|
// Forward declarations
|
|
181
250
|
struct schema_node;
|
|
182
251
|
using schema_node_ptr = std::shared_ptr<schema_node>;
|
|
183
252
|
|
|
184
253
|
struct schema_node {
|
|
185
|
-
// type constraint
|
|
186
|
-
//
|
|
187
|
-
std::vector<std::string> types;
|
|
254
|
+
// type constraint — bitmask for O(1) type checking
|
|
255
|
+
uint8_t type_mask = 0; // bit per json_type value
|
|
188
256
|
|
|
189
257
|
// numeric
|
|
190
258
|
std::optional<double> minimum;
|
|
@@ -229,12 +297,12 @@ struct schema_node {
|
|
|
229
297
|
std::vector<pattern_prop> pattern_properties;
|
|
230
298
|
|
|
231
299
|
// enum / const
|
|
232
|
-
std::optional<std::string> enum_values_raw; // raw JSON array string
|
|
233
300
|
std::vector<std::string> enum_values_minified; // pre-minified enum values
|
|
234
301
|
std::optional<std::string> const_value_raw; // raw JSON value string
|
|
235
302
|
|
|
236
303
|
// format
|
|
237
304
|
std::optional<std::string> format;
|
|
305
|
+
uint8_t format_id = 255; // pre-resolved format ID (255 = unknown/pass)
|
|
238
306
|
|
|
239
307
|
// composition
|
|
240
308
|
std::vector<schema_node_ptr> all_of;
|
|
@@ -276,7 +344,7 @@ struct plan {
|
|
|
276
344
|
std::vector<std::string> strings;
|
|
277
345
|
std::vector<std::shared_ptr<re2::RE2>> regexes;
|
|
278
346
|
std::vector<std::vector<std::string>> enum_sets;
|
|
279
|
-
std::vector<
|
|
347
|
+
std::vector<uint8_t> type_masks;
|
|
280
348
|
std::vector<uint8_t> format_ids;
|
|
281
349
|
std::vector<std::vector<ins>> subs;
|
|
282
350
|
};
|
|
@@ -297,6 +365,10 @@ static dom::parser& tl_dom_parser() {
|
|
|
297
365
|
thread_local dom::parser p;
|
|
298
366
|
return p;
|
|
299
367
|
}
|
|
368
|
+
static dom::parser& tl_dom_key_parser() {
|
|
369
|
+
thread_local dom::parser p;
|
|
370
|
+
return p;
|
|
371
|
+
}
|
|
300
372
|
static simdjson::ondemand::parser& tl_od_parser() {
|
|
301
373
|
thread_local simdjson::ondemand::parser p;
|
|
302
374
|
return p;
|
|
@@ -341,12 +413,12 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
341
413
|
if (type_el.is<std::string_view>()) {
|
|
342
414
|
std::string_view sv;
|
|
343
415
|
type_el.get(sv);
|
|
344
|
-
node->
|
|
416
|
+
node->type_mask |= json_type_bit(json_type_from_sv(sv));
|
|
345
417
|
} else if (type_el.is<dom::array>()) {
|
|
346
418
|
dom::array type_arr; type_el.get(type_arr); for (auto t : type_arr) {
|
|
347
419
|
std::string_view sv;
|
|
348
420
|
if (t.get(sv) == SUCCESS) {
|
|
349
|
-
node->
|
|
421
|
+
node->type_mask |= json_type_bit(json_type_from_sv(sv));
|
|
350
422
|
}
|
|
351
423
|
}
|
|
352
424
|
}
|
|
@@ -526,7 +598,10 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
526
598
|
dom::element fmt_el;
|
|
527
599
|
if (obj["format"].get(fmt_el) == SUCCESS) {
|
|
528
600
|
std::string_view sv;
|
|
529
|
-
if (fmt_el.get(sv) == SUCCESS)
|
|
601
|
+
if (fmt_el.get(sv) == SUCCESS) {
|
|
602
|
+
node->format = std::string(sv);
|
|
603
|
+
node->format_id = format_id_from_string(node->format.value());
|
|
604
|
+
}
|
|
530
605
|
}
|
|
531
606
|
|
|
532
607
|
// $id (register in defs for potential resolution)
|
|
@@ -541,7 +616,6 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
541
616
|
// enum — pre-minify each value at compile time
|
|
542
617
|
dom::element enum_el;
|
|
543
618
|
if (obj["enum"].get(enum_el) == SUCCESS) {
|
|
544
|
-
node->enum_values_raw = canonical_json(enum_el);
|
|
545
619
|
if (enum_el.is<dom::array>()) {
|
|
546
620
|
dom::array enum_arr; enum_el.get(enum_arr); for (auto e : enum_arr) {
|
|
547
621
|
node->enum_values_minified.push_back(canonical_json(e));
|
|
@@ -635,41 +709,37 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
635
709
|
// Macro for early termination
|
|
636
710
|
#define ATA_CHECK_EARLY() if (!all_errors && !errors.empty()) return
|
|
637
711
|
|
|
712
|
+
using et = dom::element_type;
|
|
713
|
+
|
|
714
|
+
|
|
638
715
|
// Use string_view to avoid allocations in hot path
|
|
639
716
|
static std::string_view type_of_sv(dom::element el) {
|
|
640
717
|
switch (el.type()) {
|
|
641
|
-
case
|
|
642
|
-
case
|
|
643
|
-
case
|
|
644
|
-
case
|
|
645
|
-
case
|
|
646
|
-
case
|
|
647
|
-
case
|
|
648
|
-
case
|
|
718
|
+
case et::STRING: return "string";
|
|
719
|
+
case et::INT64:
|
|
720
|
+
case et::UINT64: return "integer";
|
|
721
|
+
case et::DOUBLE: return "number";
|
|
722
|
+
case et::BOOL: return "boolean";
|
|
723
|
+
case et::NULL_VALUE:return "null";
|
|
724
|
+
case et::ARRAY: return "array";
|
|
725
|
+
case et::OBJECT: return "object";
|
|
649
726
|
}
|
|
650
727
|
return "unknown";
|
|
651
728
|
}
|
|
652
729
|
|
|
653
|
-
static std::string type_of(dom::element el) {
|
|
654
|
-
return std::string(type_of_sv(el));
|
|
655
|
-
}
|
|
656
730
|
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
if (type == "number" && (actual == "integer" || actual == "number"))
|
|
661
|
-
return true;
|
|
662
|
-
return false;
|
|
731
|
+
// O(1) type check: test element's type bits against the schema's type_mask.
|
|
732
|
+
static bool type_matches_mask(dom::element el, uint8_t type_mask) {
|
|
733
|
+
return (element_type_mask(el.type()) & type_mask) != 0;
|
|
663
734
|
}
|
|
664
735
|
|
|
665
736
|
static double to_double(dom::element el) {
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
return 0;
|
|
737
|
+
switch (el.type()) {
|
|
738
|
+
case et::DOUBLE: { double v; el.get(v); return v; }
|
|
739
|
+
case et::INT64: { int64_t v; el.get(v); return static_cast<double>(v); }
|
|
740
|
+
case et::UINT64: { uint64_t v; el.get(v); return static_cast<double>(v); }
|
|
741
|
+
default: return 0;
|
|
742
|
+
}
|
|
673
743
|
}
|
|
674
744
|
|
|
675
745
|
// Count UTF-8 codepoints — branchless: count non-continuation bytes
|
|
@@ -838,22 +908,17 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
838
908
|
}
|
|
839
909
|
|
|
840
910
|
// type
|
|
841
|
-
if (
|
|
842
|
-
|
|
843
|
-
for (const auto& t : node->types) {
|
|
844
|
-
if (type_matches(value, t)) {
|
|
845
|
-
match = true;
|
|
846
|
-
break;
|
|
847
|
-
}
|
|
848
|
-
}
|
|
849
|
-
if (!match) {
|
|
911
|
+
if (node->type_mask) {
|
|
912
|
+
if (!type_matches_mask(value, node->type_mask)) {
|
|
850
913
|
std::string expected;
|
|
851
|
-
for (
|
|
852
|
-
if (
|
|
853
|
-
|
|
914
|
+
for (int b = 0; b < 7; ++b) {
|
|
915
|
+
if (node->type_mask & (1u << b)) {
|
|
916
|
+
if (!expected.empty()) expected += ", ";
|
|
917
|
+
expected += json_type_name(static_cast<json_type>(b));
|
|
918
|
+
}
|
|
854
919
|
}
|
|
855
920
|
errors.push_back({error_code::type_mismatch, path,
|
|
856
|
-
"expected type " + expected + ", got " +
|
|
921
|
+
"expected type " + expected + ", got " + std::string(type_of_sv(value))});
|
|
857
922
|
ATA_CHECK_EARLY();
|
|
858
923
|
}
|
|
859
924
|
}
|
|
@@ -886,8 +951,8 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
886
951
|
|
|
887
952
|
ATA_CHECK_EARLY();
|
|
888
953
|
// Numeric validations
|
|
889
|
-
auto
|
|
890
|
-
if (
|
|
954
|
+
auto vtype = value.type();
|
|
955
|
+
if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
|
|
891
956
|
double v = to_double(value);
|
|
892
957
|
if (node->minimum.has_value() && v < node->minimum.value()) {
|
|
893
958
|
errors.push_back({error_code::minimum_violation, path,
|
|
@@ -924,7 +989,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
924
989
|
}
|
|
925
990
|
|
|
926
991
|
// String validations
|
|
927
|
-
if (
|
|
992
|
+
if (vtype == et::STRING) {
|
|
928
993
|
std::string_view sv;
|
|
929
994
|
value.get(sv);
|
|
930
995
|
uint64_t len = utf8_length(sv);
|
|
@@ -950,7 +1015,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
950
1015
|
}
|
|
951
1016
|
|
|
952
1017
|
if (node->format.has_value()) {
|
|
953
|
-
if (!
|
|
1018
|
+
if (!check_format_by_id(sv, node->format_id)) {
|
|
954
1019
|
errors.push_back({error_code::format_mismatch, path,
|
|
955
1020
|
"string does not match format: " +
|
|
956
1021
|
node->format.value()});
|
|
@@ -959,10 +1024,14 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
959
1024
|
}
|
|
960
1025
|
|
|
961
1026
|
// Array validations
|
|
962
|
-
if (
|
|
1027
|
+
if (vtype == et::ARRAY) {
|
|
963
1028
|
dom::array arr; value.get(arr);
|
|
964
|
-
uint64_t arr_size =
|
|
965
|
-
|
|
1029
|
+
uint64_t arr_size = arr.size();
|
|
1030
|
+
if(arr_size == 0xFFFFFF) [[unlikely]] {
|
|
1031
|
+
// size() saturates at 0xFFFFFF for very large arrays — count the elements manually to get the true length
|
|
1032
|
+
arr_size = 0;
|
|
1033
|
+
for ([[maybe_unused]] auto _ : arr) ++arr_size;
|
|
1034
|
+
}
|
|
966
1035
|
|
|
967
1036
|
if (node->min_items.has_value() && arr_size < node->min_items.value()) {
|
|
968
1037
|
errors.push_back({error_code::min_items_violation, path,
|
|
@@ -978,13 +1047,29 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
978
1047
|
}
|
|
979
1048
|
|
|
980
1049
|
if (node->unique_items) {
|
|
981
|
-
std::set<std::string> seen;
|
|
982
1050
|
bool has_dup = false;
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
1051
|
+
// Fast path: check if all items are the same simple type
|
|
1052
|
+
auto first_it = arr.begin();
|
|
1053
|
+
if (first_it != arr.end()) {
|
|
1054
|
+
auto first_type = (*first_it).type();
|
|
1055
|
+
bool all_same = true;
|
|
1056
|
+
for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
|
|
1057
|
+
if (all_same && first_type == et::STRING) {
|
|
1058
|
+
std::set<std::string_view> seen;
|
|
1059
|
+
for (auto item : arr) {
|
|
1060
|
+
std::string_view sv; item.get(sv);
|
|
1061
|
+
if (!seen.insert(sv).second) { has_dup = true; break; }
|
|
1062
|
+
}
|
|
1063
|
+
} else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
|
|
1064
|
+
std::set<double> seen;
|
|
1065
|
+
for (auto item : arr) {
|
|
1066
|
+
if (!seen.insert(to_double(item)).second) { has_dup = true; break; }
|
|
1067
|
+
}
|
|
1068
|
+
} else {
|
|
1069
|
+
std::set<std::string> seen;
|
|
1070
|
+
for (auto item : arr) {
|
|
1071
|
+
if (!seen.insert(canonical_json(item)).second) { has_dup = true; break; }
|
|
1072
|
+
}
|
|
988
1073
|
}
|
|
989
1074
|
}
|
|
990
1075
|
if (has_dup) {
|
|
@@ -1012,9 +1097,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1012
1097
|
if (node->contains_schema) {
|
|
1013
1098
|
uint64_t match_count = 0;
|
|
1014
1099
|
for (auto item : arr) {
|
|
1015
|
-
|
|
1016
|
-
validate_node(node->contains_schema, item, path, ctx, tmp, false);
|
|
1017
|
-
if (tmp.empty()) ++match_count;
|
|
1100
|
+
if (validate_fast(node->contains_schema, item, ctx)) ++match_count;
|
|
1018
1101
|
}
|
|
1019
1102
|
uint64_t min_c = node->min_contains.value_or(1);
|
|
1020
1103
|
uint64_t max_c = node->max_contains.value_or(arr_size);
|
|
@@ -1032,24 +1115,26 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1032
1115
|
}
|
|
1033
1116
|
|
|
1034
1117
|
// Object validations
|
|
1035
|
-
if (
|
|
1118
|
+
if (vtype == et::OBJECT) {
|
|
1036
1119
|
dom::object obj; value.get(obj);
|
|
1037
|
-
uint64_t prop_count = 0;
|
|
1038
|
-
for ([[maybe_unused]] auto _ : obj) ++prop_count;
|
|
1039
1120
|
|
|
1040
|
-
if (node->min_properties.has_value()
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1121
|
+
if (node->min_properties.has_value() || node->max_properties.has_value()) {
|
|
1122
|
+
uint64_t prop_count = 0;
|
|
1123
|
+
for ([[maybe_unused]] auto _ : obj) ++prop_count;
|
|
1124
|
+
if (node->min_properties.has_value() &&
|
|
1125
|
+
prop_count < node->min_properties.value()) {
|
|
1126
|
+
errors.push_back({error_code::min_properties_violation, path,
|
|
1127
|
+
"object has " + std::to_string(prop_count) +
|
|
1128
|
+
" properties, minimum " +
|
|
1129
|
+
std::to_string(node->min_properties.value())});
|
|
1130
|
+
}
|
|
1131
|
+
if (node->max_properties.has_value() &&
|
|
1132
|
+
prop_count > node->max_properties.value()) {
|
|
1133
|
+
errors.push_back({error_code::max_properties_violation, path,
|
|
1134
|
+
"object has " + std::to_string(prop_count) +
|
|
1135
|
+
" properties, maximum " +
|
|
1136
|
+
std::to_string(node->max_properties.value())});
|
|
1137
|
+
}
|
|
1053
1138
|
}
|
|
1054
1139
|
|
|
1055
1140
|
// required
|
|
@@ -1094,17 +1179,50 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1094
1179
|
}
|
|
1095
1180
|
}
|
|
1096
1181
|
}
|
|
1097
|
-
|
|
1098
|
-
// propertyNames
|
|
1182
|
+
// propertyNames — validate key as string directly when possible
|
|
1099
1183
|
if (node->property_names_schema) {
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1184
|
+
auto pn = node->property_names_schema;
|
|
1185
|
+
bool string_only = pn->ref.empty() && pn->all_of.empty() &&
|
|
1186
|
+
pn->any_of.empty() && pn->one_of.empty() && !pn->not_schema &&
|
|
1187
|
+
!pn->if_schema && pn->enum_values_minified.empty() &&
|
|
1188
|
+
!pn->const_value_raw.has_value();
|
|
1189
|
+
if (string_only) {
|
|
1190
|
+
// Fast path: validate string constraints on key directly
|
|
1191
|
+
for (auto [key, val] : obj) {
|
|
1192
|
+
std::string_view key_sv(key);
|
|
1193
|
+
if (pn->type_mask && !(pn->type_mask & json_type_bit(json_type::string))) {
|
|
1194
|
+
errors.push_back({error_code::type_mismatch, path,
|
|
1195
|
+
"propertyNames: key is string but schema requires different type"});
|
|
1196
|
+
continue;
|
|
1197
|
+
}
|
|
1198
|
+
uint64_t len = utf8_length(key_sv);
|
|
1199
|
+
if (pn->min_length.has_value() && len < pn->min_length.value()) {
|
|
1200
|
+
errors.push_back({error_code::min_length_violation, path,
|
|
1201
|
+
"propertyNames: key too short: " + std::string(key_sv)});
|
|
1202
|
+
}
|
|
1203
|
+
if (pn->max_length.has_value() && len > pn->max_length.value()) {
|
|
1204
|
+
errors.push_back({error_code::max_length_violation, path,
|
|
1205
|
+
"propertyNames: key too long: " + std::string(key_sv)});
|
|
1206
|
+
}
|
|
1207
|
+
if (pn->compiled_pattern) {
|
|
1208
|
+
if (!re2::RE2::PartialMatch(re2::StringPiece(key_sv.data(), key_sv.size()), *pn->compiled_pattern)) {
|
|
1209
|
+
errors.push_back({error_code::pattern_mismatch, path,
|
|
1210
|
+
"propertyNames: key does not match pattern: " + std::string(key_sv)});
|
|
1211
|
+
}
|
|
1212
|
+
}
|
|
1213
|
+
if (pn->format.has_value() && !check_format_by_id(key_sv, pn->format_id)) {
|
|
1214
|
+
errors.push_back({error_code::format_mismatch, path,
|
|
1215
|
+
"propertyNames: key does not match format: " + std::string(key_sv)});
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
} else {
|
|
1219
|
+
// Fallback: parse key as JSON string element
|
|
1220
|
+
for (auto [key, val] : obj) {
|
|
1221
|
+
std::string key_json = "\"" + std::string(key) + "\"";
|
|
1222
|
+
auto key_result = tl_dom_key_parser().parse(key_json);
|
|
1223
|
+
if (!key_result.error()) {
|
|
1224
|
+
validate_node(pn, key_result.value(), path, ctx, errors, all_errors);
|
|
1225
|
+
}
|
|
1108
1226
|
}
|
|
1109
1227
|
}
|
|
1110
1228
|
}
|
|
@@ -1230,12 +1348,8 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1230
1348
|
}
|
|
1231
1349
|
|
|
1232
1350
|
// type
|
|
1233
|
-
if (
|
|
1234
|
-
|
|
1235
|
-
for (const auto& t : node->types) {
|
|
1236
|
-
if (type_matches(value, t)) { match = true; break; }
|
|
1237
|
-
}
|
|
1238
|
-
if (!match) [[unlikely]] return false;
|
|
1351
|
+
if (node->type_mask) {
|
|
1352
|
+
if (!type_matches_mask(value, node->type_mask)) [[unlikely]] return false;
|
|
1239
1353
|
}
|
|
1240
1354
|
|
|
1241
1355
|
// enum
|
|
@@ -1253,10 +1367,10 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1253
1367
|
if (canonical_json(value) != node->const_value_raw.value()) [[unlikely]] return false;
|
|
1254
1368
|
}
|
|
1255
1369
|
|
|
1256
|
-
auto
|
|
1370
|
+
auto vtype = value.type();
|
|
1257
1371
|
|
|
1258
1372
|
// Numeric
|
|
1259
|
-
if (
|
|
1373
|
+
if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
|
|
1260
1374
|
double v = to_double(value);
|
|
1261
1375
|
if (node->minimum.has_value() && v < node->minimum.value()) return false;
|
|
1262
1376
|
if (node->maximum.has_value() && v > node->maximum.value()) return false;
|
|
@@ -1269,7 +1383,7 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1269
1383
|
}
|
|
1270
1384
|
|
|
1271
1385
|
// String
|
|
1272
|
-
if (
|
|
1386
|
+
if (vtype == et::STRING) {
|
|
1273
1387
|
std::string_view sv;
|
|
1274
1388
|
value.get(sv);
|
|
1275
1389
|
uint64_t len = utf8_length(sv);
|
|
@@ -1279,22 +1393,38 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1279
1393
|
if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern))
|
|
1280
1394
|
return false;
|
|
1281
1395
|
}
|
|
1282
|
-
if (node->format.has_value() && !
|
|
1396
|
+
if (node->format.has_value() && !check_format_by_id(sv, node->format_id)) return false;
|
|
1283
1397
|
}
|
|
1284
1398
|
|
|
1285
1399
|
// Array
|
|
1286
|
-
if (
|
|
1400
|
+
if (vtype == et::ARRAY) {
|
|
1287
1401
|
dom::array arr; value.get(arr);
|
|
1288
|
-
uint64_t arr_size =
|
|
1289
|
-
|
|
1402
|
+
uint64_t arr_size = arr.size();
|
|
1403
|
+
if(arr_size == 0xFFFFFF) [[unlikely]] {
|
|
1404
|
+
// Fallback for large arrays where size() saturates — count manually to avoid overflow
|
|
1405
|
+
arr_size = 0;
|
|
1406
|
+
for ([[maybe_unused]] auto _ : arr) ++arr_size;
|
|
1407
|
+
}
|
|
1290
1408
|
|
|
1291
1409
|
if (node->min_items.has_value() && arr_size < node->min_items.value()) return false;
|
|
1292
1410
|
if (node->max_items.has_value() && arr_size > node->max_items.value()) return false;
|
|
1293
1411
|
|
|
1294
1412
|
if (node->unique_items) {
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1413
|
+
auto first_it = arr.begin();
|
|
1414
|
+
if (first_it != arr.end()) {
|
|
1415
|
+
auto first_type = (*first_it).type();
|
|
1416
|
+
bool all_same = true;
|
|
1417
|
+
for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
|
|
1418
|
+
if (all_same && first_type == et::STRING) {
|
|
1419
|
+
std::set<std::string_view> seen;
|
|
1420
|
+
for (auto item : arr) { std::string_view sv; item.get(sv); if (!seen.insert(sv).second) return false; }
|
|
1421
|
+
} else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
|
|
1422
|
+
std::set<double> seen;
|
|
1423
|
+
for (auto item : arr) { if (!seen.insert(to_double(item)).second) return false; }
|
|
1424
|
+
} else {
|
|
1425
|
+
std::set<std::string> seen;
|
|
1426
|
+
for (auto item : arr) { if (!seen.insert(canonical_json(item)).second) return false; }
|
|
1427
|
+
}
|
|
1298
1428
|
}
|
|
1299
1429
|
}
|
|
1300
1430
|
|
|
@@ -1321,7 +1451,7 @@ static bool validate_fast(const schema_node_ptr& node,
|
|
|
1321
1451
|
}
|
|
1322
1452
|
|
|
1323
1453
|
// Object
|
|
1324
|
-
if (
|
|
1454
|
+
if (vtype == et::OBJECT) {
|
|
1325
1455
|
dom::object obj; value.get(obj);
|
|
1326
1456
|
|
|
1327
1457
|
if (node->min_properties.has_value() || node->max_properties.has_value()) {
|
|
@@ -1438,19 +1568,27 @@ static void cg_compile(const schema_node* n, cg::plan& p,
|
|
|
1438
1568
|
return;
|
|
1439
1569
|
}
|
|
1440
1570
|
// Type
|
|
1441
|
-
if (
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1571
|
+
if (n->type_mask) {
|
|
1572
|
+
int popcount = __builtin_popcount(n->type_mask);
|
|
1573
|
+
if (popcount == 1) {
|
|
1574
|
+
// Single type — emit specific opcode
|
|
1575
|
+
for (int b = 0; b < 7; ++b) {
|
|
1576
|
+
if (n->type_mask & (1u << b)) {
|
|
1577
|
+
switch (static_cast<json_type>(b)) {
|
|
1578
|
+
case json_type::object: out.push_back({cg::op::EXPECT_OBJECT}); break;
|
|
1579
|
+
case json_type::array: out.push_back({cg::op::EXPECT_ARRAY}); break;
|
|
1580
|
+
case json_type::string: out.push_back({cg::op::EXPECT_STRING}); break;
|
|
1581
|
+
case json_type::number: out.push_back({cg::op::EXPECT_NUMBER}); break;
|
|
1582
|
+
case json_type::integer: out.push_back({cg::op::EXPECT_INTEGER}); break;
|
|
1583
|
+
case json_type::boolean: out.push_back({cg::op::EXPECT_BOOLEAN}); break;
|
|
1584
|
+
case json_type::null_value: out.push_back({cg::op::EXPECT_NULL}); break;
|
|
1585
|
+
}
|
|
1586
|
+
break;
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1451
1589
|
} else {
|
|
1452
|
-
uint32_t i = (uint32_t)p.
|
|
1453
|
-
p.
|
|
1590
|
+
uint32_t i = (uint32_t)p.type_masks.size();
|
|
1591
|
+
p.type_masks.push_back(n->type_mask);
|
|
1454
1592
|
out.push_back({cg::op::EXPECT_TYPE_MULTI, i});
|
|
1455
1593
|
}
|
|
1456
1594
|
}
|
|
@@ -1480,13 +1618,7 @@ static void cg_compile(const schema_node* n, cg::plan& p,
|
|
|
1480
1618
|
if (n->compiled_pattern) { uint32_t i=(uint32_t)p.regexes.size(); p.regexes.push_back(n->compiled_pattern); out.push_back({cg::op::CHECK_PATTERN,i}); }
|
|
1481
1619
|
if (n->format.has_value()) {
|
|
1482
1620
|
uint32_t i=(uint32_t)p.format_ids.size();
|
|
1483
|
-
|
|
1484
|
-
auto& f=*n->format;
|
|
1485
|
-
if(f=="email")fid=0;else if(f=="date")fid=1;else if(f=="date-time")fid=2;
|
|
1486
|
-
else if(f=="time")fid=3;else if(f=="ipv4")fid=4;else if(f=="ipv6")fid=5;
|
|
1487
|
-
else if(f=="uri"||f=="uri-reference")fid=6;else if(f=="uuid")fid=7;
|
|
1488
|
-
else if(f=="hostname")fid=8;
|
|
1489
|
-
p.format_ids.push_back(fid);
|
|
1621
|
+
p.format_ids.push_back(n->format_id);
|
|
1490
1622
|
out.push_back({cg::op::CHECK_FORMAT,i});
|
|
1491
1623
|
}
|
|
1492
1624
|
// Array
|
|
@@ -1530,44 +1662,43 @@ static void cg_compile(const schema_node* n, cg::plan& p,
|
|
|
1530
1662
|
}
|
|
1531
1663
|
|
|
1532
1664
|
// --- Codegen executor ---
|
|
1533
|
-
static const char* fmt_names[]={"email","date","date-time","time","ipv4","ipv6","uri","uuid","hostname"};
|
|
1534
1665
|
|
|
1535
1666
|
static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
1536
1667
|
dom::element value) {
|
|
1537
|
-
auto t =
|
|
1668
|
+
auto t = value.type();
|
|
1669
|
+
bool t_numeric = (t == et::INT64 || t == et::UINT64 || t == et::DOUBLE);
|
|
1670
|
+
double t_dval = t_numeric ? to_double(value) : 0.0;
|
|
1538
1671
|
for (size_t i=0; i<code.size(); ++i) {
|
|
1539
1672
|
auto& c = code[i];
|
|
1540
1673
|
switch(c.o) {
|
|
1541
1674
|
case cg::op::END: return true;
|
|
1542
|
-
case cg::op::EXPECT_OBJECT: if(t!=
|
|
1543
|
-
case cg::op::EXPECT_ARRAY: if(t!=
|
|
1544
|
-
case cg::op::EXPECT_STRING: if(t!=
|
|
1545
|
-
case cg::op::EXPECT_NUMBER: if(
|
|
1546
|
-
case cg::op::EXPECT_INTEGER: if(t!=
|
|
1547
|
-
case cg::op::EXPECT_BOOLEAN: if(t!=
|
|
1548
|
-
case cg::op::EXPECT_NULL: if(t!=
|
|
1675
|
+
case cg::op::EXPECT_OBJECT: if(t!=et::OBJECT) return false; break;
|
|
1676
|
+
case cg::op::EXPECT_ARRAY: if(t!=et::ARRAY) return false; break;
|
|
1677
|
+
case cg::op::EXPECT_STRING: if(t!=et::STRING) return false; break;
|
|
1678
|
+
case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
|
|
1679
|
+
case cg::op::EXPECT_INTEGER: if(t!=et::INT64&&t!=et::UINT64) return false; break;
|
|
1680
|
+
case cg::op::EXPECT_BOOLEAN: if(t!=et::BOOL) return false; break;
|
|
1681
|
+
case cg::op::EXPECT_NULL: if(t!=et::NULL_VALUE) return false; break;
|
|
1549
1682
|
case cg::op::EXPECT_TYPE_MULTI: {
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
case cg::op::
|
|
1555
|
-
case cg::op::
|
|
1556
|
-
case cg::op::
|
|
1557
|
-
case cg::op::
|
|
1558
|
-
case cg::op::
|
|
1559
|
-
case cg::op::
|
|
1560
|
-
case cg::op::
|
|
1561
|
-
case cg::op::
|
|
1562
|
-
case cg::op::
|
|
1563
|
-
case cg::op::
|
|
1564
|
-
case cg::op::
|
|
1565
|
-
case cg::op::
|
|
1566
|
-
case cg::op::
|
|
1567
|
-
case cg::op::
|
|
1568
|
-
case cg::op::
|
|
1569
|
-
case cg::op::CHECK_MAX_PROPS: if(t=="object"){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
|
|
1570
|
-
case cg::op::OBJ_PROPS_START: if(t=="object"){
|
|
1683
|
+
if(!(element_type_mask(t) & p.type_masks[c.a])) return false; break;
|
|
1684
|
+
}
|
|
1685
|
+
case cg::op::CHECK_MINIMUM: if(t_numeric&&t_dval<p.doubles[c.a])return false; break;
|
|
1686
|
+
case cg::op::CHECK_MAXIMUM: if(t_numeric&&t_dval>p.doubles[c.a])return false; break;
|
|
1687
|
+
case cg::op::CHECK_EX_MINIMUM: if(t_numeric&&t_dval<=p.doubles[c.a])return false; break;
|
|
1688
|
+
case cg::op::CHECK_EX_MAXIMUM: if(t_numeric&&t_dval>=p.doubles[c.a])return false; break;
|
|
1689
|
+
case cg::op::CHECK_MULTIPLE_OF: if(t_numeric){double d=p.doubles[c.a],r=std::fmod(t_dval,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break;
|
|
1690
|
+
case cg::op::CHECK_MIN_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)<c.a)return false;} break;
|
|
1691
|
+
case cg::op::CHECK_MAX_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)>c.a)return false;} break;
|
|
1692
|
+
case cg::op::CHECK_PATTERN: if(t==et::STRING){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
|
|
1693
|
+
case cg::op::CHECK_FORMAT: if(t==et::STRING){std::string_view sv;value.get(sv);if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
|
|
1694
|
+
case cg::op::CHECK_MIN_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s<c.a)return false;} break;
|
|
1695
|
+
case cg::op::CHECK_MAX_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s>c.a)return false;} break;
|
|
1696
|
+
case cg::op::CHECK_UNIQUE_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);std::set<std::string> seen;for(auto x:a)if(!seen.insert(canonical_json(x)).second)return false;} break;
|
|
1697
|
+
case cg::op::ARRAY_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);for(auto x:a)if(!cg_exec(p,p.subs[c.a],x))return false;} break;
|
|
1698
|
+
case cg::op::CHECK_REQUIRED: if(t==et::OBJECT){dom::object o;value.get(o);dom::element d;if(o[p.strings[c.a]].get(d)!=SUCCESS)return false;} break;
|
|
1699
|
+
case cg::op::CHECK_MIN_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n<c.a)return false;} break;
|
|
1700
|
+
case cg::op::CHECK_MAX_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
|
|
1701
|
+
case cg::op::OBJ_PROPS_START: if(t==et::OBJECT){
|
|
1571
1702
|
dom::object o; value.get(o);
|
|
1572
1703
|
// collect prop defs
|
|
1573
1704
|
struct pd{std::string_view nm;uint32_t si;};
|
|
@@ -1587,13 +1718,13 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
|
1587
1718
|
case cg::op::OBJ_PROP: case cg::op::OBJ_PROPS_END: case cg::op::CHECK_NO_ADDITIONAL: break;
|
|
1588
1719
|
case cg::op::CHECK_ENUM_STR: {
|
|
1589
1720
|
auto& es=p.enum_sets[c.a]; bool f=false;
|
|
1590
|
-
if(t==
|
|
1721
|
+
if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
|
|
1591
1722
|
if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
|
|
1592
1723
|
if(!f)return false; break;
|
|
1593
1724
|
}
|
|
1594
1725
|
case cg::op::CHECK_ENUM: {
|
|
1595
1726
|
auto& es=p.enum_sets[c.a]; bool f=false;
|
|
1596
|
-
if(t==
|
|
1727
|
+
if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
|
|
1597
1728
|
if(!f&&value.is<int64_t>()){int64_t v;value.get(v);auto s=std::to_string(v);for(auto& e:es)if(e==s){f=true;break;}}
|
|
1598
1729
|
if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
|
|
1599
1730
|
if(!f)return false; break;
|
|
@@ -1609,51 +1740,53 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
|
1609
1740
|
// Uses simdjson On Demand API to avoid materializing the full DOM tree.
|
|
1610
1741
|
// Returns: true = valid, false = invalid OR unsupported (fallback to DOM).
|
|
1611
1742
|
|
|
1612
|
-
static
|
|
1743
|
+
static json_type od_type(simdjson::ondemand::value& v) {
|
|
1613
1744
|
switch (v.type()) {
|
|
1614
|
-
case simdjson::ondemand::json_type::object: return
|
|
1615
|
-
case simdjson::ondemand::json_type::array: return
|
|
1616
|
-
case simdjson::ondemand::json_type::string: return
|
|
1617
|
-
case simdjson::ondemand::json_type::boolean: return
|
|
1618
|
-
case simdjson::ondemand::json_type::null: return
|
|
1745
|
+
case simdjson::ondemand::json_type::object: return json_type::object;
|
|
1746
|
+
case simdjson::ondemand::json_type::array: return json_type::array;
|
|
1747
|
+
case simdjson::ondemand::json_type::string: return json_type::string;
|
|
1748
|
+
case simdjson::ondemand::json_type::boolean: return json_type::boolean;
|
|
1749
|
+
case simdjson::ondemand::json_type::null: return json_type::null_value;
|
|
1619
1750
|
case simdjson::ondemand::json_type::number: {
|
|
1620
1751
|
simdjson::ondemand::number_type nt;
|
|
1621
1752
|
if (v.get_number_type().get(nt) == SUCCESS &&
|
|
1622
1753
|
nt == simdjson::ondemand::number_type::floating_point_number)
|
|
1623
|
-
return
|
|
1624
|
-
return
|
|
1754
|
+
return json_type::number;
|
|
1755
|
+
return json_type::integer;
|
|
1625
1756
|
}
|
|
1626
1757
|
}
|
|
1627
|
-
return
|
|
1758
|
+
return json_type::string;
|
|
1628
1759
|
}
|
|
1629
1760
|
|
|
1630
1761
|
static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
1631
1762
|
simdjson::ondemand::value value) {
|
|
1632
1763
|
auto t = od_type(value);
|
|
1764
|
+
bool t_numeric = (t == json_type::integer || t == json_type::number);
|
|
1633
1765
|
for (size_t i = 0; i < code.size(); ++i) {
|
|
1634
1766
|
auto& c = code[i];
|
|
1635
1767
|
switch (c.o) {
|
|
1636
1768
|
case cg::op::END: return true;
|
|
1637
|
-
case cg::op::EXPECT_OBJECT: if(t!=
|
|
1638
|
-
case cg::op::EXPECT_ARRAY: if(t!=
|
|
1639
|
-
case cg::op::EXPECT_STRING: if(t!=
|
|
1640
|
-
case cg::op::EXPECT_NUMBER: if(
|
|
1641
|
-
case cg::op::EXPECT_INTEGER: if(t!=
|
|
1642
|
-
case cg::op::EXPECT_BOOLEAN: if(t!=
|
|
1643
|
-
case cg::op::EXPECT_NULL: if(t!=
|
|
1769
|
+
case cg::op::EXPECT_OBJECT: if(t!=json_type::object) return false; break;
|
|
1770
|
+
case cg::op::EXPECT_ARRAY: if(t!=json_type::array) return false; break;
|
|
1771
|
+
case cg::op::EXPECT_STRING: if(t!=json_type::string) return false; break;
|
|
1772
|
+
case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
|
|
1773
|
+
case cg::op::EXPECT_INTEGER: if(t!=json_type::integer) return false; break;
|
|
1774
|
+
case cg::op::EXPECT_BOOLEAN: if(t!=json_type::boolean) return false; break;
|
|
1775
|
+
case cg::op::EXPECT_NULL: if(t!=json_type::null_value) return false; break;
|
|
1644
1776
|
case cg::op::EXPECT_TYPE_MULTI: {
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
if(
|
|
1777
|
+
// integer matches both "integer" and "number" type constraints
|
|
1778
|
+
uint8_t tbits = json_type_bit(t);
|
|
1779
|
+
if (t == json_type::integer) tbits |= json_type_bit(json_type::number);
|
|
1780
|
+
if(!(tbits & p.type_masks[c.a])) return false; break;
|
|
1648
1781
|
}
|
|
1649
1782
|
case cg::op::CHECK_MINIMUM:
|
|
1650
1783
|
case cg::op::CHECK_MAXIMUM:
|
|
1651
1784
|
case cg::op::CHECK_EX_MINIMUM:
|
|
1652
1785
|
case cg::op::CHECK_EX_MAXIMUM:
|
|
1653
1786
|
case cg::op::CHECK_MULTIPLE_OF: {
|
|
1654
|
-
if (
|
|
1787
|
+
if (t_numeric) {
|
|
1655
1788
|
double v;
|
|
1656
|
-
if (t==
|
|
1789
|
+
if (t==json_type::integer) { int64_t iv; if(value.get(iv)!=SUCCESS) return false; v=(double)iv; }
|
|
1657
1790
|
else { if(value.get(v)!=SUCCESS) return false; }
|
|
1658
1791
|
double d=p.doubles[c.a];
|
|
1659
1792
|
if(c.o==cg::op::CHECK_MINIMUM && v<d) return false;
|
|
@@ -1664,39 +1797,39 @@ static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
|
|
|
1664
1797
|
}
|
|
1665
1798
|
break;
|
|
1666
1799
|
}
|
|
1667
|
-
case cg::op::CHECK_MIN_LENGTH: if(t==
|
|
1668
|
-
case cg::op::CHECK_MAX_LENGTH: if(t==
|
|
1669
|
-
case cg::op::CHECK_PATTERN: if(t==
|
|
1670
|
-
case cg::op::CHECK_FORMAT: if(t==
|
|
1671
|
-
case cg::op::CHECK_MIN_ITEMS: if(t==
|
|
1800
|
+
case cg::op::CHECK_MIN_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)<c.a) return false;} break;
|
|
1801
|
+
case cg::op::CHECK_MAX_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)>c.a) return false;} break;
|
|
1802
|
+
case cg::op::CHECK_PATTERN: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
|
|
1803
|
+
case cg::op::CHECK_FORMAT: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
|
|
1804
|
+
case cg::op::CHECK_MIN_ITEMS: if(t==json_type::array){
|
|
1672
1805
|
simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
|
|
1673
1806
|
uint64_t s=0; for(auto x:a){(void)x;++s;} if(s<c.a) return false;
|
|
1674
1807
|
} break;
|
|
1675
|
-
case cg::op::CHECK_MAX_ITEMS: if(t==
|
|
1808
|
+
case cg::op::CHECK_MAX_ITEMS: if(t==json_type::array){
|
|
1676
1809
|
simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
|
|
1677
1810
|
uint64_t s=0; for(auto x:a){(void)x;++s;} if(s>c.a) return false;
|
|
1678
1811
|
} break;
|
|
1679
|
-
case cg::op::ARRAY_ITEMS: if(t==
|
|
1812
|
+
case cg::op::ARRAY_ITEMS: if(t==json_type::array){
|
|
1680
1813
|
simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
|
|
1681
1814
|
for(auto elem:a){
|
|
1682
1815
|
simdjson::ondemand::value v; if(elem.get(v)!=SUCCESS) return false;
|
|
1683
1816
|
if(!od_exec(p,p.subs[c.a],v)) return false;
|
|
1684
1817
|
}
|
|
1685
1818
|
} break;
|
|
1686
|
-
case cg::op::CHECK_REQUIRED: if(t==
|
|
1819
|
+
case cg::op::CHECK_REQUIRED: if(t==json_type::object){
|
|
1687
1820
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1688
1821
|
auto f = o.find_field_unordered(p.strings[c.a]);
|
|
1689
1822
|
if(f.error()) return false;
|
|
1690
1823
|
} break;
|
|
1691
|
-
case cg::op::CHECK_MIN_PROPS: if(t==
|
|
1824
|
+
case cg::op::CHECK_MIN_PROPS: if(t==json_type::object){
|
|
1692
1825
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1693
1826
|
uint64_t n=0; for(auto f:o){(void)f;++n;} if(n<c.a) return false;
|
|
1694
1827
|
} break;
|
|
1695
|
-
case cg::op::CHECK_MAX_PROPS: if(t==
|
|
1828
|
+
case cg::op::CHECK_MAX_PROPS: if(t==json_type::object){
|
|
1696
1829
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1697
1830
|
uint64_t n=0; for(auto f:o){(void)f;++n;} if(n>c.a) return false;
|
|
1698
1831
|
} break;
|
|
1699
|
-
case cg::op::OBJ_PROPS_START: if(t==
|
|
1832
|
+
case cg::op::OBJ_PROPS_START: if(t==json_type::object){
|
|
1700
1833
|
simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
|
|
1701
1834
|
struct pd{std::string_view nm;uint32_t si;};
|
|
1702
1835
|
std::vector<pd> props; bool no_add=false;
|
|
@@ -1849,10 +1982,9 @@ validation_result validate(const schema_ref& schema, std::string_view json,
|
|
|
1849
1982
|
// Codegen said invalid OR hit COMPOSITION — fall through to tree walker
|
|
1850
1983
|
}
|
|
1851
1984
|
|
|
1852
|
-
// Slow path:
|
|
1853
|
-
auto result2 = dom_p.parse(psv);
|
|
1985
|
+
// Slow path: tree walker with error details (reuse already-parsed DOM)
|
|
1854
1986
|
std::vector<validation_error> errors;
|
|
1855
|
-
validate_node(schema.impl->root,
|
|
1987
|
+
validate_node(schema.impl->root, result.value(), "", *schema.impl, errors,
|
|
1856
1988
|
opts.all_errors);
|
|
1857
1989
|
|
|
1858
1990
|
return {errors.empty(), std::move(errors)};
|