ata-validator 0.4.8 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ using schema_node_ptr = std::shared_ptr<schema_node>;
28
28
 
29
29
  // MUST match layout in src/ata.cpp exactly (reinterpret_cast)
30
30
  struct schema_node {
31
- std::vector<std::string> types;
31
+ uint8_t type_mask = 0;
32
32
 
33
33
  std::optional<double> minimum;
34
34
  std::optional<double> maximum;
@@ -67,11 +67,11 @@ struct schema_node {
67
67
  };
68
68
  std::vector<pattern_prop> pattern_properties;
69
69
 
70
- std::optional<std::string> enum_values_raw;
71
70
  std::vector<std::string> enum_values_minified;
72
71
  std::optional<std::string> const_value_raw;
73
72
 
74
73
  std::optional<std::string> format;
74
+ uint8_t format_id = 255;
75
75
 
76
76
  std::vector<schema_node_ptr> all_of;
77
77
  std::vector<schema_node_ptr> any_of;
@@ -413,46 +413,39 @@ static void validate_napi(const schema_node_ptr& node,
413
413
 
414
414
  auto actual_type = napi_type_of(value);
415
415
 
416
- // type
417
- if (!node->types.empty()) {
418
- bool match = false;
419
- for (const auto& t : node->types) {
420
- if (napi_type_matches(value, t)) {
421
- match = true;
422
- break;
423
- }
424
- }
425
- if (!match) {
416
+ // type — uses bitmask matching ata.cpp json_type enum order:
417
+ // 0=string, 1=number, 2=integer, 3=boolean, 4=null_value, 5=object, 6=array
418
+ if (node->type_mask) {
419
+ uint8_t val_bits = 0;
420
+ if (actual_type == "string") val_bits = 1u << 0;
421
+ else if (actual_type == "number") val_bits = 1u << 1;
422
+ else if (actual_type == "integer") val_bits = (1u << 2) | (1u << 1); // integer matches number
423
+ else if (actual_type == "boolean") val_bits = 1u << 3;
424
+ else if (actual_type == "null") val_bits = 1u << 4;
425
+ else if (actual_type == "object") val_bits = 1u << 5;
426
+ else if (actual_type == "array") val_bits = 1u << 6;
427
+ if (!(val_bits & node->type_mask)) {
428
+ static const char* type_names[] = {"string","number","integer","boolean","null","object","array"};
426
429
  std::string expected;
427
- for (size_t i = 0; i < node->types.size(); ++i) {
428
- if (i > 0) expected += ", ";
429
- expected += node->types[i];
430
+ for (int b = 0; b < 7; ++b) {
431
+ if (node->type_mask & (1u << b)) {
432
+ if (!expected.empty()) expected += ", ";
433
+ expected += type_names[b];
434
+ }
430
435
  }
431
436
  errors.push_back({ata::error_code::type_mismatch, path,
432
437
  "expected type " + expected + ", got " + actual_type});
433
438
  }
434
439
  }
435
440
 
436
- // enum
437
- if (node->enum_values_raw.has_value()) {
441
+ // enum — compare against pre-minified canonical values
442
+ if (!node->enum_values_minified.empty()) {
438
443
  std::string val_json = napi_to_json(env, value);
439
- // Parse enum from raw and compare
440
444
  bool found = false;
441
- // We need to compare against each element in the enum array
442
- // The enum_values_raw is a JSON array string like [1,2,3]
443
- // We'll use JSON.parse in JS to handle this
444
- auto json_obj = env.Global().Get("JSON").As<Napi::Object>();
445
- auto parse_fn = json_obj.Get("parse").As<Napi::Function>();
446
- auto enum_arr = parse_fn.Call(json_obj,
447
- {Napi::String::New(env, node->enum_values_raw.value())});
448
- if (enum_arr.IsArray()) {
449
- auto arr = enum_arr.As<Napi::Array>();
450
- for (uint32_t i = 0; i < arr.Length(); ++i) {
451
- std::string elem_json = napi_to_json(env, arr.Get(i));
452
- if (elem_json == val_json) {
453
- found = true;
454
- break;
455
- }
445
+ for (const auto& ev : node->enum_values_minified) {
446
+ if (ev == val_json) {
447
+ found = true;
448
+ break;
456
449
  }
457
450
  }
458
451
  if (!found) {
@@ -1048,7 +1041,7 @@ static ThreadPool& pool() {
1048
1041
 
1049
1042
  // --- Fast Validation Registry ---
1050
1043
  // Global schema slots for V8 Fast API (bypasses NAPI overhead)
1051
- static constexpr size_t MAX_FAST_SLOTS = 256;
1044
+ static constexpr size_t MAX_FAST_SLOTS = 4096;
1052
1045
  static ata::schema_ref g_fast_schemas[MAX_FAST_SLOTS];
1053
1046
  static std::string g_fast_schema_jsons[MAX_FAST_SLOTS];
1054
1047
  static uint32_t g_fast_slot_count = 0;
package/binding.gyp CHANGED
@@ -11,10 +11,10 @@
11
11
  "<!@(node -p \"require('node-addon-api').include\")",
12
12
  "include",
13
13
  "deps/simdjson",
14
- "<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){console.log(a==='arm64'?'/opt/homebrew/opt/re2/include':'/usr/local/opt/re2/include');console.log(a==='arm64'?'/opt/homebrew/opt/abseil/include':'/usr/local/opt/abseil/include')}else{console.log('/usr/include')}\")"
14
+ "<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){console.log(a==='arm64'?'/opt/homebrew/opt/re2/include':'/usr/local/opt/re2/include');console.log(a==='arm64'?'/opt/homebrew/opt/abseil/include':'/usr/local/opt/abseil/include');console.log(a==='arm64'?'/opt/homebrew/opt/mimalloc/include':'/usr/local/opt/mimalloc/include')}else{console.log('/usr/include')}\")"
15
15
  ],
16
16
  "libraries": [
17
- "<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){var pre=a==='arm64'?'/opt/homebrew/opt/re2':'/usr/local/opt/re2';console.log('-L'+pre+'/lib -lre2')}else{console.log('-lre2')}\")"
17
+ "<!@(node -e \"var p=process.platform,a=process.arch;if(p==='darwin'){var pre=a==='arm64'?'/opt/homebrew/opt/re2':'/usr/local/opt/re2';var mi=a==='arm64'?'/opt/homebrew/opt/mimalloc':'/usr/local/opt/mimalloc';console.log('-L'+pre+'/lib -lre2 -L'+mi+'/lib -lmimalloc')}else{console.log('-lre2')}\")"
18
18
  ],
19
19
  "dependencies": [
20
20
  "<!(node -p \"require('node-addon-api').gyp\")"
package/index.js CHANGED
@@ -211,6 +211,7 @@ class Validator {
211
211
  // Pure JS fast path — no NAPI, runs in V8 JIT
212
212
  // Set ATA_FORCE_NAPI=1 to disable JS codegen (for correctness testing)
213
213
  const schemaObj = typeof schema === "string" ? JSON.parse(schema) : schema;
214
+ this._schemaObj = schemaObj;
214
215
  const jsFn = process.env.ATA_FORCE_NAPI
215
216
  ? null
216
217
  : (compileToJSCodegen(schemaObj) || compileToJS(schemaObj));
@@ -330,6 +331,107 @@ class Validator {
330
331
  });
331
332
  }
332
333
 
334
+ // --- Standalone pre-compilation ---
335
+ // Generate a JS module string that can be written to a file.
336
+ // On next startup, load with Validator.fromStandalone() — zero compile time.
337
+ toStandalone() {
338
+ const jsFn = this._jsFn;
339
+ if (!jsFn || !jsFn._source) return null;
340
+ const src = jsFn._source;
341
+ const hybridSrc = jsFn._hybridSource || '';
342
+
343
+ // Also capture error function source for zero-compile standalone load
344
+ const jsErrFn = compileToJSCodegenWithErrors(
345
+ typeof this._schemaObj === 'object' ? this._schemaObj : {}
346
+ );
347
+ const errSrc = jsErrFn && jsErrFn._errSource ? jsErrFn._errSource : '';
348
+
349
+ return `// Auto-generated by ata-validator — do not edit
350
+ 'use strict';
351
+ const boolFn = function(d) {
352
+ ${src}
353
+ };
354
+ const hybridFactory = function(R, E) {
355
+ return function(d) {
356
+ ${hybridSrc}
357
+ };
358
+ };
359
+ ${errSrc ? `const errFn = function(d, _all) {\n ${errSrc}\n};` : 'const errFn = null;'}
360
+ module.exports = { boolFn, hybridFactory, errFn };
361
+ `;
362
+ }
363
+
364
+ // Load a pre-compiled standalone module. Zero schema compilation.
365
+ // No NAPI, no native compile — pure JS. Startup in microseconds.
366
+ // Usage: const v = Validator.fromStandalone(require('./compiled.js'), schema, opts)
367
+ static fromStandalone(mod, schema, opts) {
368
+ const options = opts || {};
369
+ const schemaObj = typeof schema === "string" ? JSON.parse(schema) : schema;
370
+
371
+ // Create a lightweight instance — skip NAPI compile entirely
372
+ const v = Object.create(Validator.prototype);
373
+ v._jsFn = mod.boolFn;
374
+ v._compiled = null;
375
+ v._fastSlot = -1;
376
+
377
+ // Mutators
378
+ const applyDefaults = buildDefaultsApplier(schemaObj);
379
+ const applyCoerce = options.coerceTypes ? buildCoercer(schemaObj) : null;
380
+ const applyRemove = options.removeAdditional ? buildRemover(schemaObj) : null;
381
+ const mutators = [applyRemove, applyCoerce, applyDefaults].filter(Boolean);
382
+ const preprocess = mutators.length === 0 ? null
383
+ : mutators.length === 1 ? mutators[0]
384
+ : (data) => { for (let i = 0; i < mutators.length; i++) mutators[i](data); };
385
+ v._preprocess = preprocess;
386
+
387
+ // Error function — use pre-compiled from standalone if available, else compile
388
+ let errFn = (d) => ({ valid: false, errors: [{ code: 'validation_failed', path: '', message: 'validation failed' }] });
389
+ if (mod.errFn) {
390
+ errFn = (d) => mod.errFn(d, true);
391
+ } else {
392
+ const jsErrFn = compileToJSCodegenWithErrors(schemaObj);
393
+ if (jsErrFn) {
394
+ try { jsErrFn({}, true); errFn = (d) => jsErrFn(d, true); } catch {}
395
+ }
396
+ }
397
+
398
+ // Hybrid or speculative
399
+ const hybridFn = mod.hybridFactory
400
+ ? mod.hybridFactory(VALID_RESULT, errFn)
401
+ : null;
402
+
403
+ v.validate = hybridFn
404
+ ? (preprocess ? (data) => { preprocess(data); return hybridFn(data); } : hybridFn)
405
+ : (preprocess
406
+ ? (data) => { preprocess(data); return mod.boolFn(data) ? VALID_RESULT : errFn(data); }
407
+ : (data) => mod.boolFn(data) ? VALID_RESULT : errFn(data));
408
+ v.isValidObject = mod.boolFn;
409
+ v.isValidJSON = (jsonStr) => {
410
+ try { return mod.boolFn(JSON.parse(jsonStr)); } catch { return false; }
411
+ };
412
+ v.validateJSON = (jsonStr) => {
413
+ try {
414
+ const obj = JSON.parse(jsonStr);
415
+ return hybridFn ? hybridFn(obj) : (mod.boolFn(obj) ? VALID_RESULT : errFn(obj));
416
+ } catch { return { valid: false, errors: [{ code: 'invalid_json', path: '', message: 'invalid JSON' }] }; }
417
+ };
418
+
419
+ // Standard Schema V1
420
+ Object.defineProperty(v, "~standard", {
421
+ value: Object.freeze({
422
+ version: 1, vendor: "ata-validator",
423
+ validate(value) {
424
+ const result = v.validate(value);
425
+ if (result.valid) return { value };
426
+ return { issues: result.errors.map(e => ({ message: e.message, path: parsePointerPath(e.path) })) };
427
+ },
428
+ }),
429
+ writable: false, enumerable: false, configurable: false,
430
+ });
431
+
432
+ return v;
433
+ }
434
+
333
435
  // Fallback methods — only used when JS codegen is unavailable
334
436
  validate(data) {
335
437
  if (this._preprocess) this._preprocess(data);
@@ -384,4 +486,106 @@ function version() {
384
486
  return native.version();
385
487
  }
386
488
 
489
+ // Bundle multiple validators into a single JS file for fast startup.
490
+ // Usage:
491
+ // const bundle = Validator.bundle([schema1, schema2, ...]);
492
+ // fs.writeFileSync('validators.js', bundle);
493
+ // // On startup:
494
+ // const validators = Validator.loadBundle(require('./validators.js'), [schema1, schema2, ...]);
495
+ Validator.bundle = function(schemas, opts) {
496
+ const parts = schemas.map(schema => {
497
+ const v = new Validator(schema, opts);
498
+ const standalone = v.toStandalone();
499
+ if (!standalone) return 'null';
500
+ return '(function(){' + standalone.replace("'use strict';", '').replace('module.exports = ', 'return ') + '})()';
501
+ });
502
+ return "'use strict';\nmodule.exports = [\n" + parts.join(',\n') + '\n];\n';
503
+ };
504
+
505
+ // Zero-dependency self-contained bundle — no require('ata-validator') needed at runtime.
506
+ Validator.bundleStandalone = function(schemas, opts) {
507
+ const R = "Object.freeze({valid:true,errors:Object.freeze([])})";
508
+ const fns = schemas.map(schema => {
509
+ const v = new Validator(schema, opts);
510
+ const jsFn = v._jsFn;
511
+ if (!jsFn || !jsFn._hybridSource) return 'null';
512
+ const jsErrFn = compileToJSCodegenWithErrors(
513
+ typeof schema === 'string' ? JSON.parse(schema) : schema
514
+ );
515
+ const errBody = jsErrFn && jsErrFn._errSource
516
+ ? jsErrFn._errSource
517
+ : "return{valid:false,errors:[{code:'error',path:'',message:'validation failed'}]}";
518
+ return `(function(R){var E=function(d){var _all=true;${errBody}};return function(d){${jsFn._hybridSource}}})(R)`;
519
+ });
520
+ return `'use strict';\nvar R=${R};\nmodule.exports=[${fns.join(',')}];\n`;
521
+ };
522
+
523
+ // Compact bundle: deduplicated code. Shared template functions + per-schema params.
524
+ // Much smaller file → faster V8 parse → faster startup.
525
+ Validator.bundleCompact = function(schemas, opts) {
526
+ // Compile each schema and collect its generated hybrid and error-function source
527
+ const entries = schemas.map(schema => {
528
+ const v = new Validator(schema, opts);
529
+ const jsFn = v._jsFn;
530
+ if (!jsFn || !jsFn._hybridSource) return null;
531
+ const jsErrFn = compileToJSCodegenWithErrors(
532
+ typeof schema === 'string' ? JSON.parse(schema) : schema
533
+ );
534
+ return {
535
+ hybrid: jsFn._hybridSource,
536
+ err: jsErrFn && jsErrFn._errSource ? jsErrFn._errSource : null,
537
+ };
538
+ });
539
+
540
+ // Deduplicate function bodies — schemas whose generated source is byte-identical share one entry
541
+ const bodyMap = new Map(); // body → index
542
+ const bodies = [];
543
+ const errMap = new Map();
544
+ const errBodies = [];
545
+
546
+ const indices = entries.map(e => {
547
+ if (!e) return [-1, -1];
548
+ let hi = bodyMap.get(e.hybrid);
549
+ if (hi === undefined) { hi = bodies.length; bodies.push(e.hybrid); bodyMap.set(e.hybrid, hi); }
550
+ let ei = -1;
551
+ if (e.err) {
552
+ ei = errMap.get(e.err);
553
+ if (ei === undefined) { ei = errBodies.length; errBodies.push(e.err); errMap.set(e.err, ei); }
554
+ }
555
+ return [hi, ei];
556
+ });
557
+
558
+ // Generate compact bundle
559
+ let out = "'use strict';\n";
560
+ out += "var R=Object.freeze({valid:true,errors:Object.freeze([])});\n";
561
+
562
+ // Shared hybrid factories
563
+ out += "var H=[\n";
564
+ out += bodies.map(b => `function(R,E){return function(d){${b}}}`).join(',\n');
565
+ out += "\n];\n";
566
+
567
+ // Shared error functions
568
+ out += "var EF=[\n";
569
+ out += errBodies.map(b => `function(d){var _all=true;${b}}`).join(',\n');
570
+ out += "\n];\n";
571
+
572
+ // Build validators from shared templates
573
+ out += "module.exports=[";
574
+ out += indices.map(([hi, ei]) => {
575
+ if (hi < 0) return 'null';
576
+ if (ei >= 0) return `H[${hi}](R,EF[${ei}])`;
577
+ return `H[${hi}](R,function(){return{valid:false,errors:[]}})`;
578
+ }).join(',');
579
+ out += "];\n";
580
+
581
+ return out;
582
+ };
583
+
584
+ Validator.loadBundle = function(mods, schemas, opts) {
585
+ return schemas.map((schema, i) => {
586
+ if (mods[i]) return Validator.fromStandalone(mods[i], schema, opts);
587
+ return new Validator(schema, opts);
588
+ });
589
+ };
590
+
387
591
  module.exports = { Validator, validate, version, createPaddedBuffer, SIMDJSON_PADDING };
@@ -531,14 +531,16 @@ function compileToJSCodegen(schema) {
531
531
  const boolFn = new Function('d', body)
532
532
 
533
533
  // Build hybrid: same body, return R instead of true, return E(d) instead of false.
534
- // V8 optimizes this identically to jsFn — E(d) is dead code on valid path.
535
- // 83M ops/sec vs 26M for combined. Invalid path: 34M vs 6M.
536
534
  const hybridBody = replaceTopLevel(helperStr + checkStr + '\n return R')
537
535
  try {
538
536
  const factory = new Function('R', 'E', `return function(d){${hybridBody}}`)
539
537
  boolFn._hybridFactory = factory
540
538
  } catch {}
541
539
 
540
+ // Store source for standalone compilation (pre-build to file)
541
+ boolFn._source = body
542
+ boolFn._hybridSource = hybridBody
543
+
542
544
  return boolFn
543
545
  } catch {
544
546
  return null
@@ -940,7 +942,9 @@ function compileToJSCodegenWithErrors(schema) {
940
942
  lines.join('\n ') +
941
943
  `\n return{valid:_e.length===0,errors:_e}`
942
944
  try {
943
- return new Function('d', '_all', body)
945
+ const fn = new Function('d', '_all', body)
946
+ fn._errSource = body
947
+ return fn
944
948
  } catch {
945
949
  return null
946
950
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ata-validator",
3
- "version": "0.4.8",
3
+ "version": "0.4.10",
4
4
  "description": "Ultra-fast JSON Schema validator. Beats ajv on every valid-path benchmark: 1.1x–2.7x faster validate(obj), 151x faster compilation, 5.9x faster parallel batch. Speculative validation with V8-optimized JS codegen, simdjson, multi-core. Standard Schema V1 compatible.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
package/src/ata.cpp CHANGED
@@ -1,5 +1,10 @@
1
1
  #include "ata.h"
2
2
 
3
+ // mimalloc: faster new/delete for small allocations.
4
+ #if __has_include(<mimalloc-new-delete.h>)
5
+ #include <mimalloc-new-delete.h>
6
+ #endif
7
+
3
8
  #include <algorithm>
4
9
  #include <cmath>
5
10
  #include <re2/re2.h>
@@ -123,17 +128,20 @@ static bool fast_check_hostname(std::string_view s) {
123
128
  return label_len > 0;
124
129
  }
125
130
 
126
- static bool check_format(std::string_view sv, const std::string& fmt) {
127
- if (fmt == "email") return fast_check_email(sv);
128
- if (fmt == "date") return fast_check_date(sv);
129
- if (fmt == "date-time") return fast_check_datetime(sv);
130
- if (fmt == "time") return fast_check_time(sv);
131
- if (fmt == "ipv4") return fast_check_ipv4(sv);
132
- if (fmt == "ipv6") return sv.find(':') != std::string_view::npos;
133
- if (fmt == "uri" || fmt == "uri-reference") return fast_check_uri(sv);
134
- if (fmt == "uuid") return fast_check_uuid(sv);
135
- if (fmt == "hostname") return fast_check_hostname(sv);
136
- return true; // unknown formats pass
131
+ // Check format by pre-resolved numeric ID — no string comparisons.
132
+ static bool check_format_by_id(std::string_view sv, uint8_t fid) {
133
+ switch (fid) {
134
+ case 0: return fast_check_email(sv);
135
+ case 1: return fast_check_date(sv);
136
+ case 2: return fast_check_datetime(sv);
137
+ case 3: return fast_check_time(sv);
138
+ case 4: return fast_check_ipv4(sv);
139
+ case 5: return sv.find(':') != std::string_view::npos;
140
+ case 6: return fast_check_uri(sv);
141
+ case 7: return fast_check_uuid(sv);
142
+ case 8: return fast_check_hostname(sv);
143
+ default: return true; // unknown formats pass
144
+ }
137
145
  }
138
146
 
139
147
  namespace ata {
@@ -177,14 +185,74 @@ static std::string canonical_json(dom::element el) {
177
185
  }
178
186
  }
179
187
 
188
+ // JSON Schema type enum — avoids string comparisons on the hot path.
189
+ enum class json_type : uint8_t {
190
+ string, number, integer, boolean, null_value, object, array
191
+ };
192
+
193
+ static json_type json_type_from_sv(std::string_view s) {
194
+ if (s == "string") return json_type::string;
195
+ if (s == "number") return json_type::number;
196
+ if (s == "integer") return json_type::integer;
197
+ if (s == "boolean") return json_type::boolean;
198
+ if (s == "null") return json_type::null_value;
199
+ if (s == "object") return json_type::object;
200
+ if (s == "array") return json_type::array;
201
+ return json_type::string; // fallback
202
+ }
203
+
204
+ static const char* json_type_name(json_type t) {
205
+ switch (t) {
206
+ case json_type::string: return "string";
207
+ case json_type::number: return "number";
208
+ case json_type::integer: return "integer";
209
+ case json_type::boolean: return "boolean";
210
+ case json_type::null_value: return "null";
211
+ case json_type::object: return "object";
212
+ case json_type::array: return "array";
213
+ }
214
+ return "unknown";
215
+ }
216
+
217
+ // Bitmask for O(1) type checking: one bit per json_type value.
218
+ static uint8_t json_type_bit(json_type t) { return 1u << static_cast<uint8_t>(t); }
219
+
220
+ // Map dom::element_type to a json_type bitmask (integer elements also set the number bit).
221
+ static uint8_t element_type_mask(dom::element_type t) {
222
+ switch (t) {
223
+ case dom::element_type::STRING: return json_type_bit(json_type::string);
224
+ case dom::element_type::INT64:
225
+ case dom::element_type::UINT64: return json_type_bit(json_type::integer) | json_type_bit(json_type::number);
226
+ case dom::element_type::DOUBLE: return json_type_bit(json_type::number);
227
+ case dom::element_type::BOOL: return json_type_bit(json_type::boolean);
228
+ case dom::element_type::NULL_VALUE: return json_type_bit(json_type::null_value);
229
+ case dom::element_type::ARRAY: return json_type_bit(json_type::array);
230
+ case dom::element_type::OBJECT: return json_type_bit(json_type::object);
231
+ }
232
+ return 0;
233
+ }
234
+
235
+ // Resolve format string to numeric ID when the schema is compiled (not at C++ compile time).
236
+ static uint8_t format_id_from_string(const std::string& f) {
237
+ if (f == "email") return 0;
238
+ if (f == "date") return 1;
239
+ if (f == "date-time") return 2;
240
+ if (f == "time") return 3;
241
+ if (f == "ipv4") return 4;
242
+ if (f == "ipv6") return 5;
243
+ if (f == "uri" || f == "uri-reference") return 6;
244
+ if (f == "uuid") return 7;
245
+ if (f == "hostname") return 8;
246
+ return 255;
247
+ }
248
+
180
249
  // Forward declarations
181
250
  struct schema_node;
182
251
  using schema_node_ptr = std::shared_ptr<schema_node>;
183
252
 
184
253
  struct schema_node {
185
- // type constraint: "string", "number", "integer", "boolean", "null",
186
- // "object", "array"
187
- std::vector<std::string> types;
254
+ // type constraint bitmask for O(1) type checking
255
+ uint8_t type_mask = 0; // bit per json_type value
188
256
 
189
257
  // numeric
190
258
  std::optional<double> minimum;
@@ -229,12 +297,12 @@ struct schema_node {
229
297
  std::vector<pattern_prop> pattern_properties;
230
298
 
231
299
  // enum / const
232
- std::optional<std::string> enum_values_raw; // raw JSON array string
233
300
  std::vector<std::string> enum_values_minified; // pre-minified enum values
234
301
  std::optional<std::string> const_value_raw; // raw JSON value string
235
302
 
236
303
  // format
237
304
  std::optional<std::string> format;
305
+ uint8_t format_id = 255; // pre-resolved format ID (255 = unknown/pass)
238
306
 
239
307
  // composition
240
308
  std::vector<schema_node_ptr> all_of;
@@ -276,7 +344,7 @@ struct plan {
276
344
  std::vector<std::string> strings;
277
345
  std::vector<std::shared_ptr<re2::RE2>> regexes;
278
346
  std::vector<std::vector<std::string>> enum_sets;
279
- std::vector<std::vector<std::string>> type_sets;
347
+ std::vector<uint8_t> type_masks;
280
348
  std::vector<uint8_t> format_ids;
281
349
  std::vector<std::vector<ins>> subs;
282
350
  };
@@ -297,6 +365,10 @@ static dom::parser& tl_dom_parser() {
297
365
  thread_local dom::parser p;
298
366
  return p;
299
367
  }
368
+ static dom::parser& tl_dom_key_parser() {
369
+ thread_local dom::parser p;
370
+ return p;
371
+ }
300
372
  static simdjson::ondemand::parser& tl_od_parser() {
301
373
  thread_local simdjson::ondemand::parser p;
302
374
  return p;
@@ -341,12 +413,12 @@ static schema_node_ptr compile_node(dom::element el,
341
413
  if (type_el.is<std::string_view>()) {
342
414
  std::string_view sv;
343
415
  type_el.get(sv);
344
- node->types.emplace_back(sv);
416
+ node->type_mask |= json_type_bit(json_type_from_sv(sv));
345
417
  } else if (type_el.is<dom::array>()) {
346
418
  dom::array type_arr; type_el.get(type_arr); for (auto t : type_arr) {
347
419
  std::string_view sv;
348
420
  if (t.get(sv) == SUCCESS) {
349
- node->types.emplace_back(sv);
421
+ node->type_mask |= json_type_bit(json_type_from_sv(sv));
350
422
  }
351
423
  }
352
424
  }
@@ -526,7 +598,10 @@ static schema_node_ptr compile_node(dom::element el,
526
598
  dom::element fmt_el;
527
599
  if (obj["format"].get(fmt_el) == SUCCESS) {
528
600
  std::string_view sv;
529
- if (fmt_el.get(sv) == SUCCESS) node->format = std::string(sv);
601
+ if (fmt_el.get(sv) == SUCCESS) {
602
+ node->format = std::string(sv);
603
+ node->format_id = format_id_from_string(node->format.value());
604
+ }
530
605
  }
531
606
 
532
607
  // $id (register in defs for potential resolution)
@@ -541,7 +616,6 @@ static schema_node_ptr compile_node(dom::element el,
541
616
  // enum — pre-minify each value at compile time
542
617
  dom::element enum_el;
543
618
  if (obj["enum"].get(enum_el) == SUCCESS) {
544
- node->enum_values_raw = canonical_json(enum_el);
545
619
  if (enum_el.is<dom::array>()) {
546
620
  dom::array enum_arr; enum_el.get(enum_arr); for (auto e : enum_arr) {
547
621
  node->enum_values_minified.push_back(canonical_json(e));
@@ -635,41 +709,37 @@ static bool validate_fast(const schema_node_ptr& node,
635
709
  // Macro for early termination
636
710
  #define ATA_CHECK_EARLY() if (!all_errors && !errors.empty()) return
637
711
 
712
+ using et = dom::element_type;
713
+
714
+
638
715
  // Use string_view to avoid allocations in hot path
639
716
  static std::string_view type_of_sv(dom::element el) {
640
717
  switch (el.type()) {
641
- case dom::element_type::STRING: return "string";
642
- case dom::element_type::INT64:
643
- case dom::element_type::UINT64: return "integer";
644
- case dom::element_type::DOUBLE: return "number";
645
- case dom::element_type::BOOL: return "boolean";
646
- case dom::element_type::NULL_VALUE:return "null";
647
- case dom::element_type::ARRAY: return "array";
648
- case dom::element_type::OBJECT: return "object";
718
+ case et::STRING: return "string";
719
+ case et::INT64:
720
+ case et::UINT64: return "integer";
721
+ case et::DOUBLE: return "number";
722
+ case et::BOOL: return "boolean";
723
+ case et::NULL_VALUE:return "null";
724
+ case et::ARRAY: return "array";
725
+ case et::OBJECT: return "object";
649
726
  }
650
727
  return "unknown";
651
728
  }
652
729
 
653
- static std::string type_of(dom::element el) {
654
- return std::string(type_of_sv(el));
655
- }
656
730
 
657
- static bool type_matches(dom::element el, const std::string& type) {
658
- auto actual = type_of_sv(el);
659
- if (actual == type) return true;
660
- if (type == "number" && (actual == "integer" || actual == "number"))
661
- return true;
662
- return false;
731
+ // O(1) type check: test element's type bits against the schema's type_mask.
732
+ static bool type_matches_mask(dom::element el, uint8_t type_mask) {
733
+ return (element_type_mask(el.type()) & type_mask) != 0;
663
734
  }
664
735
 
665
736
  static double to_double(dom::element el) {
666
- double v = 0;
667
- if (el.get(v) == SUCCESS) return v;
668
- int64_t i = 0;
669
- if (el.get(i) == SUCCESS) return static_cast<double>(i);
670
- uint64_t u = 0;
671
- if (el.get(u) == SUCCESS) return static_cast<double>(u);
672
- return 0;
737
+ switch (el.type()) {
738
+ case et::DOUBLE: { double v; el.get(v); return v; }
739
+ case et::INT64: { int64_t v; el.get(v); return static_cast<double>(v); }
740
+ case et::UINT64: { uint64_t v; el.get(v); return static_cast<double>(v); }
741
+ default: return 0;
742
+ }
673
743
  }
674
744
 
675
745
  // Count UTF-8 codepoints — branchless: count non-continuation bytes
@@ -838,22 +908,17 @@ static void validate_node(const schema_node_ptr& node,
838
908
  }
839
909
 
840
910
  // type
841
- if (!node->types.empty()) {
842
- bool match = false;
843
- for (const auto& t : node->types) {
844
- if (type_matches(value, t)) {
845
- match = true;
846
- break;
847
- }
848
- }
849
- if (!match) {
911
+ if (node->type_mask) {
912
+ if (!type_matches_mask(value, node->type_mask)) {
850
913
  std::string expected;
851
- for (size_t i = 0; i < node->types.size(); ++i) {
852
- if (i > 0) expected += ", ";
853
- expected += node->types[i];
914
+ for (int b = 0; b < 7; ++b) {
915
+ if (node->type_mask & (1u << b)) {
916
+ if (!expected.empty()) expected += ", ";
917
+ expected += json_type_name(static_cast<json_type>(b));
918
+ }
854
919
  }
855
920
  errors.push_back({error_code::type_mismatch, path,
856
- "expected type " + expected + ", got " + type_of(value)});
921
+ "expected type " + expected + ", got " + std::string(type_of_sv(value))});
857
922
  ATA_CHECK_EARLY();
858
923
  }
859
924
  }
@@ -886,8 +951,8 @@ static void validate_node(const schema_node_ptr& node,
886
951
 
887
952
  ATA_CHECK_EARLY();
888
953
  // Numeric validations
889
- auto actual_type = type_of(value);
890
- if (actual_type == "integer" || actual_type == "number") {
954
+ auto vtype = value.type();
955
+ if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
891
956
  double v = to_double(value);
892
957
  if (node->minimum.has_value() && v < node->minimum.value()) {
893
958
  errors.push_back({error_code::minimum_violation, path,
@@ -924,7 +989,7 @@ static void validate_node(const schema_node_ptr& node,
924
989
  }
925
990
 
926
991
  // String validations
927
- if (actual_type == "string") {
992
+ if (vtype == et::STRING) {
928
993
  std::string_view sv;
929
994
  value.get(sv);
930
995
  uint64_t len = utf8_length(sv);
@@ -950,7 +1015,7 @@ static void validate_node(const schema_node_ptr& node,
950
1015
  }
951
1016
 
952
1017
  if (node->format.has_value()) {
953
- if (!check_format(sv, node->format.value())) {
1018
+ if (!check_format_by_id(sv, node->format_id)) {
954
1019
  errors.push_back({error_code::format_mismatch, path,
955
1020
  "string does not match format: " +
956
1021
  node->format.value()});
@@ -959,10 +1024,14 @@ static void validate_node(const schema_node_ptr& node,
959
1024
  }
960
1025
 
961
1026
  // Array validations
962
- if (actual_type == "array" && value.is<dom::array>()) {
1027
+ if (vtype == et::ARRAY) {
963
1028
  dom::array arr; value.get(arr);
964
- uint64_t arr_size = 0;
965
- for ([[maybe_unused]] auto _ : arr) ++arr_size;
1029
+ uint64_t arr_size = arr.size();
1030
+ if(arr_size == 0xFFFFFF) [[unlikely]] {
1031
+ // Fallback for large arrays: size() saturates at 0xFFFFFF, so count manually to get the true length
1032
+ arr_size = 0;
1033
+ for ([[maybe_unused]] auto _ : arr) ++arr_size;
1034
+ }
966
1035
 
967
1036
  if (node->min_items.has_value() && arr_size < node->min_items.value()) {
968
1037
  errors.push_back({error_code::min_items_violation, path,
@@ -978,13 +1047,29 @@ static void validate_node(const schema_node_ptr& node,
978
1047
  }
979
1048
 
980
1049
  if (node->unique_items) {
981
- std::set<std::string> seen;
982
1050
  bool has_dup = false;
983
- for (auto item : arr) {
984
- auto s = canonical_json(item);
985
- if (!seen.insert(s).second) {
986
- has_dup = true;
987
- break;
1051
+ // Fast path: check if all items are the same simple type
1052
+ auto first_it = arr.begin();
1053
+ if (first_it != arr.end()) {
1054
+ auto first_type = (*first_it).type();
1055
+ bool all_same = true;
1056
+ for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
1057
+ if (all_same && first_type == et::STRING) {
1058
+ std::set<std::string_view> seen;
1059
+ for (auto item : arr) {
1060
+ std::string_view sv; item.get(sv);
1061
+ if (!seen.insert(sv).second) { has_dup = true; break; }
1062
+ }
1063
+ } else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
1064
+ std::set<double> seen;
1065
+ for (auto item : arr) {
1066
+ if (!seen.insert(to_double(item)).second) { has_dup = true; break; }
1067
+ }
1068
+ } else {
1069
+ std::set<std::string> seen;
1070
+ for (auto item : arr) {
1071
+ if (!seen.insert(canonical_json(item)).second) { has_dup = true; break; }
1072
+ }
988
1073
  }
989
1074
  }
990
1075
  if (has_dup) {
@@ -1012,9 +1097,7 @@ static void validate_node(const schema_node_ptr& node,
1012
1097
  if (node->contains_schema) {
1013
1098
  uint64_t match_count = 0;
1014
1099
  for (auto item : arr) {
1015
- std::vector<validation_error> tmp;
1016
- validate_node(node->contains_schema, item, path, ctx, tmp, false);
1017
- if (tmp.empty()) ++match_count;
1100
+ if (validate_fast(node->contains_schema, item, ctx)) ++match_count;
1018
1101
  }
1019
1102
  uint64_t min_c = node->min_contains.value_or(1);
1020
1103
  uint64_t max_c = node->max_contains.value_or(arr_size);
@@ -1032,24 +1115,26 @@ static void validate_node(const schema_node_ptr& node,
1032
1115
  }
1033
1116
 
1034
1117
  // Object validations
1035
- if (actual_type == "object" && value.is<dom::object>()) {
1118
+ if (vtype == et::OBJECT) {
1036
1119
  dom::object obj; value.get(obj);
1037
- uint64_t prop_count = 0;
1038
- for ([[maybe_unused]] auto _ : obj) ++prop_count;
1039
1120
 
1040
- if (node->min_properties.has_value() &&
1041
- prop_count < node->min_properties.value()) {
1042
- errors.push_back({error_code::min_properties_violation, path,
1043
- "object has " + std::to_string(prop_count) +
1044
- " properties, minimum " +
1045
- std::to_string(node->min_properties.value())});
1046
- }
1047
- if (node->max_properties.has_value() &&
1048
- prop_count > node->max_properties.value()) {
1049
- errors.push_back({error_code::max_properties_violation, path,
1050
- "object has " + std::to_string(prop_count) +
1051
- " properties, maximum " +
1052
- std::to_string(node->max_properties.value())});
1121
+ if (node->min_properties.has_value() || node->max_properties.has_value()) {
1122
+ uint64_t prop_count = 0;
1123
+ for ([[maybe_unused]] auto _ : obj) ++prop_count;
1124
+ if (node->min_properties.has_value() &&
1125
+ prop_count < node->min_properties.value()) {
1126
+ errors.push_back({error_code::min_properties_violation, path,
1127
+ "object has " + std::to_string(prop_count) +
1128
+ " properties, minimum " +
1129
+ std::to_string(node->min_properties.value())});
1130
+ }
1131
+ if (node->max_properties.has_value() &&
1132
+ prop_count > node->max_properties.value()) {
1133
+ errors.push_back({error_code::max_properties_violation, path,
1134
+ "object has " + std::to_string(prop_count) +
1135
+ " properties, maximum " +
1136
+ std::to_string(node->max_properties.value())});
1137
+ }
1053
1138
  }
1054
1139
 
1055
1140
  // required
@@ -1094,17 +1179,50 @@ static void validate_node(const schema_node_ptr& node,
1094
1179
  }
1095
1180
  }
1096
1181
  }
1097
-
1098
- // propertyNames
1182
+ // propertyNames — validate key as string directly when possible
1099
1183
  if (node->property_names_schema) {
1100
- for (auto [key, val] : obj) {
1101
- // Create a string element to validate the key
1102
- std::string key_json = "\"" + std::string(key) + "\"";
1103
- dom::parser key_parser;
1104
- auto key_result = key_parser.parse(key_json);
1105
- if (!key_result.error()) {
1106
- validate_node(node->property_names_schema, key_result.value(),
1107
- path, ctx, errors, all_errors);
1184
+ auto pn = node->property_names_schema;
1185
+ bool string_only = pn->ref.empty() && pn->all_of.empty() &&
1186
+ pn->any_of.empty() && pn->one_of.empty() && !pn->not_schema &&
1187
+ !pn->if_schema && pn->enum_values_minified.empty() &&
1188
+ !pn->const_value_raw.has_value();
1189
+ if (string_only) {
1190
+ // Fast path: validate string constraints on key directly
1191
+ for (auto [key, val] : obj) {
1192
+ std::string_view key_sv(key);
1193
+ if (pn->type_mask && !(pn->type_mask & json_type_bit(json_type::string))) {
1194
+ errors.push_back({error_code::type_mismatch, path,
1195
+ "propertyNames: key is string but schema requires different type"});
1196
+ continue;
1197
+ }
1198
+ uint64_t len = utf8_length(key_sv);
1199
+ if (pn->min_length.has_value() && len < pn->min_length.value()) {
1200
+ errors.push_back({error_code::min_length_violation, path,
1201
+ "propertyNames: key too short: " + std::string(key_sv)});
1202
+ }
1203
+ if (pn->max_length.has_value() && len > pn->max_length.value()) {
1204
+ errors.push_back({error_code::max_length_violation, path,
1205
+ "propertyNames: key too long: " + std::string(key_sv)});
1206
+ }
1207
+ if (pn->compiled_pattern) {
1208
+ if (!re2::RE2::PartialMatch(re2::StringPiece(key_sv.data(), key_sv.size()), *pn->compiled_pattern)) {
1209
+ errors.push_back({error_code::pattern_mismatch, path,
1210
+ "propertyNames: key does not match pattern: " + std::string(key_sv)});
1211
+ }
1212
+ }
1213
+ if (pn->format.has_value() && !check_format_by_id(key_sv, pn->format_id)) {
1214
+ errors.push_back({error_code::format_mismatch, path,
1215
+ "propertyNames: key does not match format: " + std::string(key_sv)});
1216
+ }
1217
+ }
1218
+ } else {
1219
+ // Fallback: parse key as JSON string element
1220
+ for (auto [key, val] : obj) {
1221
+ std::string key_json = "\"" + std::string(key) + "\"";
1222
+ auto key_result = tl_dom_key_parser().parse(key_json);
1223
+ if (!key_result.error()) {
1224
+ validate_node(pn, key_result.value(), path, ctx, errors, all_errors);
1225
+ }
1108
1226
  }
1109
1227
  }
1110
1228
  }
@@ -1230,12 +1348,8 @@ static bool validate_fast(const schema_node_ptr& node,
1230
1348
  }
1231
1349
 
1232
1350
  // type
1233
- if (!node->types.empty()) {
1234
- bool match = false;
1235
- for (const auto& t : node->types) {
1236
- if (type_matches(value, t)) { match = true; break; }
1237
- }
1238
- if (!match) [[unlikely]] return false;
1351
+ if (node->type_mask) {
1352
+ if (!type_matches_mask(value, node->type_mask)) [[unlikely]] return false;
1239
1353
  }
1240
1354
 
1241
1355
  // enum
@@ -1253,10 +1367,10 @@ static bool validate_fast(const schema_node_ptr& node,
1253
1367
  if (canonical_json(value) != node->const_value_raw.value()) [[unlikely]] return false;
1254
1368
  }
1255
1369
 
1256
- auto actual_type = type_of_sv(value);
1370
+ auto vtype = value.type();
1257
1371
 
1258
1372
  // Numeric
1259
- if (actual_type == "integer" || actual_type == "number") {
1373
+ if (vtype == et::INT64 || vtype == et::UINT64 || vtype == et::DOUBLE) {
1260
1374
  double v = to_double(value);
1261
1375
  if (node->minimum.has_value() && v < node->minimum.value()) return false;
1262
1376
  if (node->maximum.has_value() && v > node->maximum.value()) return false;
@@ -1269,7 +1383,7 @@ static bool validate_fast(const schema_node_ptr& node,
1269
1383
  }
1270
1384
 
1271
1385
  // String
1272
- if (actual_type == "string") {
1386
+ if (vtype == et::STRING) {
1273
1387
  std::string_view sv;
1274
1388
  value.get(sv);
1275
1389
  uint64_t len = utf8_length(sv);
@@ -1279,22 +1393,38 @@ static bool validate_fast(const schema_node_ptr& node,
1279
1393
  if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern))
1280
1394
  return false;
1281
1395
  }
1282
- if (node->format.has_value() && !check_format(sv, node->format.value())) return false;
1396
+ if (node->format.has_value() && !check_format_by_id(sv, node->format_id)) return false;
1283
1397
  }
1284
1398
 
1285
1399
  // Array
1286
- if (actual_type == "array" && value.is<dom::array>()) {
1400
+ if (vtype == et::ARRAY) {
1287
1401
  dom::array arr; value.get(arr);
1288
- uint64_t arr_size = 0;
1289
- for ([[maybe_unused]] auto _ : arr) ++arr_size;
1402
+ uint64_t arr_size = arr.size();
1403
+ if(arr_size == 0xFFFFFF) [[unlikely]] {
1404
+ // Fallback for large arrays where size() saturates — count manually to avoid overflow
1405
+ arr_size = 0;
1406
+ for ([[maybe_unused]] auto _ : arr) ++arr_size;
1407
+ }
1290
1408
 
1291
1409
  if (node->min_items.has_value() && arr_size < node->min_items.value()) return false;
1292
1410
  if (node->max_items.has_value() && arr_size > node->max_items.value()) return false;
1293
1411
 
1294
1412
  if (node->unique_items) {
1295
- std::set<std::string> seen;
1296
- for (auto item : arr) {
1297
- if (!seen.insert(canonical_json(item)).second) return false;
1413
+ auto first_it = arr.begin();
1414
+ if (first_it != arr.end()) {
1415
+ auto first_type = (*first_it).type();
1416
+ bool all_same = true;
1417
+ for (auto item : arr) { if (item.type() != first_type) { all_same = false; break; } }
1418
+ if (all_same && first_type == et::STRING) {
1419
+ std::set<std::string_view> seen;
1420
+ for (auto item : arr) { std::string_view sv; item.get(sv); if (!seen.insert(sv).second) return false; }
1421
+ } else if (all_same && (first_type == et::INT64 || first_type == et::UINT64 || first_type == et::DOUBLE)) {
1422
+ std::set<double> seen;
1423
+ for (auto item : arr) { if (!seen.insert(to_double(item)).second) return false; }
1424
+ } else {
1425
+ std::set<std::string> seen;
1426
+ for (auto item : arr) { if (!seen.insert(canonical_json(item)).second) return false; }
1427
+ }
1298
1428
  }
1299
1429
  }
1300
1430
 
@@ -1321,7 +1451,7 @@ static bool validate_fast(const schema_node_ptr& node,
1321
1451
  }
1322
1452
 
1323
1453
  // Object
1324
- if (actual_type == "object" && value.is<dom::object>()) {
1454
+ if (vtype == et::OBJECT) {
1325
1455
  dom::object obj; value.get(obj);
1326
1456
 
1327
1457
  if (node->min_properties.has_value() || node->max_properties.has_value()) {
@@ -1438,19 +1568,27 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1438
1568
  return;
1439
1569
  }
1440
1570
  // Type
1441
- if (!n->types.empty()) {
1442
- if (n->types.size() == 1) {
1443
- auto& t = n->types[0];
1444
- if (t=="object") out.push_back({cg::op::EXPECT_OBJECT});
1445
- else if (t=="array") out.push_back({cg::op::EXPECT_ARRAY});
1446
- else if (t=="string") out.push_back({cg::op::EXPECT_STRING});
1447
- else if (t=="number") out.push_back({cg::op::EXPECT_NUMBER});
1448
- else if (t=="integer") out.push_back({cg::op::EXPECT_INTEGER});
1449
- else if (t=="boolean") out.push_back({cg::op::EXPECT_BOOLEAN});
1450
- else if (t=="null") out.push_back({cg::op::EXPECT_NULL});
1571
+ if (n->type_mask) {
1572
+ int popcount = __builtin_popcount(n->type_mask);
1573
+ if (popcount == 1) {
1574
+ // Single type — emit specific opcode
1575
+ for (int b = 0; b < 7; ++b) {
1576
+ if (n->type_mask & (1u << b)) {
1577
+ switch (static_cast<json_type>(b)) {
1578
+ case json_type::object: out.push_back({cg::op::EXPECT_OBJECT}); break;
1579
+ case json_type::array: out.push_back({cg::op::EXPECT_ARRAY}); break;
1580
+ case json_type::string: out.push_back({cg::op::EXPECT_STRING}); break;
1581
+ case json_type::number: out.push_back({cg::op::EXPECT_NUMBER}); break;
1582
+ case json_type::integer: out.push_back({cg::op::EXPECT_INTEGER}); break;
1583
+ case json_type::boolean: out.push_back({cg::op::EXPECT_BOOLEAN}); break;
1584
+ case json_type::null_value: out.push_back({cg::op::EXPECT_NULL}); break;
1585
+ }
1586
+ break;
1587
+ }
1588
+ }
1451
1589
  } else {
1452
- uint32_t i = (uint32_t)p.type_sets.size();
1453
- p.type_sets.push_back(n->types);
1590
+ uint32_t i = (uint32_t)p.type_masks.size();
1591
+ p.type_masks.push_back(n->type_mask);
1454
1592
  out.push_back({cg::op::EXPECT_TYPE_MULTI, i});
1455
1593
  }
1456
1594
  }
@@ -1480,13 +1618,7 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1480
1618
  if (n->compiled_pattern) { uint32_t i=(uint32_t)p.regexes.size(); p.regexes.push_back(n->compiled_pattern); out.push_back({cg::op::CHECK_PATTERN,i}); }
1481
1619
  if (n->format.has_value()) {
1482
1620
  uint32_t i=(uint32_t)p.format_ids.size();
1483
- uint8_t fid=255;
1484
- auto& f=*n->format;
1485
- if(f=="email")fid=0;else if(f=="date")fid=1;else if(f=="date-time")fid=2;
1486
- else if(f=="time")fid=3;else if(f=="ipv4")fid=4;else if(f=="ipv6")fid=5;
1487
- else if(f=="uri"||f=="uri-reference")fid=6;else if(f=="uuid")fid=7;
1488
- else if(f=="hostname")fid=8;
1489
- p.format_ids.push_back(fid);
1621
+ p.format_ids.push_back(n->format_id);
1490
1622
  out.push_back({cg::op::CHECK_FORMAT,i});
1491
1623
  }
1492
1624
  // Array
@@ -1530,44 +1662,43 @@ static void cg_compile(const schema_node* n, cg::plan& p,
1530
1662
  }
1531
1663
 
1532
1664
  // --- Codegen executor ---
1533
- static const char* fmt_names[]={"email","date","date-time","time","ipv4","ipv6","uri","uuid","hostname"};
1534
1665
 
1535
1666
  static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1536
1667
  dom::element value) {
1537
- auto t = type_of_sv(value);
1668
+ auto t = value.type();
1669
+ bool t_numeric = (t == et::INT64 || t == et::UINT64 || t == et::DOUBLE);
1670
+ double t_dval = t_numeric ? to_double(value) : 0.0;
1538
1671
  for (size_t i=0; i<code.size(); ++i) {
1539
1672
  auto& c = code[i];
1540
1673
  switch(c.o) {
1541
1674
  case cg::op::END: return true;
1542
- case cg::op::EXPECT_OBJECT: if(t!="object") return false; break;
1543
- case cg::op::EXPECT_ARRAY: if(t!="array") return false; break;
1544
- case cg::op::EXPECT_STRING: if(t!="string") return false; break;
1545
- case cg::op::EXPECT_NUMBER: if(t!="number"&&t!="integer") return false; break;
1546
- case cg::op::EXPECT_INTEGER: if(t!="integer") return false; break;
1547
- case cg::op::EXPECT_BOOLEAN: if(t!="boolean") return false; break;
1548
- case cg::op::EXPECT_NULL: if(t!="null") return false; break;
1675
+ case cg::op::EXPECT_OBJECT: if(t!=et::OBJECT) return false; break;
1676
+ case cg::op::EXPECT_ARRAY: if(t!=et::ARRAY) return false; break;
1677
+ case cg::op::EXPECT_STRING: if(t!=et::STRING) return false; break;
1678
+ case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
1679
+ case cg::op::EXPECT_INTEGER: if(t!=et::INT64&&t!=et::UINT64) return false; break;
1680
+ case cg::op::EXPECT_BOOLEAN: if(t!=et::BOOL) return false; break;
1681
+ case cg::op::EXPECT_NULL: if(t!=et::NULL_VALUE) return false; break;
1549
1682
  case cg::op::EXPECT_TYPE_MULTI: {
1550
- auto& ts=p.type_sets[c.a]; bool m=false;
1551
- for(auto& ty:ts){if(t==ty||(ty=="number"&&(t=="integer"||t=="number"))){m=true;break;}}
1552
- if(!m) return false; break;
1553
- }
1554
- case cg::op::CHECK_MINIMUM: if(t=="integer"||t=="number"){if(to_double(value)<p.doubles[c.a])return false;} break;
1555
- case cg::op::CHECK_MAXIMUM: if(t=="integer"||t=="number"){if(to_double(value)>p.doubles[c.a])return false;} break;
1556
- case cg::op::CHECK_EX_MINIMUM: if(t=="integer"||t=="number"){if(to_double(value)<=p.doubles[c.a])return false;} break;
1557
- case cg::op::CHECK_EX_MAXIMUM: if(t=="integer"||t=="number"){if(to_double(value)>=p.doubles[c.a])return false;} break;
1558
- case cg::op::CHECK_MULTIPLE_OF: if(t=="integer"||t=="number"){double v=to_double(value),d=p.doubles[c.a],r=std::fmod(v,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break;
1559
- case cg::op::CHECK_MIN_LENGTH: if(t=="string"){std::string_view sv;value.get(sv);if(utf8_length(sv)<c.a)return false;} break;
1560
- case cg::op::CHECK_MAX_LENGTH: if(t=="string"){std::string_view sv;value.get(sv);if(utf8_length(sv)>c.a)return false;} break;
1561
- case cg::op::CHECK_PATTERN: if(t=="string"){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1562
- case cg::op::CHECK_FORMAT: if(t=="string"){std::string_view sv;value.get(sv);uint8_t f=p.format_ids[c.a];if(f<9&&!check_format(sv,fmt_names[f]))return false;} break;
1563
- case cg::op::CHECK_MIN_ITEMS: if(t=="array"){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s<c.a)return false;} break;
1564
- case cg::op::CHECK_MAX_ITEMS: if(t=="array"){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s>c.a)return false;} break;
1565
- case cg::op::CHECK_UNIQUE_ITEMS: if(t=="array"){dom::array a;value.get(a);std::set<std::string> seen;for(auto x:a)if(!seen.insert(canonical_json(x)).second)return false;} break;
1566
- case cg::op::ARRAY_ITEMS: if(t=="array"){dom::array a;value.get(a);for(auto x:a)if(!cg_exec(p,p.subs[c.a],x))return false;} break;
1567
- case cg::op::CHECK_REQUIRED: if(t=="object"){dom::object o;value.get(o);dom::element d;if(o[p.strings[c.a]].get(d)!=SUCCESS)return false;} break;
1568
- case cg::op::CHECK_MIN_PROPS: if(t=="object"){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n<c.a)return false;} break;
1569
- case cg::op::CHECK_MAX_PROPS: if(t=="object"){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
1570
- case cg::op::OBJ_PROPS_START: if(t=="object"){
1683
+ if(!(element_type_mask(t) & p.type_masks[c.a])) return false; break;
1684
+ }
1685
+ case cg::op::CHECK_MINIMUM: if(t_numeric&&t_dval<p.doubles[c.a])return false; break;
1686
+ case cg::op::CHECK_MAXIMUM: if(t_numeric&&t_dval>p.doubles[c.a])return false; break;
1687
+ case cg::op::CHECK_EX_MINIMUM: if(t_numeric&&t_dval<=p.doubles[c.a])return false; break;
1688
+ case cg::op::CHECK_EX_MAXIMUM: if(t_numeric&&t_dval>=p.doubles[c.a])return false; break;
1689
+ case cg::op::CHECK_MULTIPLE_OF: if(t_numeric){double d=p.doubles[c.a],r=std::fmod(t_dval,d);if(std::abs(r)>1e-8&&std::abs(r-d)>1e-8)return false;} break;
1690
+ case cg::op::CHECK_MIN_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)<c.a)return false;} break;
1691
+ case cg::op::CHECK_MAX_LENGTH: if(t==et::STRING){std::string_view sv;value.get(sv);if(utf8_length(sv)>c.a)return false;} break;
1692
+ case cg::op::CHECK_PATTERN: if(t==et::STRING){std::string_view sv;value.get(sv);if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1693
+ case cg::op::CHECK_FORMAT: if(t==et::STRING){std::string_view sv;value.get(sv);if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
1694
+ case cg::op::CHECK_MIN_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s<c.a)return false;} break;
1695
+ case cg::op::CHECK_MAX_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);uint64_t s=0;for([[maybe_unused]]auto _:a)++s;if(s>c.a)return false;} break;
1696
+ case cg::op::CHECK_UNIQUE_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);std::set<std::string> seen;for(auto x:a)if(!seen.insert(canonical_json(x)).second)return false;} break;
1697
+ case cg::op::ARRAY_ITEMS: if(t==et::ARRAY){dom::array a;value.get(a);for(auto x:a)if(!cg_exec(p,p.subs[c.a],x))return false;} break;
1698
+ case cg::op::CHECK_REQUIRED: if(t==et::OBJECT){dom::object o;value.get(o);dom::element d;if(o[p.strings[c.a]].get(d)!=SUCCESS)return false;} break;
1699
+ case cg::op::CHECK_MIN_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n<c.a)return false;} break;
1700
+ case cg::op::CHECK_MAX_PROPS: if(t==et::OBJECT){dom::object o;value.get(o);uint64_t n=0;for([[maybe_unused]]auto _:o)++n;if(n>c.a)return false;} break;
1701
+ case cg::op::OBJ_PROPS_START: if(t==et::OBJECT){
1571
1702
  dom::object o; value.get(o);
1572
1703
  // collect prop defs
1573
1704
  struct pd{std::string_view nm;uint32_t si;};
@@ -1587,13 +1718,13 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1587
1718
  case cg::op::OBJ_PROP: case cg::op::OBJ_PROPS_END: case cg::op::CHECK_NO_ADDITIONAL: break;
1588
1719
  case cg::op::CHECK_ENUM_STR: {
1589
1720
  auto& es=p.enum_sets[c.a]; bool f=false;
1590
- if(t=="string"){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1721
+ if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1591
1722
  if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
1592
1723
  if(!f)return false; break;
1593
1724
  }
1594
1725
  case cg::op::CHECK_ENUM: {
1595
1726
  auto& es=p.enum_sets[c.a]; bool f=false;
1596
- if(t=="string"){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1727
+ if(t==et::STRING){std::string_view sv;value.get(sv);for(auto& e:es)if(e.size()==sv.size()+2&&e[0]=='"'&&e.back()=='"'&&e.compare(1,sv.size(),sv)==0){f=true;break;}}
1597
1728
  if(!f&&value.is<int64_t>()){int64_t v;value.get(v);auto s=std::to_string(v);for(auto& e:es)if(e==s){f=true;break;}}
1598
1729
  if(!f){std::string v=canonical_json(value);for(auto& e:es)if(e==v){f=true;break;}}
1599
1730
  if(!f)return false; break;
@@ -1609,51 +1740,53 @@ static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1609
1740
  // Uses simdjson On Demand API to avoid materializing the full DOM tree.
1610
1741
  // Returns: true = valid, false = invalid OR unsupported (fallback to DOM).
1611
1742
 
1612
- static std::string_view od_type(simdjson::ondemand::value& v) {
1743
+ static json_type od_type(simdjson::ondemand::value& v) {
1613
1744
  switch (v.type()) {
1614
- case simdjson::ondemand::json_type::object: return "object";
1615
- case simdjson::ondemand::json_type::array: return "array";
1616
- case simdjson::ondemand::json_type::string: return "string";
1617
- case simdjson::ondemand::json_type::boolean: return "boolean";
1618
- case simdjson::ondemand::json_type::null: return "null";
1745
+ case simdjson::ondemand::json_type::object: return json_type::object;
1746
+ case simdjson::ondemand::json_type::array: return json_type::array;
1747
+ case simdjson::ondemand::json_type::string: return json_type::string;
1748
+ case simdjson::ondemand::json_type::boolean: return json_type::boolean;
1749
+ case simdjson::ondemand::json_type::null: return json_type::null_value;
1619
1750
  case simdjson::ondemand::json_type::number: {
1620
1751
  simdjson::ondemand::number_type nt;
1621
1752
  if (v.get_number_type().get(nt) == SUCCESS &&
1622
1753
  nt == simdjson::ondemand::number_type::floating_point_number)
1623
- return "number";
1624
- return "integer";
1754
+ return json_type::number;
1755
+ return json_type::integer;
1625
1756
  }
1626
1757
  }
1627
- return "unknown";
1758
+ return json_type::string;
1628
1759
  }
1629
1760
 
1630
1761
  static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1631
1762
  simdjson::ondemand::value value) {
1632
1763
  auto t = od_type(value);
1764
+ bool t_numeric = (t == json_type::integer || t == json_type::number);
1633
1765
  for (size_t i = 0; i < code.size(); ++i) {
1634
1766
  auto& c = code[i];
1635
1767
  switch (c.o) {
1636
1768
  case cg::op::END: return true;
1637
- case cg::op::EXPECT_OBJECT: if(t!="object") return false; break;
1638
- case cg::op::EXPECT_ARRAY: if(t!="array") return false; break;
1639
- case cg::op::EXPECT_STRING: if(t!="string") return false; break;
1640
- case cg::op::EXPECT_NUMBER: if(t!="number"&&t!="integer") return false; break;
1641
- case cg::op::EXPECT_INTEGER: if(t!="integer") return false; break;
1642
- case cg::op::EXPECT_BOOLEAN: if(t!="boolean") return false; break;
1643
- case cg::op::EXPECT_NULL: if(t!="null") return false; break;
1769
+ case cg::op::EXPECT_OBJECT: if(t!=json_type::object) return false; break;
1770
+ case cg::op::EXPECT_ARRAY: if(t!=json_type::array) return false; break;
1771
+ case cg::op::EXPECT_STRING: if(t!=json_type::string) return false; break;
1772
+ case cg::op::EXPECT_NUMBER: if(!t_numeric) return false; break;
1773
+ case cg::op::EXPECT_INTEGER: if(t!=json_type::integer) return false; break;
1774
+ case cg::op::EXPECT_BOOLEAN: if(t!=json_type::boolean) return false; break;
1775
+ case cg::op::EXPECT_NULL: if(t!=json_type::null_value) return false; break;
1644
1776
  case cg::op::EXPECT_TYPE_MULTI: {
1645
- auto& ts=p.type_sets[c.a]; bool m=false;
1646
- for(auto& ty:ts){if(t==ty||(ty=="number"&&(t=="integer"||t=="number"))){m=true;break;}}
1647
- if(!m) return false; break;
1777
+ // integer matches both "integer" and "number" type constraints
1778
+ uint8_t tbits = json_type_bit(t);
1779
+ if (t == json_type::integer) tbits |= json_type_bit(json_type::number);
1780
+ if(!(tbits & p.type_masks[c.a])) return false; break;
1648
1781
  }
1649
1782
  case cg::op::CHECK_MINIMUM:
1650
1783
  case cg::op::CHECK_MAXIMUM:
1651
1784
  case cg::op::CHECK_EX_MINIMUM:
1652
1785
  case cg::op::CHECK_EX_MAXIMUM:
1653
1786
  case cg::op::CHECK_MULTIPLE_OF: {
1654
- if (t=="integer"||t=="number") {
1787
+ if (t_numeric) {
1655
1788
  double v;
1656
- if (t=="integer") { int64_t iv; if(value.get(iv)!=SUCCESS) return false; v=(double)iv; }
1789
+ if (t==json_type::integer) { int64_t iv; if(value.get(iv)!=SUCCESS) return false; v=(double)iv; }
1657
1790
  else { if(value.get(v)!=SUCCESS) return false; }
1658
1791
  double d=p.doubles[c.a];
1659
1792
  if(c.o==cg::op::CHECK_MINIMUM && v<d) return false;
@@ -1664,39 +1797,39 @@ static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
1664
1797
  }
1665
1798
  break;
1666
1799
  }
1667
- case cg::op::CHECK_MIN_LENGTH: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)<c.a) return false;} break;
1668
- case cg::op::CHECK_MAX_LENGTH: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)>c.a) return false;} break;
1669
- case cg::op::CHECK_PATTERN: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1670
- case cg::op::CHECK_FORMAT: if(t=="string"){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; uint8_t f=p.format_ids[c.a]; if(f<9&&!check_format(sv,fmt_names[f]))return false;} break;
1671
- case cg::op::CHECK_MIN_ITEMS: if(t=="array"){
1800
+ case cg::op::CHECK_MIN_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)<c.a) return false;} break;
1801
+ case cg::op::CHECK_MAX_LENGTH: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(utf8_length(sv)>c.a) return false;} break;
1802
+ case cg::op::CHECK_PATTERN: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!re2::RE2::PartialMatch(re2::StringPiece(sv.data(),sv.size()),*p.regexes[c.a]))return false;} break;
1803
+ case cg::op::CHECK_FORMAT: if(t==json_type::string){std::string_view sv; if(value.get(sv)!=SUCCESS) return false; if(!check_format_by_id(sv,p.format_ids[c.a]))return false;} break;
1804
+ case cg::op::CHECK_MIN_ITEMS: if(t==json_type::array){
1672
1805
  simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
1673
1806
  uint64_t s=0; for(auto x:a){(void)x;++s;} if(s<c.a) return false;
1674
1807
  } break;
1675
- case cg::op::CHECK_MAX_ITEMS: if(t=="array"){
1808
+ case cg::op::CHECK_MAX_ITEMS: if(t==json_type::array){
1676
1809
  simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
1677
1810
  uint64_t s=0; for(auto x:a){(void)x;++s;} if(s>c.a) return false;
1678
1811
  } break;
1679
- case cg::op::ARRAY_ITEMS: if(t=="array"){
1812
+ case cg::op::ARRAY_ITEMS: if(t==json_type::array){
1680
1813
  simdjson::ondemand::array a; if(value.get(a)!=SUCCESS) return false;
1681
1814
  for(auto elem:a){
1682
1815
  simdjson::ondemand::value v; if(elem.get(v)!=SUCCESS) return false;
1683
1816
  if(!od_exec(p,p.subs[c.a],v)) return false;
1684
1817
  }
1685
1818
  } break;
1686
- case cg::op::CHECK_REQUIRED: if(t=="object"){
1819
+ case cg::op::CHECK_REQUIRED: if(t==json_type::object){
1687
1820
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1688
1821
  auto f = o.find_field_unordered(p.strings[c.a]);
1689
1822
  if(f.error()) return false;
1690
1823
  } break;
1691
- case cg::op::CHECK_MIN_PROPS: if(t=="object"){
1824
+ case cg::op::CHECK_MIN_PROPS: if(t==json_type::object){
1692
1825
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1693
1826
  uint64_t n=0; for(auto f:o){(void)f;++n;} if(n<c.a) return false;
1694
1827
  } break;
1695
- case cg::op::CHECK_MAX_PROPS: if(t=="object"){
1828
+ case cg::op::CHECK_MAX_PROPS: if(t==json_type::object){
1696
1829
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1697
1830
  uint64_t n=0; for(auto f:o){(void)f;++n;} if(n>c.a) return false;
1698
1831
  } break;
1699
- case cg::op::OBJ_PROPS_START: if(t=="object"){
1832
+ case cg::op::OBJ_PROPS_START: if(t==json_type::object){
1700
1833
  simdjson::ondemand::object o; if(value.get(o)!=SUCCESS) return false;
1701
1834
  struct pd{std::string_view nm;uint32_t si;};
1702
1835
  std::vector<pd> props; bool no_add=false;
@@ -1849,10 +1982,9 @@ validation_result validate(const schema_ref& schema, std::string_view json,
1849
1982
  // Codegen said invalid OR hit COMPOSITION — fall through to tree walker
1850
1983
  }
1851
1984
 
1852
- // Slow path: re-parse + tree walker with error details
1853
- auto result2 = dom_p.parse(psv);
1985
+ // Slow path: tree walker with error details (reuse already-parsed DOM)
1854
1986
  std::vector<validation_error> errors;
1855
- validate_node(schema.impl->root, result2.value(), "", *schema.impl, errors,
1987
+ validate_node(schema.impl->root, result.value(), "", *schema.impl, errors,
1856
1988
  opts.all_errors);
1857
1989
 
1858
1990
  return {errors.empty(), std::move(errors)};