ata-validator 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding/ata_napi.cpp +17 -3
- package/include/ata.h +5 -3
- package/index.js +28 -3
- package/lib/js-compiler.js +68 -28
- package/package.json +1 -1
- package/prebuilds/ata-darwin-arm64/node-napi-v10.node +0 -0
- package/src/ata.cpp +356 -17
package/binding/ata_napi.cpp
CHANGED
|
@@ -1203,8 +1203,15 @@ static napi_value RawFastValidate(napi_env env, napi_callback_info info) {
|
|
|
1203
1203
|
}
|
|
1204
1204
|
}
|
|
1205
1205
|
} else {
|
|
1206
|
-
|
|
1207
|
-
|
|
1206
|
+
napi_valuetype vtype;
|
|
1207
|
+
napi_typeof(env, args[1], &vtype);
|
|
1208
|
+
if (vtype != napi_string) {
|
|
1209
|
+
napi_throw_type_error(env, nullptr,
|
|
1210
|
+
"rawFastValidate() requires a Buffer, TypedArray, or string. For parsed objects, use validate() or isValidObject().");
|
|
1211
|
+
return nullptr;
|
|
1212
|
+
}
|
|
1213
|
+
// String, must copy (can't pre-pad strings)
|
|
1214
|
+
size_t len = 0;
|
|
1208
1215
|
napi_get_value_string_utf8(env, args[1], nullptr, 0, &len);
|
|
1209
1216
|
if (len <= 4096) {
|
|
1210
1217
|
char buf[4097];
|
|
@@ -1267,7 +1274,14 @@ static napi_value RawBatchValidate(napi_env env, napi_callback_info info) {
|
|
|
1267
1274
|
valid = ata::validate(g_fast_schemas[slot],
|
|
1268
1275
|
std::string_view(static_cast<const char*>(data), length)).valid;
|
|
1269
1276
|
} else {
|
|
1270
|
-
|
|
1277
|
+
napi_valuetype vtype;
|
|
1278
|
+
napi_typeof(env, item, &vtype);
|
|
1279
|
+
if (vtype != napi_string) {
|
|
1280
|
+
napi_throw_type_error(env, nullptr,
|
|
1281
|
+
"rawNDJSONValidate() batch elements must be Buffer, TypedArray, or string");
|
|
1282
|
+
return nullptr;
|
|
1283
|
+
}
|
|
1284
|
+
size_t len = 0;
|
|
1271
1285
|
napi_get_value_string_utf8(env, item, nullptr, 0, &len);
|
|
1272
1286
|
std::string buf(len, '\0');
|
|
1273
1287
|
napi_get_value_string_utf8(env, item, buf.data(), len + 1, &len);
|
package/include/ata.h
CHANGED
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
#include <variant>
|
|
9
9
|
#include <vector>
|
|
10
10
|
|
|
11
|
-
#define ATA_VERSION "0.10.
|
|
11
|
+
#define ATA_VERSION "0.10.4"
|
|
12
12
|
|
|
13
13
|
namespace ata {
|
|
14
14
|
|
|
15
15
|
inline constexpr uint32_t VERSION_MAJOR = 0;
|
|
16
16
|
inline constexpr uint32_t VERSION_MINOR = 10;
|
|
17
|
-
inline constexpr uint32_t VERSION_REVISION =
|
|
17
|
+
inline constexpr uint32_t VERSION_REVISION = 4;
|
|
18
18
|
|
|
19
19
|
inline constexpr std::string_view version() noexcept {
|
|
20
|
-
return "0.10.
|
|
20
|
+
return "0.10.4";
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
enum class error_code : uint8_t {
|
|
@@ -55,6 +55,8 @@ struct validation_error {
|
|
|
55
55
|
error_code code;
|
|
56
56
|
std::string path;
|
|
57
57
|
std::string message;
|
|
58
|
+
std::string expected;
|
|
59
|
+
std::string actual;
|
|
58
60
|
};
|
|
59
61
|
|
|
60
62
|
struct validation_result {
|
package/index.js
CHANGED
|
@@ -693,19 +693,37 @@ class Validator {
|
|
|
693
693
|
this.isValid = (buf) => {
|
|
694
694
|
self._ensureNative();
|
|
695
695
|
const slot = self._fastSlot;
|
|
696
|
-
self.isValid = (b) => {
|
|
696
|
+
self.isValid = (b) => {
|
|
697
|
+
if (typeof b === 'string') b = Buffer.from(b);
|
|
698
|
+
else if (!(b instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
|
|
699
|
+
return native.rawFastValidate(slot, b);
|
|
700
|
+
};
|
|
697
701
|
return self.isValid(buf);
|
|
698
702
|
};
|
|
699
703
|
this.countValid = (ndjsonBuf) => {
|
|
700
704
|
self._ensureNative();
|
|
701
705
|
const slot = self._fastSlot;
|
|
702
|
-
self.countValid = (b) => {
|
|
706
|
+
self.countValid = (b) => {
|
|
707
|
+
if (typeof b === 'string') b = Buffer.from(b);
|
|
708
|
+
else if (!(b instanceof Uint8Array)) throw new TypeError('countValid() requires a Buffer, Uint8Array, or string');
|
|
709
|
+
const r = native.rawNDJSONValidate(slot, b);
|
|
710
|
+
let c = 0;
|
|
711
|
+
for (let i = 0; i < r.length; i++) if (r[i]) c++;
|
|
712
|
+
return c;
|
|
713
|
+
};
|
|
703
714
|
return self.countValid(ndjsonBuf);
|
|
704
715
|
};
|
|
705
716
|
this.batchIsValid = (buffers) => {
|
|
706
717
|
self._ensureNative();
|
|
707
718
|
const slot = self._fastSlot;
|
|
708
|
-
self.batchIsValid = (bufs) => {
|
|
719
|
+
self.batchIsValid = (bufs) => {
|
|
720
|
+
let v = 0;
|
|
721
|
+
for (const b of bufs) {
|
|
722
|
+
if (!(b instanceof Uint8Array)) throw new TypeError('batchIsValid() requires Buffer or Uint8Array elements');
|
|
723
|
+
if (native.rawFastValidate(slot, b)) v++;
|
|
724
|
+
}
|
|
725
|
+
return v;
|
|
726
|
+
};
|
|
709
727
|
return self.batchIsValid(buffers);
|
|
710
728
|
};
|
|
711
729
|
}
|
|
@@ -732,6 +750,7 @@ class Validator {
|
|
|
732
750
|
const slot = this._fastSlot;
|
|
733
751
|
this.isValid = (buf) => {
|
|
734
752
|
if (typeof buf === 'string') buf = Buffer.from(buf);
|
|
753
|
+
else if (!(buf instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
|
|
735
754
|
return native.rawFastValidate(slot, buf);
|
|
736
755
|
};
|
|
737
756
|
}
|
|
@@ -739,6 +758,7 @@ class Validator {
|
|
|
739
758
|
const slot = this._fastSlot;
|
|
740
759
|
this.countValid = (ndjsonBuf) => {
|
|
741
760
|
if (typeof ndjsonBuf === 'string') ndjsonBuf = Buffer.from(ndjsonBuf);
|
|
761
|
+
else if (!(ndjsonBuf instanceof Uint8Array)) throw new TypeError('countValid() requires a Buffer, Uint8Array, or string');
|
|
742
762
|
const results = native.rawNDJSONValidate(slot, ndjsonBuf);
|
|
743
763
|
let count = 0;
|
|
744
764
|
for (let i = 0; i < results.length; i++) if (results[i]) count++;
|
|
@@ -750,6 +770,7 @@ class Validator {
|
|
|
750
770
|
this.batchIsValid = (buffers) => {
|
|
751
771
|
let valid = 0;
|
|
752
772
|
for (const buf of buffers) {
|
|
773
|
+
if (!(buf instanceof Uint8Array)) throw new TypeError('batchIsValid() requires Buffer or Uint8Array elements');
|
|
753
774
|
if (native.rawFastValidate(slot, buf)) valid++;
|
|
754
775
|
}
|
|
755
776
|
return valid;
|
|
@@ -825,6 +846,7 @@ class Validator {
|
|
|
825
846
|
if (!jsFn || !jsFn._source) return null;
|
|
826
847
|
const src = jsFn._source;
|
|
827
848
|
const hybridSrc = jsFn._hybridSource || "";
|
|
849
|
+
const preambleSrc = jsFn._preambleSource || "";
|
|
828
850
|
|
|
829
851
|
// Also capture error function source for zero-compile standalone load
|
|
830
852
|
const jsErrFn = compileToJSCodegenWithErrors(
|
|
@@ -835,6 +857,7 @@ class Validator {
|
|
|
835
857
|
return `// Auto-generated by ata-validator — do not edit
|
|
836
858
|
'use strict';
|
|
837
859
|
${_CP_LEN_SOURCE}
|
|
860
|
+
${preambleSrc}
|
|
838
861
|
const boolFn = function(d) {
|
|
839
862
|
${src}
|
|
840
863
|
};
|
|
@@ -1025,6 +1048,8 @@ ${exports}`;
|
|
|
1025
1048
|
// Raw NAPI fast path for Buffer/Uint8Array
|
|
1026
1049
|
isValid(input) {
|
|
1027
1050
|
if (!native) throw new Error('Native addon required for isValid() — install build tools or use validate() instead');
|
|
1051
|
+
if (typeof input === 'string') input = Buffer.from(input);
|
|
1052
|
+
else if (!(input instanceof Uint8Array)) throw new TypeError('isValid() requires a Buffer, Uint8Array, or string. For parsed objects, use isValidObject().');
|
|
1028
1053
|
this._ensureNative();
|
|
1029
1054
|
return native.rawFastValidate(this._fastSlot, input);
|
|
1030
1055
|
}
|
package/lib/js-compiler.js
CHANGED
|
@@ -827,7 +827,7 @@ function compileToJSCodegen(schema, schemaMap) {
|
|
|
827
827
|
}
|
|
828
828
|
}
|
|
829
829
|
|
|
830
|
-
const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
|
|
830
|
+
const ctx = { varCounter: 0, helpers: [], helperCode: [], preamble: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
|
|
831
831
|
const lines = []
|
|
832
832
|
genCode(schema, 'd', lines, ctx)
|
|
833
833
|
|
|
@@ -864,24 +864,30 @@ function compileToJSCodegen(schema, schemaMap) {
|
|
|
864
864
|
hybridBody = replaceTopLevel(checkStr + '\n return R')
|
|
865
865
|
}
|
|
866
866
|
|
|
867
|
+
const preambleStr = ctx.preamble && ctx.preamble.length ? ctx.preamble.join('\n ') + '\n ' : ''
|
|
868
|
+
|
|
867
869
|
try {
|
|
868
870
|
let boolFn
|
|
869
871
|
if (closureNames.length > 0) {
|
|
870
|
-
const factory = new Function(...closureNames,
|
|
872
|
+
const factory = new Function(...closureNames, `${preambleStr}return function(d){${body}}`)
|
|
871
873
|
boolFn = factory(...closureValues)
|
|
874
|
+
} else if (preambleStr) {
|
|
875
|
+
const factory = new Function(`${preambleStr}return function(d){${body}}`)
|
|
876
|
+
boolFn = factory()
|
|
872
877
|
} else {
|
|
873
878
|
boolFn = new Function('d', body)
|
|
874
879
|
}
|
|
875
880
|
|
|
876
881
|
// Build hybrid: same body, return R instead of true, return E(d) instead of false.
|
|
877
882
|
try {
|
|
878
|
-
const hybridFactory = new Function(...closureNames, 'R', 'E',
|
|
883
|
+
const hybridFactory = new Function(...closureNames, 'R', 'E', `${preambleStr}return function(d){${hybridBody}}`)
|
|
879
884
|
boolFn._hybridFactory = (R, E) => hybridFactory(...closureValues, R, E)
|
|
880
885
|
} catch {}
|
|
881
886
|
|
|
882
887
|
// Store source for standalone compilation (includes regex inline for file output)
|
|
883
888
|
const helperStr = ctx.helperCode.length ? ctx.helperCode.join('\n ') + '\n ' : ''
|
|
884
889
|
boolFn._source = helperStr + body
|
|
890
|
+
boolFn._preambleSource = preambleStr
|
|
885
891
|
boolFn._hybridSource = helperStr + hybridBody
|
|
886
892
|
|
|
887
893
|
return boolFn
|
|
@@ -984,15 +990,21 @@ function tryGenCombined(schema, access, ctx) {
|
|
|
984
990
|
return `{${prelude}if(typeof ${v2}!=='string')return false;const _lv=${v2}.length;if(_lv<${M}||_lv>${X * 2})return false;if(_lv<${M * 2}||_lv>${X}){const _cp=_cpLen(${v2});if(_cp<${M}||_cp>${X})return false}}`
|
|
985
991
|
}
|
|
986
992
|
const conds = [`typeof _v!=='string'`]
|
|
987
|
-
if (schema.minLength !== undefined) {
|
|
993
|
+
if (schema.minLength !== undefined && schema.minLength > 0) {
|
|
988
994
|
const M = schema.minLength
|
|
989
995
|
conds.push(`_v.length<${M}`)
|
|
990
|
-
|
|
996
|
+
// For M=1, length<1 already catches all failures (any non-empty string has cpLen>=1).
|
|
997
|
+
if (M > 1) conds.push(`_v.length<${M * 2}&&_cpLen(_v)<${M}`)
|
|
991
998
|
}
|
|
992
999
|
if (schema.maxLength !== undefined) {
|
|
993
1000
|
const X = schema.maxLength
|
|
994
|
-
|
|
995
|
-
|
|
1001
|
+
// For X=0, only empty string passes, length>0 fails — no cpLen needed.
|
|
1002
|
+
if (X === 0) {
|
|
1003
|
+
conds.push(`_v.length>0`)
|
|
1004
|
+
} else {
|
|
1005
|
+
conds.push(`_v.length>${X * 2}`)
|
|
1006
|
+
conds.push(`_v.length>${X}&&_cpLen(_v)>${X}`)
|
|
1007
|
+
}
|
|
996
1008
|
}
|
|
997
1009
|
if (conds.length < 2) return null
|
|
998
1010
|
return bind(conds)
|
|
@@ -1263,14 +1275,20 @@ function genCode(schema, v, lines, ctx, knownType) {
|
|
|
1263
1275
|
const body = `{const ${lv}=${v}.length;if(${lv}<${M}||${lv}>${X * 2})return false;if(${lv}<${M * 2}||${lv}>${X}){const _cp=_cpLen(${v});if(_cp<${M}||_cp>${X})return false}}`
|
|
1264
1276
|
lines.push(isStr ? body : `if(typeof ${v}==='string')${body}`)
|
|
1265
1277
|
} else {
|
|
1266
|
-
if (schema.minLength !== undefined) {
|
|
1278
|
+
if (schema.minLength !== undefined && schema.minLength > 0) {
|
|
1267
1279
|
const M = schema.minLength
|
|
1268
|
-
|
|
1280
|
+
// M==1: length<1 already catches empty strings (any non-empty string has cpLen>=1).
|
|
1281
|
+
const body = M === 1
|
|
1282
|
+
? `if(${v}.length<1)return false`
|
|
1283
|
+
: `if(${v}.length<${M})return false;if(${v}.length<${M * 2}&&_cpLen(${v})<${M})return false`
|
|
1269
1284
|
lines.push(isStr ? body : `if(typeof ${v}==='string'){${body}}`)
|
|
1270
1285
|
}
|
|
1271
1286
|
if (schema.maxLength !== undefined) {
|
|
1272
1287
|
const X = schema.maxLength
|
|
1273
|
-
|
|
1288
|
+
// X==0: only empty string passes, no cpLen needed.
|
|
1289
|
+
const body = X === 0
|
|
1290
|
+
? `if(${v}.length>0)return false`
|
|
1291
|
+
: `if(${v}.length>${X * 2})return false;if(${v}.length>${X}&&_cpLen(${v})>${X})return false`
|
|
1274
1292
|
lines.push(isStr ? body : `if(typeof ${v}==='string'){${body}}`)
|
|
1275
1293
|
}
|
|
1276
1294
|
}
|
|
@@ -1603,37 +1621,59 @@ function genCode(schema, v, lines, ctx, knownType) {
|
|
|
1603
1621
|
}
|
|
1604
1622
|
}
|
|
1605
1623
|
|
|
1606
|
-
// anyOf —
|
|
1624
|
+
// anyOf — branch fns hoisted when safe, inline fallback when recursive.
|
|
1607
1625
|
// Skip standard anyOf if unevaluatedProperties will handle it (single-pass optimization)
|
|
1608
1626
|
if (schema.anyOf && schema.unevaluatedProperties === undefined) {
|
|
1609
|
-
const
|
|
1627
|
+
const fi = ctx.varCounter++
|
|
1628
|
+
const branchBodies = []
|
|
1629
|
+
let canHoist = !!ctx.preamble
|
|
1610
1630
|
for (let i = 0; i < schema.anyOf.length; i++) {
|
|
1611
1631
|
const subLines = []
|
|
1612
1632
|
genCode(schema.anyOf[i], '_av', subLines, ctx)
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1633
|
+
const body = subLines.length === 0 ? 'return true' : `${subLines.join(';')};return true`
|
|
1634
|
+
if (/\b_validate\b/.test(body)) canHoist = false
|
|
1635
|
+
branchBodies.push(body)
|
|
1636
|
+
}
|
|
1637
|
+
if (canHoist) {
|
|
1638
|
+
const names = branchBodies.map((body, i) => {
|
|
1639
|
+
const name = `_af${fi}_b${i}`
|
|
1640
|
+
ctx.preamble.push(`function ${name}(_av){${body}}`)
|
|
1641
|
+
return name
|
|
1642
|
+
})
|
|
1643
|
+
const checks = names.map(n => `${n}(${v})`).join('||')
|
|
1644
|
+
lines.push(`if(!(${checks}))return false`)
|
|
1645
|
+
} else {
|
|
1646
|
+
const fns = branchBodies.map(body => `function(_av){${body}}`)
|
|
1647
|
+
lines.push(`{const _af${fi}=[${fns.join(',')}];let _am${fi}=false;for(let _ai=0;_ai<_af${fi}.length;_ai++){if(_af${fi}[_ai](${v})){_am${fi}=true;break}}if(!_am${fi})return false}`)
|
|
1618
1648
|
}
|
|
1619
|
-
const fi = ctx.varCounter++
|
|
1620
|
-
lines.push(`{const _af${fi}=[${fns.join(',')}];let _am${fi}=false;for(let _ai=0;_ai<_af${fi}.length;_ai++){if(_af${fi}[_ai](${v})){_am${fi}=true;break}}if(!_am${fi})return false}`)
|
|
1621
1649
|
}
|
|
1622
1650
|
|
|
1623
|
-
// oneOf
|
|
1651
|
+
// oneOf — branch fns hoisted to factory scope when safe (no recursion/ref).
|
|
1652
|
+
// Falls back to inline closures if any branch touches recursive validation.
|
|
1624
1653
|
if (schema.oneOf) {
|
|
1625
|
-
const
|
|
1654
|
+
const fi = ctx.varCounter++
|
|
1655
|
+
const branchBodies = []
|
|
1656
|
+
let canHoist = !!ctx.preamble
|
|
1626
1657
|
for (let i = 0; i < schema.oneOf.length; i++) {
|
|
1627
1658
|
const subLines = []
|
|
1628
1659
|
genCode(schema.oneOf[i], '_ov', subLines, ctx)
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1660
|
+
const body = subLines.length === 0 ? 'return true' : `${subLines.join(';')};return true`
|
|
1661
|
+
// _validate is the recursive entry — hoisting branches above it breaks scope.
|
|
1662
|
+
if (/\b_validate\b/.test(body)) canHoist = false
|
|
1663
|
+
branchBodies.push(body)
|
|
1664
|
+
}
|
|
1665
|
+
if (canHoist) {
|
|
1666
|
+
const names = branchBodies.map((body, i) => {
|
|
1667
|
+
const name = `_of${fi}_b${i}`
|
|
1668
|
+
ctx.preamble.push(`function ${name}(_ov){${body}}`)
|
|
1669
|
+
return name
|
|
1670
|
+
})
|
|
1671
|
+
const calls = names.map(n => `if(${n}(${v})){_oc${fi}++;if(_oc${fi}>1)return false}`).join(';')
|
|
1672
|
+
lines.push(`{let _oc${fi}=0;${calls};if(_oc${fi}!==1)return false}`)
|
|
1673
|
+
} else {
|
|
1674
|
+
const fns = branchBodies.map(body => `function(_ov){${body}}`)
|
|
1675
|
+
lines.push(`{const _of${fi}=[${fns.join(',')}];let _oc${fi}=0;for(let _oi=0;_oi<_of${fi}.length;_oi++){if(_of${fi}[_oi](${v}))_oc${fi}++;if(_oc${fi}>1)return false}if(_oc${fi}!==1)return false}`)
|
|
1634
1676
|
}
|
|
1635
|
-
const fi = ctx.varCounter++
|
|
1636
|
-
lines.push(`{const _of${fi}=[${fns.join(',')}];let _oc${fi}=0;for(let _oi=0;_oi<_of${fi}.length;_oi++){if(_of${fi}[_oi](${v}))_oc${fi}++;if(_oc${fi}>1)return false}if(_oc${fi}!==1)return false}`)
|
|
1637
1677
|
}
|
|
1638
1678
|
|
|
1639
1679
|
// not
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ata-validator",
|
|
3
|
-
"version": "0.12.
|
|
3
|
+
"version": "0.12.2",
|
|
4
4
|
"description": "Ultra-fast JSON Schema validator. 5x faster validation, 159,000x faster compilation. Works without native addon. Cross-schema $ref, Draft 2020-12 + Draft 7, V8-optimized JS codegen, simdjson, RE2, multi-core. Standard Schema V1 compatible.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"module": "index.mjs",
|
|
Binary file
|
package/src/ata.cpp
CHANGED
|
@@ -391,13 +391,29 @@ struct od_plan {
|
|
|
391
391
|
#ifndef ATA_NO_RE2
|
|
392
392
|
re2::RE2* pattern = nullptr; // borrowed pointer from schema_node
|
|
393
393
|
#endif
|
|
394
|
+
// Inline digit pattern: ^[0-9]+$ / ^[0-9]{N}$ / ^[0-9]{N,M}$ / ^\d+$ etc.
|
|
395
|
+
// When set, supersedes the RE2 pattern call.
|
|
396
|
+
struct digit_pattern_t { uint32_t min_len; uint32_t max_len; };
|
|
397
|
+
std::optional<digit_pattern_t> digit_pattern;
|
|
394
398
|
uint8_t format_id = 255; // 255 = no format check
|
|
395
399
|
|
|
396
400
|
// Object — single iterate with merged required+property lookup
|
|
401
|
+
// fast_kind allows the obj iterator to skip the recursive od_exec_plan call
|
|
402
|
+
// (and its inner type().get() + switch) for primitive sub-plans, dispatching
|
|
403
|
+
// straight to inline constraint code.
|
|
404
|
+
enum class fast_kind : uint8_t {
|
|
405
|
+
OTHER = 0, // recurse via od_exec_plan
|
|
406
|
+
INTEGER, // value.get(int64) + numeric range/multipleOf
|
|
407
|
+
STRING, // value.get(string) + length/pattern/format/enum
|
|
408
|
+
BOOLEAN, // value.get(bool)
|
|
409
|
+
OBJECT, // value.get(object) + nested obj iteration (skips type detect)
|
|
410
|
+
ARRAY, // value.get(array) + element iteration (skips type detect)
|
|
411
|
+
};
|
|
397
412
|
struct prop_entry {
|
|
398
413
|
std::string key;
|
|
399
414
|
int required_idx = -1; // bit index for required tracking, or -1
|
|
400
415
|
std::shared_ptr<od_plan> sub; // property sub-plan, or nullptr
|
|
416
|
+
fast_kind fk = fast_kind::OTHER; // inline dispatch hint, set at compile time
|
|
401
417
|
};
|
|
402
418
|
struct obj_plan {
|
|
403
419
|
std::vector<prop_entry> entries; // merged required + properties — single scan
|
|
@@ -414,6 +430,17 @@ struct od_plan {
|
|
|
414
430
|
};
|
|
415
431
|
std::shared_ptr<arr_plan> array;
|
|
416
432
|
|
|
433
|
+
// Enum: primitive-only set membership. Complex (object/array) enums fall back.
|
|
434
|
+
struct enum_constraint {
|
|
435
|
+
std::vector<std::string> strings; // unescaped string values
|
|
436
|
+
std::vector<int64_t> integers;
|
|
437
|
+
std::vector<double> doubles;
|
|
438
|
+
bool has_null = false;
|
|
439
|
+
bool has_true = false;
|
|
440
|
+
bool has_false = false;
|
|
441
|
+
};
|
|
442
|
+
std::shared_ptr<enum_constraint> enum_check;
|
|
443
|
+
|
|
417
444
|
// If false, schema uses unsupported features — must fall back to DOM path.
|
|
418
445
|
bool supported = true;
|
|
419
446
|
};
|
|
@@ -1360,8 +1387,10 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
1360
1387
|
expected += json_type_name(static_cast<json_type>(b));
|
|
1361
1388
|
}
|
|
1362
1389
|
}
|
|
1390
|
+
std::string actual = std::string(type_of_sv(value));
|
|
1363
1391
|
errors.push_back({error_code::type_mismatch, path,
|
|
1364
|
-
"expected type " + expected + ", got " +
|
|
1392
|
+
"expected type " + expected + ", got " + actual,
|
|
1393
|
+
expected, actual});
|
|
1365
1394
|
ATA_CHECK_EARLY();
|
|
1366
1395
|
}
|
|
1367
1396
|
}
|
|
@@ -2399,6 +2428,59 @@ static simdjson::padded_string_view get_free_padded_view(
|
|
|
2399
2428
|
return simdjson::padded_string_view(data, length, length + REQUIRED_PADDING);
|
|
2400
2429
|
}
|
|
2401
2430
|
|
|
2431
|
+
// Recognize anchored, digit-only regex patterns so the caller can replace a
// regex match with a plain length check plus an ASCII-digit scan.
//
// Accepted shapes (and nothing else):
//   ^[0-9]+$   ^[0-9]*$   ^[0-9]{N}$   ^[0-9]{N,}$   ^[0-9]{N,M}$
// and the same five shapes with \d in place of [0-9].
//
// p      - the pattern text exactly as written in the schema.
// min_n  - out: minimum number of digits (inclusive).
// max_n  - out: maximum number of digits (inclusive); UINT32_MAX means
//          "no upper bound".
//
// Returns true when the pattern was recognized. On false, min_n and max_n are
// left untouched, so the caller can keep using its regular regex path.
static bool parse_digit_pattern(const std::string& p,
                                uint32_t& min_n, uint32_t& max_n) {
  // The shortest accepted pattern is "^\d+$" / "^\d*$": five characters.
  // (A larger threshold would silently reject the \d+ and \d* variants.)
  if (p.size() < 5) return false;
  if (p.front() != '^' || p.back() != '$') return false;

  // Everything between the anchors: "<digit-class><quantifier>".
  const std::string_view body(p.data() + 1, p.size() - 2);

  // Step 1: the digit class, either "[0-9]" or "\d".
  size_t pos = 0;
  if (body.size() >= 5 && body.compare(0, 5, "[0-9]") == 0) {
    pos = 5;
  } else if (body.size() >= 2 && body[0] == '\\' && body[1] == 'd') {
    pos = 2;
  } else {
    return false;
  }
  if (pos >= body.size()) return false;  // a bare class has no quantifier

  // Parses a non-empty run of decimal digits that fits in uint32_t.
  auto parse_num = [](std::string_view s, uint32_t& out) -> bool {
    if (s.empty()) return false;
    uint64_t v = 0;
    for (char c : s) {
      if (c < '0' || c > '9') return false;
      v = v * 10 + static_cast<uint64_t>(c - '0');
      if (v > UINT32_MAX) return false;  // also keeps v from overflowing
    }
    out = static_cast<uint32_t>(v);
    return true;
  };

  // Step 2: the quantifier, which must run to the end of the body.
  // Bounds are collected in locals and only published on success.
  uint32_t lo = 0;
  uint32_t hi = 0;
  const char q = body[pos];
  const bool q_is_last = (pos + 1 == body.size());

  if (q == '+' && q_is_last) {
    lo = 1;
    hi = UINT32_MAX;
  } else if (q == '*' && q_is_last) {
    lo = 0;
    hi = UINT32_MAX;
  } else if (q == '{') {
    const size_t close = body.find('}', pos);
    if (close == std::string_view::npos || close + 1 != body.size()) return false;

    const std::string_view inner = body.substr(pos + 1, close - pos - 1);
    if (inner.empty()) return false;

    const size_t comma = inner.find(',');
    if (comma == std::string_view::npos) {
      // {N}: exactly N digits.
      if (!parse_num(inner, lo)) return false;
      hi = lo;
    } else {
      // {N,} or {N,M}.
      const std::string_view first = inner.substr(0, comma);
      const std::string_view second = inner.substr(comma + 1);
      if (!parse_num(first, lo)) return false;
      if (second.empty()) {
        hi = UINT32_MAX;
      } else if (!parse_num(second, hi)) {
        return false;
      }
    }
  } else {
    return false;
  }

  // {N,M} with N > M is not a valid quantifier; decline it so the regex
  // engine deals with it instead of installing a range no string can satisfy.
  if (lo > hi) return false;

  min_n = lo;
  max_n = hi;
  return true;
}
|
|
2483
|
+
|
|
2402
2484
|
// Build an od_plan from a schema_node tree.
|
|
2403
2485
|
static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
2404
2486
|
if (!node) return nullptr;
|
|
@@ -2412,7 +2494,6 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2412
2494
|
|
|
2413
2495
|
// Unsupported features → fall back to DOM
|
|
2414
2496
|
if (!node->ref.empty() ||
|
|
2415
|
-
!node->enum_values_minified.empty() ||
|
|
2416
2497
|
node->const_value_raw.has_value() ||
|
|
2417
2498
|
node->unique_items ||
|
|
2418
2499
|
!node->all_of.empty() ||
|
|
@@ -2431,6 +2512,47 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2431
2512
|
return plan;
|
|
2432
2513
|
}
|
|
2433
2514
|
|
|
2515
|
+
// Enum: handle primitive enums on the on-demand path. Complex enum values
|
|
2516
|
+
// (objects, arrays, escaped strings) fall back to DOM.
|
|
2517
|
+
if (!node->enum_values_minified.empty()) {
|
|
2518
|
+
auto ec = std::make_shared<od_plan::enum_constraint>();
|
|
2519
|
+
for (const auto& ev : node->enum_values_minified) {
|
|
2520
|
+
if (ev.empty()) { plan->supported = false; return plan; }
|
|
2521
|
+
char c = ev[0];
|
|
2522
|
+
if (c == '"') {
|
|
2523
|
+
if (ev.size() < 2 || ev.back() != '"') { plan->supported = false; return plan; }
|
|
2524
|
+
bool has_escape = false;
|
|
2525
|
+
for (size_t i = 1; i + 1 < ev.size(); i++) {
|
|
2526
|
+
if (ev[i] == '\\') { has_escape = true; break; }
|
|
2527
|
+
}
|
|
2528
|
+
if (has_escape) { plan->supported = false; return plan; }
|
|
2529
|
+
ec->strings.push_back(ev.substr(1, ev.size() - 2));
|
|
2530
|
+
} else if (c == '-' || (c >= '0' && c <= '9')) {
|
|
2531
|
+
bool is_int = true;
|
|
2532
|
+
for (size_t i = (c == '-' ? 1 : 0); i < ev.size(); i++) {
|
|
2533
|
+
if (ev[i] < '0' || ev[i] > '9') { is_int = false; break; }
|
|
2534
|
+
}
|
|
2535
|
+
if (is_int) {
|
|
2536
|
+
try { ec->integers.push_back(std::stoll(ev)); }
|
|
2537
|
+
catch (...) { plan->supported = false; return plan; }
|
|
2538
|
+
} else {
|
|
2539
|
+
try { ec->doubles.push_back(std::stod(ev)); }
|
|
2540
|
+
catch (...) { plan->supported = false; return plan; }
|
|
2541
|
+
}
|
|
2542
|
+
} else if (ev == "true") {
|
|
2543
|
+
ec->has_true = true;
|
|
2544
|
+
} else if (ev == "false") {
|
|
2545
|
+
ec->has_false = true;
|
|
2546
|
+
} else if (ev == "null") {
|
|
2547
|
+
ec->has_null = true;
|
|
2548
|
+
} else {
|
|
2549
|
+
plan->supported = false;
|
|
2550
|
+
return plan;
|
|
2551
|
+
}
|
|
2552
|
+
}
|
|
2553
|
+
plan->enum_check = std::move(ec);
|
|
2554
|
+
}
|
|
2555
|
+
|
|
2434
2556
|
plan->type_mask = node->type_mask;
|
|
2435
2557
|
if (node->minimum) { plan->num_flags |= od_plan::HAS_MIN; plan->num_min = *node->minimum; }
|
|
2436
2558
|
if (node->maximum) { plan->num_flags |= od_plan::HAS_MAX; plan->num_max = *node->maximum; }
|
|
@@ -2442,6 +2564,16 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2442
2564
|
#ifndef ATA_NO_RE2
|
|
2443
2565
|
plan->pattern = node->compiled_pattern.get();
|
|
2444
2566
|
#endif
|
|
2567
|
+
// Try to recognize the pattern as an inlined digit check; if so, skip RE2.
|
|
2568
|
+
if (node->pattern.has_value()) {
|
|
2569
|
+
uint32_t min_n, max_n;
|
|
2570
|
+
if (parse_digit_pattern(*node->pattern, min_n, max_n)) {
|
|
2571
|
+
plan->digit_pattern = od_plan::digit_pattern_t{min_n, max_n};
|
|
2572
|
+
#ifndef ATA_NO_RE2
|
|
2573
|
+
plan->pattern = nullptr;
|
|
2574
|
+
#endif
|
|
2575
|
+
}
|
|
2576
|
+
}
|
|
2445
2577
|
plan->format_id = node->format_id;
|
|
2446
2578
|
|
|
2447
2579
|
// Object plan — build hash lookup for O(1) per-field dispatch
|
|
@@ -2477,7 +2609,46 @@ static od_plan_ptr compile_od_plan(const schema_node_ptr& node) {
|
|
|
2477
2609
|
op->entries[it->second].sub = std::move(sub);
|
|
2478
2610
|
} else {
|
|
2479
2611
|
key_to_idx[key] = op->entries.size();
|
|
2480
|
-
op->entries.push_back({key, -1, std::move(sub)});
|
|
2612
|
+
op->entries.push_back({key, -1, std::move(sub), od_plan::fast_kind::OTHER});
|
|
2613
|
+
}
|
|
2614
|
+
}
|
|
2615
|
+
// Compute fast_kind for each entry post-hoc — lets the obj iterator
|
|
2616
|
+
// skip the recursive od_exec_plan call (and its type().get + switch)
|
|
2617
|
+
// for primitive properties, and also skip type detection for nested
|
|
2618
|
+
// objects / primitive arrays.
|
|
2619
|
+
for (auto& e : op->entries) {
|
|
2620
|
+
if (!e.sub) continue;
|
|
2621
|
+
// Nested object: dispatch directly to value.get(obj) + iteration helper.
|
|
2622
|
+
if (e.sub->object && !e.sub->array) {
|
|
2623
|
+
e.fk = od_plan::fast_kind::OBJECT;
|
|
2624
|
+
continue;
|
|
2625
|
+
}
|
|
2626
|
+
// Array: dispatch directly to value.get(arr) + element iteration helper.
|
|
2627
|
+
if (e.sub->array && !e.sub->object) {
|
|
2628
|
+
e.fk = od_plan::fast_kind::ARRAY;
|
|
2629
|
+
continue;
|
|
2630
|
+
}
|
|
2631
|
+
if (e.sub->object || e.sub->array) continue; // unusual: both, skip
|
|
2632
|
+
uint8_t m = e.sub->type_mask;
|
|
2633
|
+
uint8_t s_bit = json_type_bit(json_type::string);
|
|
2634
|
+
uint8_t i_bit = json_type_bit(json_type::integer);
|
|
2635
|
+
uint8_t n_bit = json_type_bit(json_type::number);
|
|
2636
|
+
uint8_t b_bit = json_type_bit(json_type::boolean);
|
|
2637
|
+
if (m == s_bit) e.fk = od_plan::fast_kind::STRING;
|
|
2638
|
+
else if (m == i_bit || m == (i_bit | n_bit)) e.fk = od_plan::fast_kind::INTEGER;
|
|
2639
|
+
else if (m == b_bit) e.fk = od_plan::fast_kind::BOOLEAN;
|
|
2640
|
+
else if (m == 0 && e.sub->enum_check) {
|
|
2641
|
+
// Untyped enum: infer from enum value shapes
|
|
2642
|
+
auto& ec = *e.sub->enum_check;
|
|
2643
|
+
bool only_str = !ec.strings.empty() && ec.integers.empty() && ec.doubles.empty()
|
|
2644
|
+
&& !ec.has_null && !ec.has_true && !ec.has_false;
|
|
2645
|
+
bool only_int = ec.strings.empty() && !ec.integers.empty() && ec.doubles.empty()
|
|
2646
|
+
&& !ec.has_null && !ec.has_true && !ec.has_false;
|
|
2647
|
+
bool only_bool = ec.strings.empty() && ec.integers.empty() && ec.doubles.empty()
|
|
2648
|
+
&& !ec.has_null && (ec.has_true || ec.has_false);
|
|
2649
|
+
if (only_str) e.fk = od_plan::fast_kind::STRING;
|
|
2650
|
+
else if (only_int) e.fk = od_plan::fast_kind::INTEGER;
|
|
2651
|
+
else if (only_bool) e.fk = od_plan::fast_kind::BOOLEAN;
|
|
2481
2652
|
}
|
|
2482
2653
|
}
|
|
2483
2654
|
plan->object = std::move(op);
|
|
@@ -2517,11 +2688,18 @@ static inline uint64_t utf8_length_fast(std::string_view s) {
|
|
|
2517
2688
|
|
|
2518
2689
|
// Execute an od_plan against a simdjson On-Demand value.
|
|
2519
2690
|
// Each value consumed exactly once. Uses simdjson types directly — no od_type() overhead.
|
|
2520
|
-
|
|
2521
|
-
|
|
2691
|
+
// `pre_type` is an optional caller-supplied type hint; when set, the value.type().get
|
|
2692
|
+
// simdjson call is skipped (saves ~3-5 ns per recursion). Used by fast_kind::OBJECT
|
|
2693
|
+
// / ARRAY dispatch where the type is already established at compile time.
|
|
2694
|
+
static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value,
|
|
2695
|
+
std::optional<simdjson::ondemand::json_type> pre_type = std::nullopt) {
|
|
2522
2696
|
using sjt = simdjson::ondemand::json_type;
|
|
2523
2697
|
sjt st;
|
|
2524
|
-
if (
|
|
2698
|
+
if (pre_type) {
|
|
2699
|
+
st = *pre_type;
|
|
2700
|
+
} else {
|
|
2701
|
+
if (value.type().get(st) != SUCCESS) return false;
|
|
2702
|
+
}
|
|
2525
2703
|
|
|
2526
2704
|
// Type check using simdjson type directly
|
|
2527
2705
|
if (plan.type_mask) {
|
|
@@ -2552,11 +2730,12 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2552
2730
|
|
|
2553
2731
|
switch (st) {
|
|
2554
2732
|
case sjt::number: {
|
|
2555
|
-
|
|
2733
|
+
bool need_value = plan.num_flags || plan.enum_check;
|
|
2734
|
+
if (!need_value) break;
|
|
2556
2735
|
double v;
|
|
2557
|
-
// Try integer first (more common), fall back to double
|
|
2558
2736
|
int64_t iv;
|
|
2559
|
-
|
|
2737
|
+
bool got_int = (value.get(iv) == SUCCESS);
|
|
2738
|
+
if (got_int) {
|
|
2560
2739
|
v = static_cast<double>(iv);
|
|
2561
2740
|
} else if (value.get(v) != SUCCESS) {
|
|
2562
2741
|
return false;
|
|
@@ -2570,6 +2749,20 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2570
2749
|
double r = std::fmod(v, plan.num_mul);
|
|
2571
2750
|
if (std::abs(r) > 1e-8 && std::abs(r - plan.num_mul) > 1e-8) return false;
|
|
2572
2751
|
}
|
|
2752
|
+
if (plan.enum_check) {
|
|
2753
|
+
auto& ec = *plan.enum_check;
|
|
2754
|
+
bool match = false;
|
|
2755
|
+
if (got_int) {
|
|
2756
|
+
for (auto i : ec.integers) if (i == iv) { match = true; break; }
|
|
2757
|
+
}
|
|
2758
|
+
if (!match) {
|
|
2759
|
+
for (auto d : ec.doubles) if (d == v) { match = true; break; }
|
|
2760
|
+
}
|
|
2761
|
+
if (!match && got_int) {
|
|
2762
|
+
for (auto d : ec.doubles) if (d == v) { match = true; break; }
|
|
2763
|
+
}
|
|
2764
|
+
if (!match) return false;
|
|
2765
|
+
}
|
|
2573
2766
|
break;
|
|
2574
2767
|
}
|
|
2575
2768
|
case sjt::string: {
|
|
@@ -2580,8 +2773,16 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2580
2773
|
if (plan.min_length && len < *plan.min_length) return false;
|
|
2581
2774
|
if (plan.max_length && len > *plan.max_length) return false;
|
|
2582
2775
|
}
|
|
2776
|
+
if (plan.digit_pattern) {
|
|
2777
|
+
auto& dp = *plan.digit_pattern;
|
|
2778
|
+
if (sv.size() < dp.min_len || sv.size() > dp.max_len) return false;
|
|
2779
|
+
const uint8_t* p = reinterpret_cast<const uint8_t*>(sv.data());
|
|
2780
|
+
for (size_t i = 0, n = sv.size(); i < n; i++) {
|
|
2781
|
+
if (p[i] < '0' || p[i] > '9') return false;
|
|
2782
|
+
}
|
|
2783
|
+
}
|
|
2583
2784
|
#ifndef ATA_NO_RE2
|
|
2584
|
-
if (plan.pattern) {
|
|
2785
|
+
else if (plan.pattern) {
|
|
2585
2786
|
if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *plan.pattern))
|
|
2586
2787
|
return false;
|
|
2587
2788
|
}
|
|
@@ -2589,6 +2790,17 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2589
2790
|
if (plan.format_id != 255) {
|
|
2590
2791
|
if (!check_format_by_id(sv, plan.format_id)) return false;
|
|
2591
2792
|
}
|
|
2793
|
+
if (plan.enum_check) {
|
|
2794
|
+
auto& ec = *plan.enum_check;
|
|
2795
|
+
bool match = false;
|
|
2796
|
+
for (auto& s : ec.strings) {
|
|
2797
|
+
if (sv.size() == s.size() && std::memcmp(sv.data(), s.data(), s.size()) == 0) {
|
|
2798
|
+
match = true;
|
|
2799
|
+
break;
|
|
2800
|
+
}
|
|
2801
|
+
}
|
|
2802
|
+
if (!match) return false;
|
|
2803
|
+
}
|
|
2592
2804
|
break;
|
|
2593
2805
|
}
|
|
2594
2806
|
case sjt::object: {
|
|
@@ -2601,25 +2813,140 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2601
2813
|
uint64_t prop_count = 0;
|
|
2602
2814
|
|
|
2603
2815
|
for (auto field : obj) {
|
|
2604
|
-
|
|
2605
|
-
|
|
2816
|
+
// Fast key compare: use raw_json_string against schema keys directly,
|
|
2817
|
+
// skipping the unescape pass. Schema keys are assumed to have no
|
|
2818
|
+
// unescaped quotes (true for any well-formed JSON Schema).
|
|
2819
|
+
simdjson::ondemand::raw_json_string raw_key;
|
|
2820
|
+
if (field.key().get(raw_key) != SUCCESS) continue;
|
|
2606
2821
|
prop_count++;
|
|
2607
2822
|
|
|
2608
|
-
// Single merged scan: required + property in one pass
|
|
2609
2823
|
bool matched = false;
|
|
2610
2824
|
for (auto& e : op.entries) {
|
|
2611
|
-
if (
|
|
2825
|
+
if (raw_key.unsafe_is_equal(e.key)) {
|
|
2612
2826
|
if (e.required_idx >= 0)
|
|
2613
2827
|
required_found |= (1ULL << e.required_idx);
|
|
2614
2828
|
if (e.sub) {
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2829
|
+
// Fast dispatch: skip recursive od_exec_plan + type().get + switch
|
|
2830
|
+
// for primitive sub-plans. Mirrors the constraint code in od_exec_plan.
|
|
2831
|
+
switch (e.fk) {
|
|
2832
|
+
case od_plan::fast_kind::INTEGER: {
|
|
2833
|
+
int64_t iv;
|
|
2834
|
+
if (field.value().get(iv) != SUCCESS) return false;
|
|
2835
|
+
auto& sub = *e.sub;
|
|
2836
|
+
uint8_t f = sub.num_flags;
|
|
2837
|
+
double v = static_cast<double>(iv);
|
|
2838
|
+
if ((f & od_plan::HAS_MIN) && v < sub.num_min) return false;
|
|
2839
|
+
if ((f & od_plan::HAS_MAX) && v > sub.num_max) return false;
|
|
2840
|
+
if ((f & od_plan::HAS_EX_MIN) && v <= sub.num_ex_min) return false;
|
|
2841
|
+
if ((f & od_plan::HAS_EX_MAX) && v >= sub.num_ex_max) return false;
|
|
2842
|
+
if (f & od_plan::HAS_MUL) {
|
|
2843
|
+
double r = std::fmod(v, sub.num_mul);
|
|
2844
|
+
if (std::abs(r) > 1e-8 && std::abs(r - sub.num_mul) > 1e-8) return false;
|
|
2845
|
+
}
|
|
2846
|
+
if (sub.enum_check) {
|
|
2847
|
+
bool em = false;
|
|
2848
|
+
for (auto i2 : sub.enum_check->integers) if (i2 == iv) { em = true; break; }
|
|
2849
|
+
if (!em) for (auto d : sub.enum_check->doubles) if (d == v) { em = true; break; }
|
|
2850
|
+
if (!em) return false;
|
|
2851
|
+
}
|
|
2852
|
+
break;
|
|
2853
|
+
}
|
|
2854
|
+
case od_plan::fast_kind::STRING: {
|
|
2855
|
+
std::string_view sv;
|
|
2856
|
+
if (field.value().get(sv) != SUCCESS) return false;
|
|
2857
|
+
auto& sub = *e.sub;
|
|
2858
|
+
if (sub.min_length || sub.max_length) {
|
|
2859
|
+
uint64_t len = utf8_length_fast(sv);
|
|
2860
|
+
if (sub.min_length && len < *sub.min_length) return false;
|
|
2861
|
+
if (sub.max_length && len > *sub.max_length) return false;
|
|
2862
|
+
}
|
|
2863
|
+
if (sub.digit_pattern) {
|
|
2864
|
+
auto& dp = *sub.digit_pattern;
|
|
2865
|
+
if (sv.size() < dp.min_len || sv.size() > dp.max_len) return false;
|
|
2866
|
+
const uint8_t* sp = reinterpret_cast<const uint8_t*>(sv.data());
|
|
2867
|
+
for (size_t i = 0, n = sv.size(); i < n; i++) {
|
|
2868
|
+
if (sp[i] < '0' || sp[i] > '9') return false;
|
|
2869
|
+
}
|
|
2870
|
+
}
|
|
2871
|
+
#ifndef ATA_NO_RE2
|
|
2872
|
+
else if (sub.pattern) {
|
|
2873
|
+
if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *sub.pattern))
|
|
2874
|
+
return false;
|
|
2875
|
+
}
|
|
2876
|
+
#endif
|
|
2877
|
+
if (sub.format_id != 255) {
|
|
2878
|
+
if (!check_format_by_id(sv, sub.format_id)) return false;
|
|
2879
|
+
}
|
|
2880
|
+
if (sub.enum_check) {
|
|
2881
|
+
bool em = false;
|
|
2882
|
+
for (auto& s : sub.enum_check->strings) {
|
|
2883
|
+
if (sv.size() == s.size() && std::memcmp(sv.data(), s.data(), s.size()) == 0) {
|
|
2884
|
+
em = true;
|
|
2885
|
+
break;
|
|
2886
|
+
}
|
|
2887
|
+
}
|
|
2888
|
+
if (!em) return false;
|
|
2889
|
+
}
|
|
2890
|
+
break;
|
|
2891
|
+
}
|
|
2892
|
+
case od_plan::fast_kind::BOOLEAN: {
|
|
2893
|
+
bool bv;
|
|
2894
|
+
if (field.value().get(bv) != SUCCESS) return false;
|
|
2895
|
+
if (e.sub->enum_check) {
|
|
2896
|
+
if (bv ? !e.sub->enum_check->has_true : !e.sub->enum_check->has_false) return false;
|
|
2897
|
+
}
|
|
2898
|
+
break;
|
|
2899
|
+
}
|
|
2900
|
+
case od_plan::fast_kind::OBJECT: {
|
|
2901
|
+
simdjson::ondemand::value fv;
|
|
2902
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2903
|
+
if (!od_exec_plan(*e.sub, fv,
|
|
2904
|
+
simdjson::ondemand::json_type::object)) return false;
|
|
2905
|
+
break;
|
|
2906
|
+
}
|
|
2907
|
+
case od_plan::fast_kind::ARRAY: {
|
|
2908
|
+
simdjson::ondemand::value fv;
|
|
2909
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2910
|
+
if (!od_exec_plan(*e.sub, fv,
|
|
2911
|
+
simdjson::ondemand::json_type::array)) return false;
|
|
2912
|
+
break;
|
|
2913
|
+
}
|
|
2914
|
+
case od_plan::fast_kind::OTHER:
|
|
2915
|
+
default: {
|
|
2916
|
+
simdjson::ondemand::value fv;
|
|
2917
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2918
|
+
if (!od_exec_plan(*e.sub, fv)) return false;
|
|
2919
|
+
}
|
|
2920
|
+
}
|
|
2618
2921
|
}
|
|
2619
2922
|
matched = true;
|
|
2620
2923
|
break;
|
|
2621
2924
|
}
|
|
2622
2925
|
}
|
|
2926
|
+
// Safety net: if no match via raw byte compare, the JSON key may be
|
|
2927
|
+
// escaped (e.g., "\u0061name" for "aname"). Fall back to a properly-unescaped
|
|
2928
|
+
// compare so escaped keys still match the unescaped schema keys.
|
|
2929
|
+
if (!matched) {
|
|
2930
|
+
std::string_view raw_view = field.escaped_key();
|
|
2931
|
+
if (raw_view.find('\\') != std::string_view::npos) {
|
|
2932
|
+
std::string_view ukey;
|
|
2933
|
+
if (field.unescaped_key().get(ukey) == SUCCESS) {
|
|
2934
|
+
for (auto& e : op.entries) {
|
|
2935
|
+
if (ukey == e.key) {
|
|
2936
|
+
if (e.required_idx >= 0)
|
|
2937
|
+
required_found |= (1ULL << e.required_idx);
|
|
2938
|
+
if (e.sub) {
|
|
2939
|
+
simdjson::ondemand::value fv;
|
|
2940
|
+
if (field.value().get(fv) != SUCCESS) return false;
|
|
2941
|
+
if (!od_exec_plan(*e.sub, fv)) return false;
|
|
2942
|
+
}
|
|
2943
|
+
matched = true;
|
|
2944
|
+
break;
|
|
2945
|
+
}
|
|
2946
|
+
}
|
|
2947
|
+
}
|
|
2948
|
+
}
|
|
2949
|
+
}
|
|
2623
2950
|
if (!matched && op.no_additional) return false;
|
|
2624
2951
|
}
|
|
2625
2952
|
|
|
@@ -2647,6 +2974,18 @@ static bool od_exec_plan(const od_plan& plan, simdjson::ondemand::value value) {
|
|
|
2647
2974
|
if (ap.max_items && count > *ap.max_items) return false;
|
|
2648
2975
|
break;
|
|
2649
2976
|
}
|
|
2977
|
+
case sjt::boolean: {
|
|
2978
|
+
if (plan.enum_check) {
|
|
2979
|
+
bool b;
|
|
2980
|
+
if (value.get(b) != SUCCESS) return false;
|
|
2981
|
+
if (b ? !plan.enum_check->has_true : !plan.enum_check->has_false) return false;
|
|
2982
|
+
}
|
|
2983
|
+
break;
|
|
2984
|
+
}
|
|
2985
|
+
case sjt::null: {
|
|
2986
|
+
if (plan.enum_check && !plan.enum_check->has_null) return false;
|
|
2987
|
+
break;
|
|
2988
|
+
}
|
|
2650
2989
|
default:
|
|
2651
2990
|
break;
|
|
2652
2991
|
}
|