ata-validator 0.10.1 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@
17
17
  #include <vector>
18
18
 
19
19
  #include "ata.h"
20
+ #include <simdjson.h>
20
21
 
21
22
  // ============================================================================
22
23
  // V8 Direct Object Traversal Engine
@@ -797,6 +798,67 @@ static void validate_napi(const schema_node_ptr& node,
797
798
  // N-API Binding
798
799
  // ============================================================================
799
800
 
801
+ // ============================================================================
802
+ // simdjson DOM to V8 JS Object conversion
803
+ // ============================================================================
804
+
805
+ static Napi::Value dom_to_napi(Napi::Env env, simdjson::dom::element el) {
806
+ using namespace simdjson;
807
+ switch (el.type()) {
808
+ case dom::element_type::OBJECT: {
809
+ auto obj = Napi::Object::New(env);
810
+ for (auto [key, val] : dom::object(el)) {
811
+ obj.Set(std::string(key), dom_to_napi(env, val));
812
+ }
813
+ return obj;
814
+ }
815
+ case dom::element_type::ARRAY: {
816
+ dom::array arr = el;
817
+ auto jsArr = Napi::Array::New(env, arr.size());
818
+ uint32_t i = 0;
819
+ for (auto val : arr) {
820
+ jsArr.Set(i++, dom_to_napi(env, val));
821
+ }
822
+ return jsArr;
823
+ }
824
+ case dom::element_type::STRING: {
825
+ std::string_view sv;
826
+ el.get(sv);
827
+ return Napi::String::New(env, sv.data(), sv.length());
828
+ }
829
+ case dom::element_type::INT64: {
830
+ int64_t v;
831
+ el.get(v);
832
+ return Napi::Number::New(env, static_cast<double>(v));
833
+ }
834
+ case dom::element_type::UINT64: {
835
+ uint64_t v;
836
+ el.get(v);
837
+ return Napi::Number::New(env, static_cast<double>(v));
838
+ }
839
+ case dom::element_type::DOUBLE: {
840
+ double v;
841
+ el.get(v);
842
+ return Napi::Number::New(env, v);
843
+ }
844
+ case dom::element_type::BOOL: {
845
+ bool v;
846
+ el.get(v);
847
+ return Napi::Boolean::New(env, v);
848
+ }
849
+ case dom::element_type::NULL_VALUE:
850
+ return env.Null();
851
+ default:
852
+ return env.Undefined();
853
+ }
854
+ }
855
+
856
+ // Thread-local simdjson DOM parser for parseJSON / validateAndParse
857
+ static simdjson::dom::parser& tl_dom_parser() {
858
+ thread_local simdjson::dom::parser parser;
859
+ return parser;
860
+ }
861
+
800
862
  static Napi::Object make_result(Napi::Env env,
801
863
  const ata::validation_result& result) {
802
864
  Napi::Object obj = Napi::Object::New(env);
@@ -822,7 +884,8 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
822
884
  {InstanceMethod("validate", &CompiledSchema::Validate),
823
885
  InstanceMethod("validateJSON", &CompiledSchema::ValidateJSON),
824
886
  InstanceMethod("validateDirect", &CompiledSchema::ValidateDirect),
825
- InstanceMethod("isValidJSON", &CompiledSchema::IsValidJSON)});
887
+ InstanceMethod("isValidJSON", &CompiledSchema::IsValidJSON),
888
+ InstanceMethod("validateAndParse", &CompiledSchema::ValidateAndParse)});
826
889
  auto* constructor = new Napi::FunctionReference();
827
890
  *constructor = Napi::Persistent(func);
828
891
  env.SetInstanceData(constructor);
@@ -944,6 +1007,77 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
944
1007
  return ValidateDirectImpl(env, info[0]);
945
1008
  }
946
1009
 
1010
+ // Parse JSON with simdjson, validate against schema, return parsed JS object
1011
+ Napi::Value ValidateAndParse(const Napi::CallbackInfo& info) {
1012
+ Napi::Env env = info.Env();
1013
+ if (info.Length() < 1) {
1014
+ Napi::TypeError::New(env, "JSON string or Buffer expected")
1015
+ .ThrowAsJavaScriptException();
1016
+ return env.Undefined();
1017
+ }
1018
+
1019
+ const char* data;
1020
+ size_t len;
1021
+
1022
+ if (info[0].IsBuffer()) {
1023
+ auto buf = info[0].As<Napi::Buffer<char>>();
1024
+ data = buf.Data();
1025
+ len = buf.Length();
1026
+ } else if (info[0].IsString()) {
1027
+ auto [d, l] = extract_string(env, info[0]);
1028
+ data = d;
1029
+ len = l;
1030
+ } else {
1031
+ Napi::TypeError::New(env, "JSON string or Buffer expected")
1032
+ .ThrowAsJavaScriptException();
1033
+ return env.Undefined();
1034
+ }
1035
+
1036
+ // Parse with simdjson
1037
+ simdjson::padded_string padded(data, len);
1038
+ auto& parser = tl_dom_parser();
1039
+ auto doc_result = parser.parse(padded);
1040
+ if (doc_result.error()) {
1041
+ auto obj = Napi::Object::New(env);
1042
+ obj.Set("valid", false);
1043
+ obj.Set("value", env.Null());
1044
+ auto errors = Napi::Array::New(env, 1);
1045
+ auto err = Napi::Object::New(env);
1046
+ err.Set("code", Napi::Number::New(env, static_cast<int>(ata::error_code::invalid_json)));
1047
+ err.Set("path", Napi::String::New(env, ""));
1048
+ err.Set("message", Napi::String::New(env, "Invalid JSON"));
1049
+ errors[0u] = err;
1050
+ obj.Set("errors", errors);
1051
+ return obj;
1052
+ }
1053
+
1054
+ // Validate
1055
+ auto valResult = ata::validate(schema_, std::string_view(data, len));
1056
+
1057
+ // Convert DOM to JS object
1058
+ Napi::Value jsValue = dom_to_napi(env, doc_result.value());
1059
+
1060
+ // Build result
1061
+ auto obj = Napi::Object::New(env);
1062
+ obj.Set("valid", valResult.valid);
1063
+ obj.Set("value", jsValue);
1064
+ if (valResult.valid) {
1065
+ obj.Set("errors", Napi::Array::New(env, 0));
1066
+ } else {
1067
+ Napi::Array errors = Napi::Array::New(env, valResult.errors.size());
1068
+ for (size_t i = 0; i < valResult.errors.size(); ++i) {
1069
+ Napi::Object err = Napi::Object::New(env);
1070
+ err.Set("code",
1071
+ Napi::Number::New(env, static_cast<int>(valResult.errors[i].code)));
1072
+ err.Set("path", Napi::String::New(env, valResult.errors[i].path));
1073
+ err.Set("message", Napi::String::New(env, valResult.errors[i].message));
1074
+ errors[i] = err;
1075
+ }
1076
+ obj.Set("errors", errors);
1077
+ }
1078
+ return obj;
1079
+ }
1080
+
947
1081
  private:
948
1082
  Napi::Value ValidateDirectImpl(Napi::Env env, Napi::Value value) {
949
1083
  compiled_schema_internal ctx;
@@ -996,6 +1130,44 @@ Napi::Value GetVersion(const Napi::CallbackInfo& info) {
996
1130
  return Napi::String::New(info.Env(), std::string(ata::version()));
997
1131
  }
998
1132
 
1133
+ // Standalone JSON parser using simdjson — returns parsed JS object
1134
+ Napi::Value ParseJSON(const Napi::CallbackInfo& info) {
1135
+ Napi::Env env = info.Env();
1136
+ if (info.Length() < 1) {
1137
+ Napi::TypeError::New(env, "JSON string or Buffer expected")
1138
+ .ThrowAsJavaScriptException();
1139
+ return env.Undefined();
1140
+ }
1141
+
1142
+ const char* data;
1143
+ size_t len;
1144
+
1145
+ if (info[0].IsBuffer()) {
1146
+ auto buf = info[0].As<Napi::Buffer<char>>();
1147
+ data = buf.Data();
1148
+ len = buf.Length();
1149
+ } else if (info[0].IsString()) {
1150
+ auto [d, l] = CompiledSchema::extract_string(env, info[0]);
1151
+ data = d;
1152
+ len = l;
1153
+ } else {
1154
+ Napi::TypeError::New(env, "JSON string or Buffer expected")
1155
+ .ThrowAsJavaScriptException();
1156
+ return env.Undefined();
1157
+ }
1158
+
1159
+ // Parse with simdjson using thread-local parser
1160
+ simdjson::padded_string padded(data, len);
1161
+ auto& parser = tl_dom_parser();
1162
+ auto result = parser.parse(padded);
1163
+ if (result.error()) {
1164
+ Napi::Error::New(env, "Invalid JSON").ThrowAsJavaScriptException();
1165
+ return env.Undefined();
1166
+ }
1167
+
1168
+ return dom_to_napi(env, result.value());
1169
+ }
1170
+
999
1171
  // --- Thread Pool ---
1000
1172
  class ThreadPool {
1001
1173
  public:
@@ -1531,6 +1703,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
1531
1703
  CompiledSchema::Init(env, exports);
1532
1704
  exports.Set("validate", Napi::Function::New(env, ValidateOneShot));
1533
1705
  exports.Set("version", Napi::Function::New(env, GetVersion));
1706
+ exports.Set("parseJSON", Napi::Function::New(env, ParseJSON));
1534
1707
  exports.Set("fastRegister", Napi::Function::New(env, FastRegister));
1535
1708
  exports.Set("fastValidate", Napi::Function::New(env, FastValidateSlow));
1536
1709
 
@@ -6,7 +6,17 @@
6
6
 
7
7
  // Count Unicode code points, not UTF-16 code units (surrogate pairs).
8
8
  // JSON Schema: minLength/maxLength count characters per RFC 8259.
9
- function _cpLen(s) { let n = 0; for (const _ of s) n++; return n; }
9
+ // Fast path: if no surrogate pairs exist, .length is correct (covers >99% of real data).
10
/**
 * Number of Unicode code points in `s`.
 *
 * Starts from the UTF-16 length and subtracts one for every well-formed
 * surrogate pair (high surrogate immediately followed by a low surrogate).
 * Lone surrogates count as one each, exactly as string iteration does.
 *
 * @param {string} s
 * @returns {number}
 */
function _cpLen(s) {
  const units = s.length;
  let count = units;
  let i = 0;
  while (i < units - 1) {
    const hi = s.charCodeAt(i);
    if (hi >= 0xD800 && hi <= 0xDBFF) {
      const lo = s.charCodeAt(i + 1);
      if (lo >= 0xDC00 && lo <= 0xDFFF) {
        count--; // the pair is a single code point
        i += 2;
        continue;
      }
    }
    i++;
  }
  return count;
}
10
20
 
11
21
  // AJV-compatible error message templates (compile-time, not runtime)
12
22
  const AJV_MESSAGES = {
@@ -275,11 +285,12 @@ function compileToJS(schema, defs, schemaMap) {
275
285
  // string
276
286
  if (schema.minLength !== undefined) {
277
287
  const min = schema.minLength
278
- checks.push((d) => typeof d !== 'string' || _cpLen(d) >= min)
288
+ const min2 = min * 2
289
+ checks.push((d) => typeof d !== 'string' || d.length >= min2 || (d.length >= min && _cpLen(d) >= min))
279
290
  }
280
291
  if (schema.maxLength !== undefined) {
281
292
  const max = schema.maxLength
282
- checks.push((d) => typeof d !== 'string' || _cpLen(d) <= max)
293
+ checks.push((d) => typeof d !== 'string' || d.length <= max || _cpLen(d) <= max)
283
294
  }
284
295
  if (schema.pattern) {
285
296
  try {
@@ -0,0 +1,223 @@
1
'use strict';

// Schema-compiled JSON parser: combines parse + validate in one pass.
// For known-shape schemas it builds typed JS objects directly from the JSON
// text. JSON.parse is only used to decode strings that contain escapes and to
// syntax-check values of unknown (additional) properties.
//
// compileSchemaParser(schema) returns a function `(str) => value | null`, or
// null when the schema uses anything this parser cannot enforce; callers are
// expected to fall back to JSON.parse + the regular validator in that case.
// The compiled function returns null on invalid JSON or validation failure.
//
// Supported shapes:
//   - { type: 'object', properties: { k: <scalar schema>, ... },
//       required?: string[], additionalProperties?: boolean }
//   - { type: 'array', items: <object schema as above> }
// Scalar schemas: type string | integer | number | boolean with optional
// minLength / maxLength / minimum / maximum.

// Keywords that carry no validation semantics and may be ignored safely.
const ANNOTATION_KEYWORDS = [
  'title', 'description', 'default', 'examples', '$comment',
  'deprecated', 'readOnly', 'writeOnly',
];
const PROPERTY_KEYWORDS = new Set([
  'type', 'minLength', 'maxLength', 'minimum', 'maximum',
  ...ANNOTATION_KEYWORDS,
]);
const OBJECT_KEYWORDS = new Set([
  'type', 'properties', 'required', 'additionalProperties', '$schema', '$id',
  ...ANNOTATION_KEYWORDS,
]);
const ARRAY_KEYWORDS = new Set([
  'type', 'items', '$schema', '$id',
  ...ANNOTATION_KEYWORDS,
]);
const SCALAR_TYPES = new Set(['string', 'integer', 'number', 'boolean']);
const NUMERIC_LIMITS = ['minLength', 'maxLength', 'minimum', 'maximum'];

// True when every own key of `schema` is in `allowed`.
function usesOnly(schema, allowed) {
  for (const k of Object.keys(schema)) {
    if (!allowed.has(k)) return false;
  }
  return true;
}

// Advances past JSON whitespace (space, tab, LF, CR) and returns the new index.
function skipWs(str, pos) {
  for (;;) {
    const c = str.charCodeAt(pos); // NaN past the end, which ends the loop
    if (c === 32 || c === 10 || c === 13 || c === 9) pos++;
    else return pos;
  }
}

function isDigit(c) {
  return c >= 48 && c <= 57;
}

// Code points in `s`: UTF-16 length minus one per well-formed surrogate pair.
function codePointLength(s) {
  const units = s.length;
  let count = units;
  for (let i = 0; i < units - 1; i++) {
    const hi = s.charCodeAt(i);
    if (hi >= 0xD800 && hi <= 0xDBFF) {
      const lo = s.charCodeAt(i + 1);
      if (lo >= 0xDC00 && lo <= 0xDFFF) { count--; i++; }
    }
  }
  return count;
}

// Parses the JSON string literal starting at `pos` (which must be a `"`).
// Returns { value, pos } with `pos` just past the closing quote, or null when
// the literal is malformed or unterminated.
function parseString(str, pos) {
  if (str.charCodeAt(pos) !== 34) return null;
  const len = str.length;
  const start = pos + 1;
  let i = start;
  let hasEscape = false;
  while (i < len) {
    const c = str.charCodeAt(i);
    if (c === 34) {
      if (!hasEscape) return { value: str.substring(start, i), pos: i + 1 };
      // Escapes present: let JSON.parse decode (and validate) the literal.
      try {
        return { value: JSON.parse(str.substring(pos, i + 1)), pos: i + 1 };
      } catch (e) {
        return null;
      }
    }
    if (c === 92) { hasEscape = true; i += 2; continue; }
    if (c < 32) return null; // raw control characters are not valid in JSON strings
    i++;
  }
  return null; // ran off the end without a closing quote
}

// Scans a number per the JSON grammar starting at `pos`.
// Returns the index just past the number, or -1 if no valid number starts here.
function scanNumber(str, pos) {
  let i = pos;
  if (str.charCodeAt(i) === 45) i++; // -
  let c = str.charCodeAt(i);
  if (c === 48) {
    i++; // a leading zero must stand alone
  } else if (c >= 49 && c <= 57) {
    do { i++; } while (isDigit(str.charCodeAt(i)));
  } else {
    return -1;
  }
  if (str.charCodeAt(i) === 46) { // fraction
    i++;
    if (!isDigit(str.charCodeAt(i))) return -1;
    do { i++; } while (isDigit(str.charCodeAt(i)));
  }
  c = str.charCodeAt(i);
  if (c === 101 || c === 69) { // exponent
    i++;
    c = str.charCodeAt(i);
    if (c === 43 || c === 45) i++;
    if (!isDigit(str.charCodeAt(i))) return -1;
    do { i++; } while (isDigit(str.charCodeAt(i)));
  }
  return i;
}

// Assigns result[key] = value as an own property. "__proto__" needs
// defineProperty because plain assignment would hit the prototype setter.
function setOwn(result, key, value) {
  if (key === '__proto__') {
    Object.defineProperty(result, key, {
      value, writable: true, enumerable: true, configurable: true,
    });
  } else {
    result[key] = value;
  }
}

// Validates an object schema and precomputes what the parser needs.
// Returns { keyTypes: Map, required: Set, additionalAllowed } or null when the
// schema contains anything the parser would not enforce.
function buildObjectSpec(schema) {
  if (!schema || typeof schema !== 'object') return null;
  const props = schema.properties;
  if (!props || typeof props !== 'object' || Array.isArray(props)) return null;
  if (!usesOnly(schema, OBJECT_KEYWORDS)) return null;

  const extra = schema.additionalProperties;
  if (extra !== undefined && typeof extra !== 'boolean') return null; // schema-valued: unsupported

  const requiredList = schema.required === undefined ? [] : schema.required;
  if (!Array.isArray(requiredList)) return null;

  // Map (not a plain object) so keys like "toString" never resolve to
  // inherited Object.prototype members.
  const keyTypes = new Map();
  for (const k of Object.keys(props)) {
    const p = props[k];
    if (!p || typeof p !== 'object' || !SCALAR_TYPES.has(p.type)) return null; // flat scalars only
    if (!usesOnly(p, PROPERTY_KEYWORDS)) return null; // e.g. enum/pattern/const
    for (const limit of NUMERIC_LIMITS) {
      if (p[limit] !== undefined && typeof p[limit] !== 'number') return null;
    }
    keyTypes.set(k, p);
  }

  return {
    keyTypes,
    required: new Set(requiredList),
    additionalAllowed: extra !== false,
  };
}

// Parses and validates one object starting at `pos` (which must be `{`).
// Returns { value, pos } with `pos` just past the closing `}`, or null.
function parseObjectAt(str, pos, spec) {
  if (str.charCodeAt(pos) !== 123) return null;
  pos = skipWs(str, pos + 1);

  const result = {};
  const seenRequired = new Set();

  if (str.charCodeAt(pos) === 125) {
    pos++;
  } else {
    for (;;) {
      const keyTok = parseString(str, pos);
      if (keyTok === null) return null;
      const key = keyTok.value;

      pos = skipWs(str, keyTok.pos);
      if (str.charCodeAt(pos) !== 58) return null; // :
      pos = skipWs(str, pos + 1);

      const propSchema = spec.keyTypes.get(key);
      if (propSchema !== undefined) {
        const parsed = parseValue(str, pos, propSchema);
        if (parsed === null) return null; // malformed or fails the property schema
        setOwn(result, key, parsed.value);
        pos = parsed.pos;
      } else if (!spec.additionalAllowed) {
        return null; // additional property not allowed
      } else {
        const end = skipValue(str, pos);
        if (end < 0) return null;
        pos = end;
      }
      // Presence is what `required` checks, whether or not the key is typed.
      if (spec.required.has(key)) seenRequired.add(key);

      pos = skipWs(str, pos);
      const c = str.charCodeAt(pos);
      if (c === 44) { pos = skipWs(str, pos + 1); continue; } // ,
      if (c === 125) { pos++; break; } // }
      return null;
    }
  }

  if (seenRequired.size !== spec.required.size) return null;
  return { value: result, pos };
}

/**
 * Compiles `schema` into a combined parse+validate function.
 *
 * @param {object} schema JSON Schema (object of scalars, or array of such objects).
 * @returns {((str: string) => any) | null} parser returning the parsed value,
 *   or null on invalid JSON / validation failure; null (no parser) when the
 *   schema is unsupported.
 */
function compileSchemaParser(schema) {
  if (!schema || typeof schema !== 'object') return null;

  // Array of objects
  if (schema.type === 'array' && schema.items && schema.items.type === 'object') {
    if (!usesOnly(schema, ARRAY_KEYWORDS)) return null;
    const spec = buildObjectSpec(schema.items);
    if (spec === null) return null;

    return function parseArray(str) {
      if (typeof str !== 'string') return null;
      let pos = skipWs(str, 0);
      if (str.charCodeAt(pos) !== 91) return null; // [
      pos = skipWs(str, pos + 1);

      const arr = [];
      if (str.charCodeAt(pos) === 93) {
        pos++;
      } else {
        for (;;) {
          const item = parseObjectAt(str, pos, spec);
          if (item === null) return null;
          arr.push(item.value);
          pos = skipWs(str, item.pos);
          const c = str.charCodeAt(pos);
          if (c === 44) { pos = skipWs(str, pos + 1); continue; } // ,
          if (c === 93) { pos++; break; } // ]
          return null;
        }
      }
      // Only whitespace may follow the closing bracket.
      return skipWs(str, pos) === str.length ? arr : null;
    };
  }

  // Flat object
  if (schema.type === 'object' && schema.properties) {
    return compileObjectParser(schema);
  }

  return null;
}

/**
 * Compiles a flat object schema (scalar properties only).
 *
 * @param {object} schema object schema with `properties`.
 * @returns {((str: string) => object) | null} parser, or null if unsupported.
 */
function compileObjectParser(schema) {
  const spec = buildObjectSpec(schema);
  if (spec === null) return null;

  return function parseAndValidate(str) {
    if (typeof str !== 'string') return null;
    const parsed = parseObjectAt(str, skipWs(str, 0), spec);
    if (parsed === null) return null;
    // Only whitespace may follow the closing brace.
    return skipWs(str, parsed.pos) === str.length ? parsed.value : null;
  };
}

/**
 * Parses the scalar at `pos` according to `schema.type` and checks its
 * minLength/maxLength (strings, in code points) or minimum/maximum (numbers).
 *
 * @returns {{value: any, pos: number} | null} value and the index just past
 *   it, or null when the text is malformed or violates the schema.
 */
function parseValue(str, pos, schema) {
  const type = schema.type;

  if (type === 'string') {
    const tok = parseString(str, pos);
    if (tok === null) return null;
    if (schema.minLength !== undefined || schema.maxLength !== undefined) {
      const n = codePointLength(tok.value);
      if (schema.minLength !== undefined && n < schema.minLength) return null;
      if (schema.maxLength !== undefined && n > schema.maxLength) return null;
    }
    return tok;
  }

  if (type === 'integer' || type === 'number') {
    const end = scanNumber(str, pos);
    if (end < 0) return null;
    const val = Number(str.substring(pos, end));
    // JSON Schema treats any number with a zero fractional part as an integer.
    if (type === 'integer' && !Number.isInteger(val)) return null;
    if (schema.minimum !== undefined && val < schema.minimum) return null;
    if (schema.maximum !== undefined && val > schema.maximum) return null;
    return { value: val, pos: end };
  }

  if (type === 'boolean') {
    if (str.startsWith('true', pos)) return { value: true, pos: pos + 4 };
    if (str.startsWith('false', pos)) return { value: false, pos: pos + 5 };
    return null;
  }

  return null;
}

/**
 * Skips one JSON value of any type starting at `pos`.
 *
 * @returns {number} index just past the value, or -1 when the value is
 *   malformed or unterminated.
 */
function skipValue(str, pos) {
  const first = str.charCodeAt(pos);

  if (first === 34) { // string
    const tok = parseString(str, pos);
    return tok === null ? -1 : tok.pos;
  }

  if (first === 123 || first === 91) { // { or [
    const len = str.length;
    let depth = 0;
    let i = pos;
    while (i < len) {
      const c = str.charCodeAt(i);
      if (c === 34) {
        // Jump over the string so brackets inside it are not counted.
        i++;
        while (i < len && str.charCodeAt(i) !== 34) {
          if (str.charCodeAt(i) === 92) i++;
          i++;
        }
        if (i >= len) return -1;
      } else if (c === 123 || c === 91) {
        depth++;
      } else if (c === 125 || c === 93) {
        depth--;
        if (depth === 0) {
          // Extent found; JSON.parse checks that the contents are well-formed.
          try {
            JSON.parse(str.substring(pos, i + 1));
          } catch (e) {
            return -1;
          }
          return i + 1;
        }
      }
      i++;
    }
    return -1; // unterminated
  }

  // true, false, null, number
  if (str.startsWith('true', pos) || str.startsWith('null', pos)) return pos + 4;
  if (str.startsWith('false', pos)) return pos + 5;
  return scanNumber(str, pos);
}

module.exports = { compileSchemaParser };
@@ -24,7 +24,7 @@ const TIER0_PRIMITIVE_ALLOWED = new Set([
24
24
  'type', 'enum', 'const',
25
25
  'minLength', 'maxLength',
26
26
  'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum',
27
- 'multipleOf', 'format',
27
+ 'multipleOf',
28
28
  ...META_KEYS,
29
29
  ]);
30
30
 
package/lib/tier0.js CHANGED
@@ -19,10 +19,15 @@ const T_BOOLEAN = TYPE_MASK.boolean;
19
19
 
20
20
  // Count Unicode code points, not UTF-16 code units.
21
21
  // JSON Schema spec: minLength/maxLength count characters (RFC 8259 = code points).
22
+ // Fast path: scan for surrogates first, skip slow iteration for ASCII-only strings.
22
23
function codePointLength(s) {
  // Start from the UTF-16 length and subtract one for each well-formed
  // surrogate pair; lone surrogates still count as one, as with iteration.
  const units = s.length;
  let count = units;
  let i = 0;
  while (i < units - 1) {
    const hi = s.charCodeAt(i);
    if (hi >= 0xD800 && hi <= 0xDBFF) {
      const lo = s.charCodeAt(i + 1);
      if (lo >= 0xDC00 && lo <= 0xDFFF) {
        count--;
        i += 2;
        continue;
      }
    }
    i++;
  }
  return count;
}
27
32
 
28
33
  // Numeric constraint flags, packed into constraint.numFlags.
@@ -102,8 +107,8 @@ function checkPrimitive(c, v) {
102
107
  if (typeof v !== 'string') return false;
103
108
  const minLen = c.minLen;
104
109
  const maxLen = c.maxLen;
105
- if (minLen >= 0 && codePointLength(v) < minLen) return false;
106
- if (maxLen >= 0 && codePointLength(v) > maxLen) return false;
110
+ if (minLen >= 0) { const l = v.length; if (l < minLen) return false; if (l < minLen * 2 && codePointLength(v) < minLen) return false; }
111
+ if (maxLen >= 0) { const l = v.length; if (l > maxLen && codePointLength(v) > maxLen) return false; }
107
112
  } else if (m === T_INTEGER) {
108
113
  if (typeof v !== 'number' || !Number.isInteger(v)) return false;
109
114
  const f = c.numFlags;
@@ -156,8 +161,8 @@ function tier0ValidateObject(plan, data) {
156
161
  if (typeof v !== 'string') return false;
157
162
  const minLen = c.minLen;
158
163
  const maxLen = c.maxLen;
159
- if (minLen >= 0 && v.length < minLen) return false;
160
- if (maxLen >= 0 && v.length > maxLen) return false;
164
+ if (minLen >= 0) { const l = v.length; if (l < minLen) return false; if (l < minLen * 2 && codePointLength(v) < minLen) return false; }
165
+ if (maxLen >= 0) { const l = v.length; if (l > maxLen && codePointLength(v) > maxLen) return false; }
161
166
  } else if (m === T_INTEGER) {
162
167
  if (typeof v !== 'number' || !Number.isInteger(v)) return false;
163
168
  const f = c.numFlags;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ata-validator",
3
- "version": "0.10.1",
3
+ "version": "0.10.2",
4
4
  "description": "Ultra-fast JSON Schema validator. 4.7x faster validation, 1,800x faster compilation. Works without native addon. Cross-schema $ref, Draft 2020-12 + Draft 7, V8-optimized JS codegen, simdjson, RE2, multi-core. Standard Schema V1 compatible.",
5
5
  "main": "index.js",
6
6
  "module": "index.mjs",