ata-validator 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -187
- package/binding/ata_napi.cpp +903 -114
- package/binding.gyp +13 -2
- package/compat.d.ts +23 -0
- package/include/ata.h +10 -2
- package/index.d.ts +37 -0
- package/index.js +150 -5
- package/lib/js-compiler.js +845 -0
- package/package.json +15 -8
- package/prebuilds/darwin-arm64/ata-validator.node +0 -0
- package/src/ata.cpp +776 -125
package/binding/ata_napi.cpp
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
#include <napi.h>
|
|
2
|
+
#include <node_api.h>
|
|
2
3
|
|
|
3
4
|
#include <cmath>
|
|
4
|
-
#include <
|
|
5
|
+
#include <thread>
|
|
6
|
+
#include <future>
|
|
7
|
+
#include <mutex>
|
|
8
|
+
#include <condition_variable>
|
|
9
|
+
#include <functional>
|
|
10
|
+
#include <queue>
|
|
11
|
+
#include <atomic>
|
|
12
|
+
#include <re2/re2.h>
|
|
5
13
|
#include <set>
|
|
6
14
|
#include <string>
|
|
7
15
|
#include <vector>
|
|
@@ -31,13 +39,16 @@ struct schema_node {
|
|
|
31
39
|
std::optional<uint64_t> min_length;
|
|
32
40
|
std::optional<uint64_t> max_length;
|
|
33
41
|
std::optional<std::string> pattern;
|
|
34
|
-
std::shared_ptr<
|
|
42
|
+
std::shared_ptr<re2::RE2> compiled_pattern;
|
|
35
43
|
|
|
36
44
|
std::optional<uint64_t> min_items;
|
|
37
45
|
std::optional<uint64_t> max_items;
|
|
38
46
|
bool unique_items = false;
|
|
39
47
|
schema_node_ptr items_schema;
|
|
40
48
|
std::vector<schema_node_ptr> prefix_items;
|
|
49
|
+
schema_node_ptr contains_schema;
|
|
50
|
+
std::optional<uint64_t> min_contains;
|
|
51
|
+
std::optional<uint64_t> max_contains;
|
|
41
52
|
|
|
42
53
|
std::unordered_map<std::string, schema_node_ptr> properties;
|
|
43
54
|
std::vector<std::string> required;
|
|
@@ -45,8 +56,16 @@ struct schema_node {
|
|
|
45
56
|
schema_node_ptr additional_properties_schema;
|
|
46
57
|
std::optional<uint64_t> min_properties;
|
|
47
58
|
std::optional<uint64_t> max_properties;
|
|
59
|
+
schema_node_ptr property_names_schema;
|
|
60
|
+
std::unordered_map<std::string, std::vector<std::string>> dependent_required;
|
|
61
|
+
std::unordered_map<std::string, schema_node_ptr> dependent_schemas;
|
|
48
62
|
|
|
49
|
-
|
|
63
|
+
struct pattern_prop {
|
|
64
|
+
std::string pattern;
|
|
65
|
+
schema_node_ptr schema;
|
|
66
|
+
std::shared_ptr<re2::RE2> compiled;
|
|
67
|
+
};
|
|
68
|
+
std::vector<pattern_prop> pattern_properties;
|
|
50
69
|
|
|
51
70
|
std::optional<std::string> enum_values_raw;
|
|
52
71
|
std::vector<std::string> enum_values_minified;
|
|
@@ -65,6 +84,8 @@ struct schema_node {
|
|
|
65
84
|
|
|
66
85
|
std::string ref;
|
|
67
86
|
|
|
87
|
+
std::unordered_map<std::string, schema_node_ptr> defs;
|
|
88
|
+
|
|
68
89
|
std::optional<bool> boolean_schema;
|
|
69
90
|
};
|
|
70
91
|
|
|
@@ -73,6 +94,89 @@ struct compiled_schema_internal {
|
|
|
73
94
|
std::unordered_map<std::string, schema_node_ptr> defs;
|
|
74
95
|
};
|
|
75
96
|
|
|
97
|
+
// --- Fast format validators (no regex) ---
|
|
98
|
+
|
|
99
|
+
/// ASCII decimal digit test ('0'..'9'); does not consult the locale.
static bool nb_is_digit(char c) { return c >= '0' && c <= '9'; }

/// ASCII letter test ('a'..'z' or 'A'..'Z'); does not consult the locale.
static bool nb_is_alpha(char c) {
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/// ASCII letter-or-digit test.
static bool nb_is_alnum(char c) { return nb_is_alpha(c) || nb_is_digit(c); }

/// ASCII hexadecimal digit test (upper or lower case).
static bool nb_is_hex(char c) {
  return nb_is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

/// Shape check for `DDDD-DD-DD` on the 10 characters of `s` starting at `off`.
/// Only digits and separators are checked; calendar ranges are not.
static bool nb_is_date_at(const std::string& s, size_t off) {
  if (off > s.size() || s.size() - off < 10) return false;
  for (size_t i = 0; i < 10; ++i) {
    const char c = s[off + i];
    if (i == 4 || i == 7) {
      if (c != '-') return false;
    } else if (!nb_is_digit(c)) {
      return false;
    }
  }
  return true;
}

/// Checks that everything from `off` to the end of `s` has the shape
/// `DD:DD:DD[.D+][Z|z|+DD:DD|-DD:DD]`, with nothing left over afterwards.
/// The fraction and the offset are both optional.
static bool nb_is_time_at(const std::string& s, size_t off) {
  const size_t n = s.size();
  if (off > n || n - off < 8) return false;
  for (size_t i = 0; i < 8; ++i) {
    const char c = s[off + i];
    if (i == 2 || i == 5) {
      if (c != ':') return false;
    } else if (!nb_is_digit(c)) {
      return false;
    }
  }
  size_t p = off + 8;
  // Optional fractional seconds: a dot must be followed by at least one digit.
  if (p < n && s[p] == '.') {
    const size_t frac_begin = ++p;
    while (p < n && nb_is_digit(s[p])) ++p;
    if (p == frac_begin) return false;
  }
  if (p == n) return true;
  // Optional zone designator; it must consume the rest of the string.
  if (s[p] == 'Z' || s[p] == 'z') return p + 1 == n;
  if (s[p] != '+' && s[p] != '-') return false;
  return n - p == 6 && nb_is_digit(s[p + 1]) && nb_is_digit(s[p + 2]) &&
         s[p + 3] == ':' && nb_is_digit(s[p + 4]) && nb_is_digit(s[p + 5]);
}

/// Regex-free check of a string against a JSON Schema `format` name.
///
/// @param sv   The string value to check.
/// @param fmt  The format name ("email", "date", "time", "date-time", "ipv4",
///             "ipv6", "uri", "uri-reference", "uuid", "hostname").
/// @return true when `sv` passes the check for `fmt`. Format names not listed
///         above always return true.
///
/// These are structural checks, not full RFC validators: "email" only looks at
/// the position of '@' and the first '.' after it, "ipv6" only requires a ':',
/// and "date" does not range-check month or day.
static bool napi_check_format(const std::string& sv, const std::string& fmt) {
  if (fmt == "email") {
    const auto at = sv.find('@');
    if (at == std::string::npos || at == 0 || at == sv.size() - 1) return false;
    const auto dot = sv.find('.', at + 1);
    // A dot must exist after '@', not directly after it, with >= 2 chars following.
    return dot != std::string::npos && dot != at + 1 && dot != sv.size() - 1 &&
           (sv.size() - dot - 1) >= 2;
  }
  if (fmt == "date") {
    return sv.size() == 10 && nb_is_date_at(sv, 0);
  }
  if (fmt == "time") {
    // Trailing text after the time is rejected (e.g. "12:34:56junk").
    return nb_is_time_at(sv, 0);
  }
  if (fmt == "date-time") {
    if (sv.size() < 19) return false;
    if (!nb_is_date_at(sv, 0)) return false;
    if (sv[10] != 'T' && sv[10] != 't' && sv[10] != ' ') return false;
    return nb_is_time_at(sv, 11);
  }
  if (fmt == "ipv4") {
    int parts = 0, val = 0, digits = 0;
    // Iterate one past the end so the final octet is closed like a '.' would.
    for (size_t i = 0; i <= sv.size(); ++i) {
      if (i == sv.size() || sv[i] == '.') {
        if (digits == 0 || val > 255) return false;
        ++parts;
        val = 0;
        digits = 0;
      } else if (nb_is_digit(sv[i])) {
        val = val * 10 + (sv[i] - '0');
        ++digits;
        if (digits > 3) return false;
      } else {
        return false;
      }
    }
    return parts == 4;
  }
  if (fmt == "ipv6") return sv.find(':') != std::string::npos;
  if (fmt == "uri" || fmt == "uri-reference") {
    if (sv.size() < 3 || !nb_is_alpha(sv[0])) return false;
    size_t i = 1;
    // Consume the scheme: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    while (i < sv.size() &&
           (nb_is_alnum(sv[i]) || sv[i] == '+' || sv[i] == '-' || sv[i] == '.')) {
      ++i;
    }
    // The scheme must be followed by ':' and at least one more character.
    return i < sv.size() && sv[i] == ':' && i + 1 < sv.size();
  }
  if (fmt == "uuid") {
    if (sv.size() != 36) return false;
    for (size_t i = 0; i < 36; ++i) {
      if (i == 8 || i == 13 || i == 18 || i == 23) {
        if (sv[i] != '-') return false;
      } else {
        if (!nb_is_hex(sv[i])) return false;
      }
    }
    return true;
  }
  if (fmt == "hostname") {
    if (sv.empty() || sv.size() > 253) return false;
    size_t label_len = 0;
    char prev = '.';
    for (const char c : sv) {
      if (c == '.') {
        // Empty labels and labels ending in '-' are invalid.
        if (label_len == 0 || prev == '-') return false;
        label_len = 0;
      } else if (nb_is_alnum(c) || c == '-') {
        // A label may not start with '-'.
        if (c == '-' && label_len == 0) return false;
        if (++label_len > 63) return false;
      } else {
        return false;
      }
      prev = c;
    }
    // Rejects a trailing '.' (empty last label) and a trailing '-'.
    return label_len > 0 && prev != '-';
  }
  return true;
}
|
|
179
|
+
|
|
76
180
|
// --- V8 Direct Validator ---
|
|
77
181
|
|
|
78
182
|
static std::string napi_type_of(Napi::Value val) {
|
|
@@ -115,17 +219,66 @@ static uint64_t utf8_codepoint_length(const std::string& s) {
|
|
|
115
219
|
}
|
|
116
220
|
|
|
117
221
|
// Serialize a Napi::Value to a minified JSON string (for enum/const comparison)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
if (
|
|
123
|
-
|
|
222
|
+
// Canonical JSON: sort object keys for semantic equality comparison
|
|
223
|
+
/// Serializes a JS value to a JSON string with object keys emitted in sorted
/// order, so two structurally equal values produce the same text.
///
/// @param env  The N-API environment used to reach the global `JSON` object.
/// @param val  The value to serialize.
/// @return The JSON text. `null` and `undefined` both yield "null", as does any
///         value that is none of boolean/number/string/array/object.
///
/// Keys are ordered by comparing their UTF-8 encoded `std::string` form.
static std::string napi_canonical_json(Napi::Env env, Napi::Value val) {
  if (val.IsNull() || val.IsUndefined()) return "null";
  if (val.IsBoolean()) return val.As<Napi::Boolean>().Value() ? "true" : "false";

  // Delegates scalar encoding (non-integral numbers, string escaping) to
  // JSON.stringify; a non-string result is reported as "null".
  const auto js_stringify = [&env](Napi::Value v) -> std::string {
    auto json = env.Global().Get("JSON").As<Napi::Object>();
    auto stringify = json.Get("stringify").As<Napi::Function>();
    auto r = stringify.Call(json, {v});
    return r.IsString() ? r.As<Napi::String>().Utf8Value() : "null";
  };

  if (val.IsNumber()) {
    const double d = val.As<Napi::Number>().DoubleValue();
    // Range-check BEFORE converting: casting NaN, +/-Infinity or a value
    // outside int64_t's range to an integer is undefined behaviour. The
    // comparison is false for NaN, so NaN never reaches the cast.
    constexpr double kMaxSafeInteger = 9007199254740991.0;
    if (std::abs(d) <= kMaxSafeInteger) {
      const auto as_int = static_cast<int64_t>(d);
      if (static_cast<double>(as_int) == d) return std::to_string(as_int);
    }
    return js_stringify(val);
  }
  if (val.IsString()) {
    return js_stringify(val);
  }
  if (val.IsArray()) {
    auto arr = val.As<Napi::Array>();
    const uint32_t count = arr.Length();
    std::string r = "[";
    for (uint32_t i = 0; i < count; ++i) {
      if (i) r += ',';
      r += napi_canonical_json(env, arr.Get(i));
    }
    r += ']';
    return r;
  }
  if (val.IsObject()) {
    auto obj = val.As<Napi::Object>();
    auto keys = obj.GetPropertyNames();
    const uint32_t key_count = keys.Length();
    std::vector<std::string> sorted_keys;
    sorted_keys.reserve(key_count);
    for (uint32_t i = 0; i < key_count; ++i) {
      sorted_keys.push_back(keys.Get(i).As<Napi::String>().Utf8Value());
    }
    std::sort(sorted_keys.begin(), sorted_keys.end());

    // Resolve JSON.stringify once per object instead of once per key.
    auto json = env.Global().Get("JSON").As<Napi::Object>();
    auto stringify = json.Get("stringify").As<Napi::Function>();

    std::string r = "{";
    for (size_t i = 0; i < sorted_keys.size(); ++i) {
      if (i) r += ',';
      // JSON-encode the key so quotes and escapes are emitted correctly.
      auto k = stringify.Call(json, {Napi::String::New(env, sorted_keys[i])});
      r += k.As<Napi::String>().Utf8Value();
      r += ':';
      r += napi_canonical_json(env, obj.Get(sorted_keys[i]));
    }
    r += '}';
    return r;
  }
  return "null";
}
|
|
128
277
|
|
|
278
|
+
/// Serializes `val` to JSON text by forwarding to napi_canonical_json, so the
/// output has its object keys sorted.
static std::string napi_to_json(Napi::Env env, Napi::Value val) {
  std::string canonical = napi_canonical_json(env, val);
  return canonical;
}
|
|
281
|
+
|
|
129
282
|
static void validate_napi(const schema_node_ptr& node,
|
|
130
283
|
Napi::Value value,
|
|
131
284
|
Napi::Env env,
|
|
@@ -150,63 +303,80 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
150
303
|
return;
|
|
151
304
|
}
|
|
152
305
|
|
|
153
|
-
// $ref
|
|
306
|
+
// $ref — Draft 2020-12: $ref is not a short-circuit, sibling keywords still apply
|
|
307
|
+
bool ref_resolved = false;
|
|
154
308
|
if (!node->ref.empty()) {
|
|
155
|
-
// First check defs map
|
|
156
309
|
auto it = ctx.defs.find(node->ref);
|
|
157
310
|
if (it != ctx.defs.end()) {
|
|
158
311
|
validate_napi(it->second, value, env, path, ctx, errors);
|
|
159
|
-
|
|
312
|
+
ref_resolved = true;
|
|
160
313
|
}
|
|
161
|
-
|
|
162
|
-
if (node->ref.size() > 1 && node->ref[0] == '#' &&
|
|
314
|
+
if (!ref_resolved && node->ref.size() > 1 && node->ref[0] == '#' &&
|
|
163
315
|
node->ref[1] == '/') {
|
|
316
|
+
// Decode JSON Pointer segments
|
|
317
|
+
auto decode_seg = [](const std::string& seg) -> std::string {
|
|
318
|
+
std::string pct;
|
|
319
|
+
for (size_t i = 0; i < seg.size(); ++i) {
|
|
320
|
+
if (seg[i] == '%' && i + 2 < seg.size()) {
|
|
321
|
+
auto hex = [](char c) -> int {
|
|
322
|
+
if (c >= '0' && c <= '9') return c - '0';
|
|
323
|
+
if (c >= 'a' && c <= 'f') return 10 + c - 'a';
|
|
324
|
+
if (c >= 'A' && c <= 'F') return 10 + c - 'A';
|
|
325
|
+
return -1;
|
|
326
|
+
};
|
|
327
|
+
int hv = hex(seg[i+1]), lv = hex(seg[i+2]);
|
|
328
|
+
if (hv >= 0 && lv >= 0) { pct += static_cast<char>(hv * 16 + lv); i += 2; }
|
|
329
|
+
else pct += seg[i];
|
|
330
|
+
} else pct += seg[i];
|
|
331
|
+
}
|
|
332
|
+
std::string out;
|
|
333
|
+
for (size_t i = 0; i < pct.size(); ++i) {
|
|
334
|
+
if (pct[i] == '~' && i + 1 < pct.size()) {
|
|
335
|
+
if (pct[i+1] == '1') { out += '/'; ++i; }
|
|
336
|
+
else if (pct[i+1] == '0') { out += '~'; ++i; }
|
|
337
|
+
else out += pct[i];
|
|
338
|
+
} else out += pct[i];
|
|
339
|
+
}
|
|
340
|
+
return out;
|
|
341
|
+
};
|
|
164
342
|
std::string pointer = node->ref.substr(2);
|
|
343
|
+
std::vector<std::string> segments;
|
|
344
|
+
size_t spos = 0;
|
|
345
|
+
while (spos < pointer.size()) {
|
|
346
|
+
size_t snext = pointer.find('/', spos);
|
|
347
|
+
segments.push_back(decode_seg(
|
|
348
|
+
pointer.substr(spos, snext == std::string::npos ? snext : snext - spos)));
|
|
349
|
+
spos = (snext == std::string::npos) ? pointer.size() : snext + 1;
|
|
350
|
+
}
|
|
165
351
|
schema_node_ptr current = ctx.root;
|
|
166
352
|
bool resolved = true;
|
|
167
|
-
size_t
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
pointer.substr(pos, next == std::string::npos ? next : next - pos);
|
|
172
|
-
std::string key;
|
|
173
|
-
for (size_t i = 0; i < segment.size(); ++i) {
|
|
174
|
-
if (segment[i] == '~' && i + 1 < segment.size()) {
|
|
175
|
-
if (segment[i + 1] == '1') { key += '/'; ++i; }
|
|
176
|
-
else if (segment[i + 1] == '0') { key += '~'; ++i; }
|
|
177
|
-
else key += segment[i];
|
|
178
|
-
} else {
|
|
179
|
-
key += segment[i];
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
if (key == "properties" && !current->properties.empty()) {
|
|
183
|
-
pos = (next == std::string::npos) ? pointer.size() : next + 1;
|
|
184
|
-
next = pointer.find('/', pos);
|
|
185
|
-
std::string prop = pointer.substr(
|
|
186
|
-
pos, next == std::string::npos ? next : next - pos);
|
|
187
|
-
auto pit = current->properties.find(prop);
|
|
353
|
+
for (size_t si = 0; si < segments.size() && current; ++si) {
|
|
354
|
+
const auto& key = segments[si];
|
|
355
|
+
if (key == "properties" && si + 1 < segments.size()) {
|
|
356
|
+
auto pit = current->properties.find(segments[++si]);
|
|
188
357
|
if (pit != current->properties.end()) current = pit->second;
|
|
189
358
|
else { resolved = false; break; }
|
|
190
359
|
} else if (key == "items" && current->items_schema) {
|
|
191
360
|
current = current->items_schema;
|
|
192
361
|
} else if (key == "$defs" || key == "definitions") {
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
362
|
+
if (si + 1 < segments.size()) {
|
|
363
|
+
const auto& def_name = segments[++si];
|
|
364
|
+
auto dit = current->defs.find(def_name);
|
|
365
|
+
if (dit != current->defs.end()) current = dit->second;
|
|
366
|
+
else {
|
|
367
|
+
auto cit = ctx.defs.find("#/" + key + "/" + def_name);
|
|
368
|
+
if (cit != ctx.defs.end()) current = cit->second;
|
|
369
|
+
else { resolved = false; break; }
|
|
370
|
+
}
|
|
371
|
+
} else { resolved = false; break; }
|
|
200
372
|
} else if (key == "allOf" || key == "anyOf" || key == "oneOf") {
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
if (idx < vec.size()) current = vec[idx];
|
|
209
|
-
else { resolved = false; break; }
|
|
373
|
+
if (si + 1 < segments.size()) {
|
|
374
|
+
size_t idx = std::stoul(segments[++si]);
|
|
375
|
+
auto& vec = (key == "allOf") ? current->all_of
|
|
376
|
+
: (key == "anyOf") ? current->any_of : current->one_of;
|
|
377
|
+
if (idx < vec.size()) current = vec[idx];
|
|
378
|
+
else { resolved = false; break; }
|
|
379
|
+
} else { resolved = false; break; }
|
|
210
380
|
} else if (key == "not" && current->not_schema) {
|
|
211
381
|
current = current->not_schema;
|
|
212
382
|
} else if (key == "if" && current->if_schema) {
|
|
@@ -219,28 +389,26 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
219
389
|
current->additional_properties_schema) {
|
|
220
390
|
current = current->additional_properties_schema;
|
|
221
391
|
} else if (key == "prefixItems") {
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
if (idx < current->prefix_items.size()) current = current->prefix_items[idx];
|
|
228
|
-
else { resolved = false; break; }
|
|
392
|
+
if (si + 1 < segments.size()) {
|
|
393
|
+
size_t idx = std::stoul(segments[++si]);
|
|
394
|
+
if (idx < current->prefix_items.size()) current = current->prefix_items[idx];
|
|
395
|
+
else { resolved = false; break; }
|
|
396
|
+
} else { resolved = false; break; }
|
|
229
397
|
} else { resolved = false; break; }
|
|
230
|
-
pos = (next == std::string::npos) ? pointer.size() : next + 1;
|
|
231
398
|
}
|
|
232
399
|
if (resolved && current) {
|
|
233
400
|
validate_napi(current, value, env, path, ctx, errors);
|
|
234
|
-
|
|
401
|
+
ref_resolved = true;
|
|
235
402
|
}
|
|
236
403
|
}
|
|
237
|
-
if (node->ref == "#" && ctx.root) {
|
|
404
|
+
if (!ref_resolved && node->ref == "#" && ctx.root) {
|
|
238
405
|
validate_napi(ctx.root, value, env, path, ctx, errors);
|
|
239
|
-
|
|
406
|
+
ref_resolved = true;
|
|
407
|
+
}
|
|
408
|
+
if (!ref_resolved) {
|
|
409
|
+
errors.push_back({ata::error_code::ref_not_found, path,
|
|
410
|
+
"cannot resolve $ref: " + node->ref});
|
|
240
411
|
}
|
|
241
|
-
errors.push_back({ata::error_code::ref_not_found, path,
|
|
242
|
-
"cannot resolve $ref: " + node->ref});
|
|
243
|
-
return;
|
|
244
412
|
}
|
|
245
413
|
|
|
246
414
|
auto actual_type = napi_type_of(value);
|
|
@@ -356,7 +524,7 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
356
524
|
std::to_string(node->max_length.value())});
|
|
357
525
|
}
|
|
358
526
|
if (node->compiled_pattern) {
|
|
359
|
-
if (!
|
|
527
|
+
if (!re2::RE2::PartialMatch(sv, *node->compiled_pattern)) {
|
|
360
528
|
errors.push_back({ata::error_code::pattern_mismatch, path,
|
|
361
529
|
"string does not match pattern: " +
|
|
362
530
|
node->pattern.value()});
|
|
@@ -364,40 +532,7 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
364
532
|
}
|
|
365
533
|
if (node->format.has_value()) {
|
|
366
534
|
const auto& fmt = node->format.value();
|
|
367
|
-
bool format_ok =
|
|
368
|
-
if (fmt == "email") {
|
|
369
|
-
static const std::regex email_re(
|
|
370
|
-
R"([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})");
|
|
371
|
-
format_ok = std::regex_match(sv, email_re);
|
|
372
|
-
} else if (fmt == "uri" || fmt == "uri-reference") {
|
|
373
|
-
static const std::regex uri_re(R"([a-zA-Z][a-zA-Z0-9+\-.]*:.+)");
|
|
374
|
-
format_ok = std::regex_match(sv, uri_re);
|
|
375
|
-
} else if (fmt == "date") {
|
|
376
|
-
static const std::regex date_re(R"(\d{4}-\d{2}-\d{2})");
|
|
377
|
-
format_ok = std::regex_match(sv, date_re);
|
|
378
|
-
} else if (fmt == "date-time") {
|
|
379
|
-
static const std::regex dt_re(
|
|
380
|
-
R"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{2}:\d{2})?)");
|
|
381
|
-
format_ok = std::regex_match(sv, dt_re);
|
|
382
|
-
} else if (fmt == "time") {
|
|
383
|
-
static const std::regex time_re(
|
|
384
|
-
R"(\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{2}:\d{2})?)");
|
|
385
|
-
format_ok = std::regex_match(sv, time_re);
|
|
386
|
-
} else if (fmt == "ipv4") {
|
|
387
|
-
static const std::regex ipv4_re(
|
|
388
|
-
R"((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3}))");
|
|
389
|
-
format_ok = std::regex_match(sv, ipv4_re);
|
|
390
|
-
} else if (fmt == "ipv6") {
|
|
391
|
-
format_ok = sv.find(':') != std::string::npos;
|
|
392
|
-
} else if (fmt == "uuid") {
|
|
393
|
-
static const std::regex uuid_re(
|
|
394
|
-
R"([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})");
|
|
395
|
-
format_ok = std::regex_match(sv, uuid_re);
|
|
396
|
-
} else if (fmt == "hostname") {
|
|
397
|
-
static const std::regex host_re(
|
|
398
|
-
R"([a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*)");
|
|
399
|
-
format_ok = std::regex_match(sv, host_re);
|
|
400
|
-
}
|
|
535
|
+
bool format_ok = napi_check_format(sv, fmt);
|
|
401
536
|
if (!format_ok) {
|
|
402
537
|
errors.push_back({ata::error_code::format_mismatch, path,
|
|
403
538
|
"string does not match format: " + fmt});
|
|
@@ -449,6 +584,28 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
449
584
|
path + "/" + std::to_string(i), ctx, errors);
|
|
450
585
|
}
|
|
451
586
|
}
|
|
587
|
+
|
|
588
|
+
// contains / minContains / maxContains
|
|
589
|
+
if (node->contains_schema) {
|
|
590
|
+
uint64_t match_count = 0;
|
|
591
|
+
for (uint32_t i = 0; i < arr_size; ++i) {
|
|
592
|
+
std::vector<ata::validation_error> tmp;
|
|
593
|
+
validate_napi(node->contains_schema, arr.Get(i), env, path, ctx, tmp);
|
|
594
|
+
if (tmp.empty()) ++match_count;
|
|
595
|
+
}
|
|
596
|
+
uint64_t min_c = node->min_contains.value_or(1);
|
|
597
|
+
uint64_t max_c = node->max_contains.value_or(arr_size);
|
|
598
|
+
if (match_count < min_c) {
|
|
599
|
+
errors.push_back({ata::error_code::min_items_violation, path,
|
|
600
|
+
"contains: " + std::to_string(match_count) +
|
|
601
|
+
" matches, minimum " + std::to_string(min_c)});
|
|
602
|
+
}
|
|
603
|
+
if (match_count > max_c) {
|
|
604
|
+
errors.push_back({ata::error_code::max_items_violation, path,
|
|
605
|
+
"contains: " + std::to_string(match_count) +
|
|
606
|
+
" matches, maximum " + std::to_string(max_c)});
|
|
607
|
+
}
|
|
608
|
+
}
|
|
452
609
|
}
|
|
453
610
|
|
|
454
611
|
// Object validations
|
|
@@ -493,15 +650,11 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
493
650
|
matched = true;
|
|
494
651
|
}
|
|
495
652
|
|
|
496
|
-
for (const auto&
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
errors);
|
|
502
|
-
matched = true;
|
|
503
|
-
}
|
|
504
|
-
} catch (...) {
|
|
653
|
+
for (const auto& pp : node->pattern_properties) {
|
|
654
|
+
if (pp.compiled && re2::RE2::PartialMatch(key_str, *pp.compiled)) {
|
|
655
|
+
validate_napi(pp.schema, val, env, path + "/" + key_str, ctx,
|
|
656
|
+
errors);
|
|
657
|
+
matched = true;
|
|
505
658
|
}
|
|
506
659
|
}
|
|
507
660
|
|
|
@@ -517,6 +670,35 @@ static void validate_napi(const schema_node_ptr& node,
|
|
|
517
670
|
}
|
|
518
671
|
}
|
|
519
672
|
}
|
|
673
|
+
|
|
674
|
+
// propertyNames
|
|
675
|
+
if (node->property_names_schema) {
|
|
676
|
+
for (uint32_t i = 0; i < prop_count; ++i) {
|
|
677
|
+
Napi::Value key_val = keys.Get(i);
|
|
678
|
+
validate_napi(node->property_names_schema, key_val, env, path, ctx,
|
|
679
|
+
errors);
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
// dependentRequired
|
|
684
|
+
for (const auto& [prop, deps] : node->dependent_required) {
|
|
685
|
+
if (obj.HasOwnProperty(prop)) {
|
|
686
|
+
for (const auto& dep : deps) {
|
|
687
|
+
if (!obj.HasOwnProperty(dep)) {
|
|
688
|
+
errors.push_back({ata::error_code::required_property_missing, path,
|
|
689
|
+
"property '" + prop + "' requires '" + dep +
|
|
690
|
+
"' to be present"});
|
|
691
|
+
}
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
// dependentSchemas
|
|
697
|
+
for (const auto& [prop, schema] : node->dependent_schemas) {
|
|
698
|
+
if (obj.HasOwnProperty(prop)) {
|
|
699
|
+
validate_napi(schema, value, env, path, ctx, errors);
|
|
700
|
+
}
|
|
701
|
+
}
|
|
520
702
|
}
|
|
521
703
|
|
|
522
704
|
// allOf
|
|
@@ -618,7 +800,8 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
|
|
|
618
800
|
env, "CompiledSchema",
|
|
619
801
|
{InstanceMethod("validate", &CompiledSchema::Validate),
|
|
620
802
|
InstanceMethod("validateJSON", &CompiledSchema::ValidateJSON),
|
|
621
|
-
InstanceMethod("validateDirect", &CompiledSchema::ValidateDirect)
|
|
803
|
+
InstanceMethod("validateDirect", &CompiledSchema::ValidateDirect),
|
|
804
|
+
InstanceMethod("isValidJSON", &CompiledSchema::IsValidJSON)});
|
|
622
805
|
auto* constructor = new Napi::FunctionReference();
|
|
623
806
|
*constructor = Napi::Persistent(func);
|
|
624
807
|
env.SetInstanceData(constructor);
|
|
@@ -658,19 +841,77 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
|
|
|
658
841
|
return ValidateDirectImpl(env, info[0]);
|
|
659
842
|
}
|
|
660
843
|
|
|
844
|
+
// Thread-local reusable buffer for string extraction — avoids per-call allocation.
|
|
845
|
+
// Sized with SIMDJSON_PADDING so simdjson can read safely beyond the JSON.
|
|
846
|
+
static constexpr size_t TL_BUF_SHRINK_THRESHOLD = 64 * 1024; // 64KB
|
|
847
|
+
|
|
848
|
+
/// Returns this thread's scratch string. Each thread gets its own instance,
/// and repeated calls on one thread return the same object.
static std::string& tl_json_buf() {
  thread_local std::string scratch{};
  return scratch;
}
|
|
852
|
+
|
|
853
|
+
// Extract JS string into reusable thread-local buffer with simdjson padding.
|
|
854
|
+
// Returns {data, length} — data is valid until next call on same thread.
|
|
855
|
+
/// Copies a JS string's UTF-8 bytes into the thread-local scratch buffer.
///
/// @param env  N-API environment.
/// @param val  The JS value to read as a string.
/// @return {pointer to the bytes, byte length}. The pointer refers to the
///         buffer returned by tl_json_buf(), so it is only usable until that
///         buffer is next modified on this thread.
///
/// The buffer is kept at least `len + 1 + ata::REQUIRED_PADDING` bytes long.
/// The status codes of the N-API calls are not inspected.
static std::pair<const char*, size_t> extract_string(napi_env env, napi_value val) {
  // First pass with a null destination: only the length is written to `len`.
  size_t len = 0;
  napi_get_value_string_utf8(env, val, nullptr, 0, &len);

  std::string& scratch = tl_json_buf();
  const size_t required = len + 1 + ata::REQUIRED_PADDING;
  if (required > scratch.size()) {
    scratch.resize(required);
  }

  // Second pass copies the bytes; `len + 1` leaves room for the terminator.
  napi_get_value_string_utf8(env, val, scratch.data(), len + 1, &len);

  // Give memory back when an earlier large string left the buffer oversized
  // and the current payload is comfortably below the threshold.
  const bool buffer_is_large = scratch.size() > TL_BUF_SHRINK_THRESHOLD;
  const bool payload_is_small = len < TL_BUF_SHRINK_THRESHOLD / 2;
  if (buffer_is_large && payload_is_small) {
    scratch.resize(TL_BUF_SHRINK_THRESHOLD);
    scratch.shrink_to_fit();
  }
  return {scratch.data(), len};
}
|
|
869
|
+
|
|
661
870
|
// Validate via JSON string (simdjson parse path)
|
|
662
871
|
/// Validates JSON text against the compiled schema and returns the result
/// object built by make_result.
///
/// Accepts either a Buffer (its bytes are viewed in place) or a JS string
/// (copied out through extract_string). Throws a JS TypeError and returns
/// `undefined` when no argument is given or the argument is neither.
Napi::Value ValidateJSON(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1) {
    Napi::TypeError::New(env, "JSON string expected")
        .ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const Napi::Value input = info[0];
  std::string_view payload;
  if (input.IsBuffer()) {
    // View the Buffer's bytes directly; no copy is made.
    auto bytes = input.As<Napi::Buffer<char>>();
    payload = std::string_view(bytes.Data(), bytes.Length());
  } else if (input.IsString()) {
    auto [data, len] = extract_string(env, input);
    payload = std::string_view(data, len);
  } else {
    Napi::TypeError::New(env, "JSON string or Buffer expected")
        .ThrowAsJavaScriptException();
    return env.Undefined();
  }

  auto result = ata::validate(schema_, payload);
  return make_result(env, result);
}
|
|
673
893
|
|
|
894
|
+
// Fast boolean-only validation — no error object creation
|
|
895
|
+
/// Boolean-only validation of JSON text against the compiled schema.
///
/// Returns a JS boolean. A missing argument, or an argument that is neither a
/// Buffer nor a string, yields `false` rather than throwing.
Napi::Value IsValidJSON(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  bool verdict = false;
  if (info.Length() >= 1) {
    const Napi::Value input = info[0];
    if (input.IsBuffer()) {
      // Buffer bytes are viewed in place and go through ata::validate.
      auto bytes = input.As<Napi::Buffer<char>>();
      auto result = ata::validate(schema_, std::string_view(bytes.Data(), bytes.Length()));
      verdict = result.valid;
    } else if (input.IsString()) {
      // extract_string sizes its buffer with ata::REQUIRED_PADDING, so the
      // prepadded entry point can be used on the returned pointer.
      auto [data, len] = extract_string(env, input);
      verdict = ata::is_valid_prepadded(schema_, data, len);
    }
  }
  return Napi::Boolean::New(env, verdict);
}
|
|
914
|
+
|
|
674
915
|
// Explicit direct validation (always V8 traversal, never stringify)
|
|
675
916
|
Napi::Value ValidateDirect(const Napi::CallbackInfo& info) {
|
|
676
917
|
Napi::Env env = info.Env();
|
|
@@ -734,10 +975,558 @@ Napi::Value GetVersion(const Napi::CallbackInfo& info) {
|
|
|
734
975
|
return Napi::String::New(info.Env(), std::string(ata::version()));
|
|
735
976
|
}
|
|
736
977
|
|
|
978
|
+
// --- Thread Pool ---
|
|
979
|
+
class ThreadPool {
|
|
980
|
+
public:
|
|
981
|
+
ThreadPool() {
|
|
982
|
+
unsigned n = std::thread::hardware_concurrency();
|
|
983
|
+
if (n == 0) n = 4;
|
|
984
|
+
for (unsigned i = 0; i < n; i++) {
|
|
985
|
+
workers_.emplace_back([this] {
|
|
986
|
+
// Each thread gets its own schema cache
|
|
987
|
+
std::unordered_map<uint32_t, ata::schema_ref> cache;
|
|
988
|
+
while (true) {
|
|
989
|
+
std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)> task;
|
|
990
|
+
{
|
|
991
|
+
std::unique_lock<std::mutex> lock(mtx_);
|
|
992
|
+
cv_.wait(lock, [this] { return stop_ || !tasks_.empty(); });
|
|
993
|
+
if (stop_ && tasks_.empty()) return;
|
|
994
|
+
task = std::move(tasks_.front());
|
|
995
|
+
tasks_.pop();
|
|
996
|
+
}
|
|
997
|
+
task(cache);
|
|
998
|
+
{
|
|
999
|
+
std::unique_lock<std::mutex> lock(done_mtx_);
|
|
1000
|
+
pending_--;
|
|
1001
|
+
if (pending_ == 0) done_cv_.notify_all();
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
});
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
|
|
1008
|
+
void submit(std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)> task) {
|
|
1009
|
+
{
|
|
1010
|
+
std::unique_lock<std::mutex> lock(mtx_);
|
|
1011
|
+
tasks_.push(std::move(task));
|
|
1012
|
+
}
|
|
1013
|
+
{
|
|
1014
|
+
std::unique_lock<std::mutex> lock(done_mtx_);
|
|
1015
|
+
pending_++;
|
|
1016
|
+
}
|
|
1017
|
+
cv_.notify_one();
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
void wait() {
|
|
1021
|
+
std::unique_lock<std::mutex> lock(done_mtx_);
|
|
1022
|
+
done_cv_.wait(lock, [this] { return pending_ == 0; });
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
unsigned size() const { return (unsigned)workers_.size(); }
|
|
1026
|
+
|
|
1027
|
+
~ThreadPool() {
|
|
1028
|
+
{ std::unique_lock<std::mutex> lock(mtx_); stop_ = true; }
|
|
1029
|
+
cv_.notify_all();
|
|
1030
|
+
for (auto& w : workers_) w.join();
|
|
1031
|
+
}
|
|
1032
|
+
|
|
1033
|
+
private:
|
|
1034
|
+
std::vector<std::thread> workers_;
|
|
1035
|
+
std::queue<std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)>> tasks_;
|
|
1036
|
+
std::mutex mtx_;
|
|
1037
|
+
std::condition_variable cv_;
|
|
1038
|
+
std::mutex done_mtx_;
|
|
1039
|
+
std::condition_variable done_cv_;
|
|
1040
|
+
std::atomic<int> pending_{0};
|
|
1041
|
+
bool stop_ = false;
|
|
1042
|
+
};
|
|
1043
|
+
|
|
1044
|
+
/// Accessor for the single ThreadPool instance, which is constructed the
/// first time this function is called.
static ThreadPool& pool() {
  static ThreadPool shared_pool;
  return shared_pool;
}
|
|
1048
|
+
|
|
1049
|
+
// --- Fast Validation Registry ---
|
|
1050
|
+
// Global schema slots for V8 Fast API (bypasses NAPI overhead)
|
|
1051
|
+
static constexpr size_t MAX_FAST_SLOTS = 256;
|
|
1052
|
+
static ata::schema_ref g_fast_schemas[MAX_FAST_SLOTS];
|
|
1053
|
+
static std::string g_fast_schema_jsons[MAX_FAST_SLOTS];
|
|
1054
|
+
static uint32_t g_fast_slot_count = 0;
|
|
1055
|
+
|
|
1056
|
+
// Register a compiled schema in a fast slot, returns slot ID
|
|
1057
|
+
/// Compiles a schema and stores it in the next free fast slot.
///
/// Expects `info[0]` to be the schema as a JSON string.
/// @return The slot ID as a JS number. On error a JS exception is thrown and
///         `undefined` is returned: TypeError when the argument is missing or
///         not a string, Error when all MAX_FAST_SLOTS slots are taken or the
///         schema fails to compile.
Napi::Value FastRegister(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsString()) {
    Napi::TypeError::New(env, "Schema JSON string expected").ThrowAsJavaScriptException();
    return env.Undefined();
  }
  if (g_fast_slot_count >= MAX_FAST_SLOTS) {
    Napi::Error::New(env, "Max fast schema slots reached").ThrowAsJavaScriptException();
    return env.Undefined();
  }
  std::string schema_json = info[0].As<Napi::String>().Utf8Value();
  auto schema = ata::compile(schema_json);
  if (!schema) {
    Napi::Error::New(env, "Failed to compile schema").ThrowAsJavaScriptException();
    return env.Undefined();
  }
  // Fill the slot first and only then advance the count, so the count never
  // covers a slot that has not been populated (including when an assignment
  // below throws).
  const uint32_t slot = g_fast_slot_count;
  g_fast_schemas[slot] = std::move(schema);
  // The local copy is not needed afterwards, so move it instead of copying.
  g_fast_schema_jsons[slot] = std::move(schema_json);
  g_fast_slot_count = slot + 1;
  return Napi::Number::New(env, slot);
}
|
|
1078
|
+
|
|
1079
|
+
// Fast validation: slot + Uint8Array → bool (called via V8 Fast API)
|
|
1080
|
+
static bool FastValidateImpl(uint32_t slot, const uint8_t* data, size_t length) {
|
|
1081
|
+
if (slot >= g_fast_slot_count) return false;
|
|
1082
|
+
auto result = ata::validate(g_fast_schemas[slot],
|
|
1083
|
+
std::string_view(reinterpret_cast<const char*>(data), length));
|
|
1084
|
+
return result.valid;
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
// Zero-copy validation with pre-padded buffer
|
|
1088
|
+
static bool FastValidatePrepadded(uint32_t slot, const uint8_t* data, size_t length) {
|
|
1089
|
+
if (slot >= g_fast_slot_count) return false;
|
|
1090
|
+
return ata::is_valid_prepadded(g_fast_schemas[slot],
|
|
1091
|
+
reinterpret_cast<const char*>(data), length);
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
// Slow path (NAPI) — called when V8 can't use fast path
|
|
1095
|
+
Napi::Value FastValidateSlow(const Napi::CallbackInfo& info) {
|
|
1096
|
+
Napi::Env env = info.Env();
|
|
1097
|
+
if (info.Length() < 2 || !info[0].IsNumber()) {
|
|
1098
|
+
return Napi::Boolean::New(env, false);
|
|
1099
|
+
}
|
|
1100
|
+
uint32_t slot = info[0].As<Napi::Number>().Uint32Value();
|
|
1101
|
+
if (info[1].IsTypedArray()) {
|
|
1102
|
+
auto arr = info[1].As<Napi::TypedArray>();
|
|
1103
|
+
if (arr.TypedArrayType() == napi_uint8_array) {
|
|
1104
|
+
auto u8 = info[1].As<Napi::Uint8Array>();
|
|
1105
|
+
bool ok = FastValidateImpl(slot, u8.Data(), u8.ByteLength());
|
|
1106
|
+
return Napi::Boolean::New(env, ok);
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1109
|
+
if (info[1].IsBuffer()) {
|
|
1110
|
+
auto buf = info[1].As<Napi::Buffer<uint8_t>>();
|
|
1111
|
+
bool ok = FastValidateImpl(slot, buf.Data(), buf.Length());
|
|
1112
|
+
return Napi::Boolean::New(env, ok);
|
|
1113
|
+
}
|
|
1114
|
+
if (info[1].IsString()) {
|
|
1115
|
+
std::string json = info[1].As<Napi::String>().Utf8Value();
|
|
1116
|
+
bool ok = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(json.data()), json.size());
|
|
1117
|
+
return Napi::Boolean::New(env, ok);
|
|
1118
|
+
}
|
|
1119
|
+
return Napi::Boolean::New(env, false);
|
|
1120
|
+
}
|
|
1121
|
+
|
|
1122
|
+
// --- Raw NAPI fast path (minimal overhead) ---
|
|
1123
|
+
static napi_value RawFastValidate(napi_env env, napi_callback_info info) {
|
|
1124
|
+
size_t argc = 3;
|
|
1125
|
+
napi_value args[3];
|
|
1126
|
+
napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
|
|
1127
|
+
|
|
1128
|
+
if (argc < 2) {
|
|
1129
|
+
napi_value result;
|
|
1130
|
+
napi_get_boolean(env, false, &result);
|
|
1131
|
+
return result;
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
uint32_t slot;
|
|
1135
|
+
napi_get_value_uint32(env, args[0], &slot);
|
|
1136
|
+
|
|
1137
|
+
// Check if pre-padded mode (3rd arg = json length, buffer has padding)
|
|
1138
|
+
bool prepadded = (argc >= 3);
|
|
1139
|
+
uint32_t json_length = 0;
|
|
1140
|
+
if (prepadded) {
|
|
1141
|
+
napi_get_value_uint32(env, args[2], &json_length);
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
bool valid = false;
|
|
1145
|
+
|
|
1146
|
+
// Try typed array first (zero-copy)
|
|
1147
|
+
bool is_typedarray = false;
|
|
1148
|
+
napi_is_typedarray(env, args[1], &is_typedarray);
|
|
1149
|
+
|
|
1150
|
+
if (is_typedarray) {
|
|
1151
|
+
napi_typedarray_type type;
|
|
1152
|
+
size_t length;
|
|
1153
|
+
void* data;
|
|
1154
|
+
napi_get_typedarray_info(env, args[1], &type, &length, &data, nullptr, nullptr);
|
|
1155
|
+
if (data) {
|
|
1156
|
+
size_t actual_len = prepadded ? json_length : length;
|
|
1157
|
+
if (prepadded) {
|
|
1158
|
+
valid = FastValidatePrepadded(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1159
|
+
} else {
|
|
1160
|
+
valid = FastValidateImpl(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
} else {
|
|
1164
|
+
bool is_buffer = false;
|
|
1165
|
+
napi_is_buffer(env, args[1], &is_buffer);
|
|
1166
|
+
if (is_buffer) {
|
|
1167
|
+
void* data;
|
|
1168
|
+
size_t length;
|
|
1169
|
+
napi_get_buffer_info(env, args[1], &data, &length);
|
|
1170
|
+
if (data) {
|
|
1171
|
+
size_t actual_len = prepadded ? json_length : length;
|
|
1172
|
+
if (prepadded) {
|
|
1173
|
+
valid = FastValidatePrepadded(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1174
|
+
} else {
|
|
1175
|
+
valid = FastValidateImpl(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
} else {
|
|
1179
|
+
// String — must copy (can't pre-pad strings)
|
|
1180
|
+
size_t len;
|
|
1181
|
+
napi_get_value_string_utf8(env, args[1], nullptr, 0, &len);
|
|
1182
|
+
if (len <= 4096) {
|
|
1183
|
+
char buf[4097];
|
|
1184
|
+
napi_get_value_string_utf8(env, args[1], buf, len + 1, &len);
|
|
1185
|
+
valid = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(buf), len);
|
|
1186
|
+
} else {
|
|
1187
|
+
std::string buf(len, '\0');
|
|
1188
|
+
napi_get_value_string_utf8(env, args[1], buf.data(), len + 1, &len);
|
|
1189
|
+
valid = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(buf.data()), len);
|
|
1190
|
+
}
|
|
1191
|
+
}
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
napi_value result;
|
|
1195
|
+
napi_get_boolean(env, valid, &result);
|
|
1196
|
+
return result;
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
// --- Batch validation: one NAPI call, N validations ---
|
|
1200
|
+
static napi_value RawBatchValidate(napi_env env, napi_callback_info info) {
|
|
1201
|
+
size_t argc = 2;
|
|
1202
|
+
napi_value args[2];
|
|
1203
|
+
napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
|
|
1204
|
+
|
|
1205
|
+
uint32_t slot;
|
|
1206
|
+
napi_get_value_uint32(env, args[0], &slot);
|
|
1207
|
+
if (slot >= g_fast_slot_count) {
|
|
1208
|
+
napi_value r;
|
|
1209
|
+
napi_get_null(env, &r);
|
|
1210
|
+
return r;
|
|
1211
|
+
}
|
|
1212
|
+
|
|
1213
|
+
uint32_t arr_len;
|
|
1214
|
+
napi_get_array_length(env, args[1], &arr_len);
|
|
1215
|
+
|
|
1216
|
+
napi_value result_arr;
|
|
1217
|
+
napi_create_array_with_length(env, arr_len, &result_arr);
|
|
1218
|
+
|
|
1219
|
+
for (uint32_t i = 0; i < arr_len; i++) {
|
|
1220
|
+
napi_value item;
|
|
1221
|
+
napi_get_element(env, args[1], i, &item);
|
|
1222
|
+
|
|
1223
|
+
bool valid = false;
|
|
1224
|
+
bool is_buffer = false;
|
|
1225
|
+
napi_is_buffer(env, item, &is_buffer);
|
|
1226
|
+
|
|
1227
|
+
if (is_buffer) {
|
|
1228
|
+
void* data; size_t length;
|
|
1229
|
+
napi_get_buffer_info(env, item, &data, &length);
|
|
1230
|
+
if (data && length > 0)
|
|
1231
|
+
valid = ata::validate(g_fast_schemas[slot],
|
|
1232
|
+
std::string_view(static_cast<const char*>(data), length)).valid;
|
|
1233
|
+
} else {
|
|
1234
|
+
bool is_ta = false;
|
|
1235
|
+
napi_is_typedarray(env, item, &is_ta);
|
|
1236
|
+
if (is_ta) {
|
|
1237
|
+
napi_typedarray_type type; size_t length; void* data;
|
|
1238
|
+
napi_get_typedarray_info(env, item, &type, &length, &data, nullptr, nullptr);
|
|
1239
|
+
if (data && length > 0)
|
|
1240
|
+
valid = ata::validate(g_fast_schemas[slot],
|
|
1241
|
+
std::string_view(static_cast<const char*>(data), length)).valid;
|
|
1242
|
+
} else {
|
|
1243
|
+
size_t len;
|
|
1244
|
+
napi_get_value_string_utf8(env, item, nullptr, 0, &len);
|
|
1245
|
+
std::string buf(len, '\0');
|
|
1246
|
+
napi_get_value_string_utf8(env, item, buf.data(), len + 1, &len);
|
|
1247
|
+
valid = ata::validate(g_fast_schemas[slot], buf).valid;
|
|
1248
|
+
}
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
napi_value bval;
|
|
1252
|
+
napi_get_boolean(env, valid, &bval);
|
|
1253
|
+
napi_set_element(env, result_arr, i, bval);
|
|
1254
|
+
}
|
|
1255
|
+
return result_arr;
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
// --- Parallel NDJSON: multi-core validation, ajv can't do this ---
|
|
1259
|
+
static napi_value RawParallelValidate(napi_env env, napi_callback_info info) {
|
|
1260
|
+
size_t argc = 2;
|
|
1261
|
+
napi_value args[2];
|
|
1262
|
+
napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
|
|
1263
|
+
|
|
1264
|
+
uint32_t slot;
|
|
1265
|
+
napi_get_value_uint32(env, args[0], &slot);
|
|
1266
|
+
if (slot >= g_fast_slot_count) {
|
|
1267
|
+
napi_value r; napi_get_null(env, &r); return r;
|
|
1268
|
+
}
|
|
1269
|
+
|
|
1270
|
+
const char* data = nullptr;
|
|
1271
|
+
size_t total_len = 0;
|
|
1272
|
+
bool is_buffer = false;
|
|
1273
|
+
napi_is_buffer(env, args[1], &is_buffer);
|
|
1274
|
+
if (is_buffer) {
|
|
1275
|
+
void* d; napi_get_buffer_info(env, args[1], &d, &total_len);
|
|
1276
|
+
data = static_cast<const char*>(d);
|
|
1277
|
+
} else {
|
|
1278
|
+
bool is_ta = false;
|
|
1279
|
+
napi_is_typedarray(env, args[1], &is_ta);
|
|
1280
|
+
if (is_ta) {
|
|
1281
|
+
napi_typedarray_type type; void* d;
|
|
1282
|
+
napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
|
|
1283
|
+
data = static_cast<const char*>(d);
|
|
1284
|
+
}
|
|
1285
|
+
}
|
|
1286
|
+
if (!data || total_len == 0) {
|
|
1287
|
+
napi_value r; napi_create_array_with_length(env, 0, &r); return r;
|
|
1288
|
+
}
|
|
1289
|
+
|
|
1290
|
+
// Split lines
|
|
1291
|
+
struct line { const char* ptr; size_t len; };
|
|
1292
|
+
std::vector<line> lines;
|
|
1293
|
+
const char* start = data;
|
|
1294
|
+
const char* end = data + total_len;
|
|
1295
|
+
while (start < end) {
|
|
1296
|
+
const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
|
|
1297
|
+
size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
|
|
1298
|
+
if (line_len > 0) lines.push_back({start, line_len});
|
|
1299
|
+
start += line_len + 1;
|
|
1300
|
+
}
|
|
1301
|
+
|
|
1302
|
+
size_t n = lines.size();
|
|
1303
|
+
std::vector<bool> results(n, false);
|
|
1304
|
+
|
|
1305
|
+
// Parallel validation across CPU cores
|
|
1306
|
+
unsigned num_threads = std::thread::hardware_concurrency();
|
|
1307
|
+
if (num_threads == 0) num_threads = 4;
|
|
1308
|
+
if (num_threads > n) num_threads = (unsigned)n;
|
|
1309
|
+
|
|
1310
|
+
// Each thread gets its own schema_ref (thread-safe: compile is one-time, validate is read-only)
|
|
1311
|
+
// But ata::validate uses internal parser that's NOT thread-safe
|
|
1312
|
+
// So each thread needs its own compiled schema
|
|
1313
|
+
const auto& schema_json = g_fast_schema_jsons[slot];
|
|
1314
|
+
|
|
1315
|
+
if (schema_json.empty() || n < num_threads * 2) {
|
|
1316
|
+
// Fallback: single-threaded for small batches
|
|
1317
|
+
for (size_t i = 0; i < n; i++) {
|
|
1318
|
+
auto r = ata::validate(g_fast_schemas[slot], std::string_view(lines[i].ptr, lines[i].len));
|
|
1319
|
+
results[i] = r.valid;
|
|
1320
|
+
}
|
|
1321
|
+
} else {
|
|
1322
|
+
auto& tp = pool();
|
|
1323
|
+
unsigned nworkers = tp.size();
|
|
1324
|
+
size_t chunk = (n + nworkers - 1) / nworkers;
|
|
1325
|
+
|
|
1326
|
+
for (unsigned t = 0; t < nworkers; t++) {
|
|
1327
|
+
size_t from = t * chunk;
|
|
1328
|
+
size_t to = std::min(from + chunk, n);
|
|
1329
|
+
if (from >= n) break;
|
|
1330
|
+
|
|
1331
|
+
tp.submit([&results, &lines, from, to, slot](
|
|
1332
|
+
std::unordered_map<uint32_t, ata::schema_ref>& cache) {
|
|
1333
|
+
auto it = cache.find(slot);
|
|
1334
|
+
if (it == cache.end()) {
|
|
1335
|
+
it = cache.emplace(slot, ata::compile(g_fast_schema_jsons[slot])).first;
|
|
1336
|
+
}
|
|
1337
|
+
auto& s = it->second;
|
|
1338
|
+
// Free padding: lines in NDJSON buffer almost always have free padding
|
|
1339
|
+
// (next line's data serves as padding). Only last line might need copy.
|
|
1340
|
+
for (size_t i = from; i < to; i++) {
|
|
1341
|
+
results[i] = ata::is_valid_prepadded(s, lines[i].ptr, lines[i].len);
|
|
1342
|
+
}
|
|
1343
|
+
});
|
|
1344
|
+
}
|
|
1345
|
+
tp.wait();
|
|
1346
|
+
}
|
|
1347
|
+
|
|
1348
|
+
napi_value result_arr;
|
|
1349
|
+
napi_create_array_with_length(env, n, &result_arr);
|
|
1350
|
+
for (size_t i = 0; i < n; i++) {
|
|
1351
|
+
napi_value bval;
|
|
1352
|
+
napi_get_boolean(env, results[i], &bval);
|
|
1353
|
+
napi_set_element(env, result_arr, (uint32_t)i, bval);
|
|
1354
|
+
}
|
|
1355
|
+
return result_arr;
|
|
1356
|
+
}
|
|
1357
|
+
|
|
1358
|
+
// --- Parallel count: returns just the number of valid items (no array overhead) ---
|
|
1359
|
+
static napi_value RawParallelCount(napi_env env, napi_callback_info info) {
|
|
1360
|
+
size_t argc = 2;
|
|
1361
|
+
napi_value args[2];
|
|
1362
|
+
napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
|
|
1363
|
+
|
|
1364
|
+
uint32_t slot;
|
|
1365
|
+
napi_get_value_uint32(env, args[0], &slot);
|
|
1366
|
+
if (slot >= g_fast_slot_count) {
|
|
1367
|
+
napi_value r; napi_create_uint32(env, 0, &r); return r;
|
|
1368
|
+
}
|
|
1369
|
+
|
|
1370
|
+
const char* data = nullptr;
|
|
1371
|
+
size_t total_len = 0;
|
|
1372
|
+
bool is_buffer = false;
|
|
1373
|
+
napi_is_buffer(env, args[1], &is_buffer);
|
|
1374
|
+
if (is_buffer) {
|
|
1375
|
+
void* d; napi_get_buffer_info(env, args[1], &d, &total_len);
|
|
1376
|
+
data = static_cast<const char*>(d);
|
|
1377
|
+
} else {
|
|
1378
|
+
bool is_ta = false;
|
|
1379
|
+
napi_is_typedarray(env, args[1], &is_ta);
|
|
1380
|
+
if (is_ta) {
|
|
1381
|
+
napi_typedarray_type type; void* d;
|
|
1382
|
+
napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
|
|
1383
|
+
data = static_cast<const char*>(d);
|
|
1384
|
+
}
|
|
1385
|
+
}
|
|
1386
|
+
if (!data || total_len == 0) {
|
|
1387
|
+
napi_value r; napi_create_uint32(env, 0, &r); return r;
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
struct line { const char* ptr; size_t len; };
|
|
1391
|
+
std::vector<line> lines;
|
|
1392
|
+
const char* start = data;
|
|
1393
|
+
const char* end = data + total_len;
|
|
1394
|
+
while (start < end) {
|
|
1395
|
+
const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
|
|
1396
|
+
size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
|
|
1397
|
+
if (line_len > 0) lines.push_back({start, line_len});
|
|
1398
|
+
start += line_len + 1;
|
|
1399
|
+
}
|
|
1400
|
+
|
|
1401
|
+
size_t n = lines.size();
|
|
1402
|
+
std::atomic<uint32_t> valid_count{0};
|
|
1403
|
+
|
|
1404
|
+
auto& tp = pool();
|
|
1405
|
+
unsigned nworkers = tp.size();
|
|
1406
|
+
size_t chunk = (n + nworkers - 1) / nworkers;
|
|
1407
|
+
|
|
1408
|
+
if (n < nworkers * 2) {
|
|
1409
|
+
// Small batch — single thread
|
|
1410
|
+
uint32_t cnt = 0;
|
|
1411
|
+
for (size_t i = 0; i < n; i++) {
|
|
1412
|
+
if (ata::validate(g_fast_schemas[slot], std::string_view(lines[i].ptr, lines[i].len)).valid)
|
|
1413
|
+
cnt++;
|
|
1414
|
+
}
|
|
1415
|
+
napi_value r; napi_create_uint32(env, cnt, &r); return r;
|
|
1416
|
+
}
|
|
1417
|
+
|
|
1418
|
+
for (unsigned t = 0; t < nworkers; t++) {
|
|
1419
|
+
size_t from = t * chunk;
|
|
1420
|
+
size_t to = std::min(from + chunk, n);
|
|
1421
|
+
if (from >= n) break;
|
|
1422
|
+
|
|
1423
|
+
tp.submit([&valid_count, &lines, from, to, slot](
|
|
1424
|
+
std::unordered_map<uint32_t, ata::schema_ref>& cache) {
|
|
1425
|
+
auto it = cache.find(slot);
|
|
1426
|
+
if (it == cache.end()) {
|
|
1427
|
+
it = cache.emplace(slot, ata::compile(g_fast_schema_jsons[slot])).first;
|
|
1428
|
+
}
|
|
1429
|
+
auto& s = it->second;
|
|
1430
|
+
uint32_t local_cnt = 0;
|
|
1431
|
+
for (size_t i = from; i < to; i++) {
|
|
1432
|
+
if (ata::is_valid_prepadded(s, lines[i].ptr, lines[i].len))
|
|
1433
|
+
local_cnt++;
|
|
1434
|
+
}
|
|
1435
|
+
valid_count.fetch_add(local_cnt, std::memory_order_relaxed);
|
|
1436
|
+
});
|
|
1437
|
+
}
|
|
1438
|
+
tp.wait();
|
|
1439
|
+
|
|
1440
|
+
napi_value r;
|
|
1441
|
+
napi_create_uint32(env, valid_count.load(), &r);
|
|
1442
|
+
return r;
|
|
1443
|
+
}
|
|
1444
|
+
|
|
1445
|
+
// --- NDJSON: single buffer, newline-delimited ---
|
|
1446
|
+
static napi_value RawNDJSONValidate(napi_env env, napi_callback_info info) {
|
|
1447
|
+
size_t argc = 2;
|
|
1448
|
+
napi_value args[2];
|
|
1449
|
+
napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
|
|
1450
|
+
|
|
1451
|
+
uint32_t slot;
|
|
1452
|
+
napi_get_value_uint32(env, args[0], &slot);
|
|
1453
|
+
if (slot >= g_fast_slot_count) {
|
|
1454
|
+
napi_value r; napi_get_null(env, &r); return r;
|
|
1455
|
+
}
|
|
1456
|
+
|
|
1457
|
+
const char* data = nullptr;
|
|
1458
|
+
size_t total_len = 0;
|
|
1459
|
+
bool is_buffer = false;
|
|
1460
|
+
napi_is_buffer(env, args[1], &is_buffer);
|
|
1461
|
+
if (is_buffer) {
|
|
1462
|
+
void* d; napi_get_buffer_info(env, args[1], &d, &total_len);
|
|
1463
|
+
data = static_cast<const char*>(d);
|
|
1464
|
+
} else {
|
|
1465
|
+
bool is_ta = false;
|
|
1466
|
+
napi_is_typedarray(env, args[1], &is_ta);
|
|
1467
|
+
if (is_ta) {
|
|
1468
|
+
napi_typedarray_type type; void* d;
|
|
1469
|
+
napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
|
|
1470
|
+
data = static_cast<const char*>(d);
|
|
1471
|
+
}
|
|
1472
|
+
}
|
|
1473
|
+
if (!data || total_len == 0) {
|
|
1474
|
+
napi_value r; napi_create_array_with_length(env, 0, &r); return r;
|
|
1475
|
+
}
|
|
1476
|
+
|
|
1477
|
+
// Count lines first for array allocation
|
|
1478
|
+
uint32_t count = 0;
|
|
1479
|
+
for (size_t i = 0; i < total_len; i++) if (data[i] == '\n') count++;
|
|
1480
|
+
if (total_len > 0 && data[total_len-1] != '\n') count++;
|
|
1481
|
+
|
|
1482
|
+
napi_value result_arr;
|
|
1483
|
+
napi_create_array_with_length(env, count, &result_arr);
|
|
1484
|
+
|
|
1485
|
+
const char* start = data;
|
|
1486
|
+
const char* end = data + total_len;
|
|
1487
|
+
uint32_t idx = 0;
|
|
1488
|
+
|
|
1489
|
+
while (start < end) {
|
|
1490
|
+
const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
|
|
1491
|
+
size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
|
|
1492
|
+
if (line_len > 0) {
|
|
1493
|
+
auto r = ata::validate(g_fast_schemas[slot], std::string_view(start, line_len));
|
|
1494
|
+
napi_value bval;
|
|
1495
|
+
napi_get_boolean(env, r.valid, &bval);
|
|
1496
|
+
napi_set_element(env, result_arr, idx++, bval);
|
|
1497
|
+
}
|
|
1498
|
+
start += line_len + 1;
|
|
1499
|
+
}
|
|
1500
|
+
return result_arr;
|
|
1501
|
+
}
|
|
1502
|
+
|
|
737
1503
|
// Module initialiser: populates `exports` with the addon's JS-visible API and
// returns it.
//
// Registers the CompiledSchema class, the node-addon-api wrapped functions
// (validate, version, fastRegister, fastValidate) and the raw N-API entry
// points (rawFastValidate, rawBatchValidate, rawNDJSONValidate,
// rawParallelValidate, rawParallelCount).
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  CompiledSchema::Init(env, exports);
  exports.Set("validate", Napi::Function::New(env, ValidateOneShot));
  exports.Set("version", Napi::Function::New(env, GetVersion));
  exports.Set("fastRegister", Napi::Function::New(env, FastRegister));
  exports.Set("fastValidate", Napi::Function::New(env, FastValidateSlow));

  // The raw entry points use the C N-API signature directly, so they are
  // created with napi_create_function instead of Napi::Function::New.
  const auto export_raw = [&](const char* name, napi_callback fn) {
    napi_value js_fn = nullptr;
    if (napi_create_function(env, name, NAPI_AUTO_LENGTH, fn, nullptr, &js_fn) == napi_ok) {
      exports.Set(name, Napi::Value(env, js_fn));
    }
  };

  export_raw("rawFastValidate", RawFastValidate);
  export_raw("rawBatchValidate", RawBatchValidate);
  export_raw("rawNDJSONValidate", RawNDJSONValidate);
  export_raw("rawParallelValidate", RawParallelValidate);
  export_raw("rawParallelCount", RawParallelCount);

  return exports;
}
|
|
743
1532
|
|