ata-validator 0.1.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,15 @@
1
1
  #include <napi.h>
2
+ #include <node_api.h>
2
3
 
3
4
  #include <cmath>
4
- #include <regex>
5
+ #include <thread>
6
+ #include <future>
7
+ #include <mutex>
8
+ #include <condition_variable>
9
+ #include <functional>
10
+ #include <queue>
11
+ #include <atomic>
12
+ #include <re2/re2.h>
5
13
  #include <set>
6
14
  #include <string>
7
15
  #include <vector>
@@ -31,13 +39,16 @@ struct schema_node {
31
39
  std::optional<uint64_t> min_length;
32
40
  std::optional<uint64_t> max_length;
33
41
  std::optional<std::string> pattern;
34
- std::shared_ptr<std::regex> compiled_pattern;
42
+ std::shared_ptr<re2::RE2> compiled_pattern;
35
43
 
36
44
  std::optional<uint64_t> min_items;
37
45
  std::optional<uint64_t> max_items;
38
46
  bool unique_items = false;
39
47
  schema_node_ptr items_schema;
40
48
  std::vector<schema_node_ptr> prefix_items;
49
+ schema_node_ptr contains_schema;
50
+ std::optional<uint64_t> min_contains;
51
+ std::optional<uint64_t> max_contains;
41
52
 
42
53
  std::unordered_map<std::string, schema_node_ptr> properties;
43
54
  std::vector<std::string> required;
@@ -45,8 +56,16 @@ struct schema_node {
45
56
  schema_node_ptr additional_properties_schema;
46
57
  std::optional<uint64_t> min_properties;
47
58
  std::optional<uint64_t> max_properties;
59
+ schema_node_ptr property_names_schema;
60
+ std::unordered_map<std::string, std::vector<std::string>> dependent_required;
61
+ std::unordered_map<std::string, schema_node_ptr> dependent_schemas;
48
62
 
49
- std::vector<std::pair<std::string, schema_node_ptr>> pattern_properties;
63
+ struct pattern_prop {
64
+ std::string pattern;
65
+ schema_node_ptr schema;
66
+ std::shared_ptr<re2::RE2> compiled;
67
+ };
68
+ std::vector<pattern_prop> pattern_properties;
50
69
 
51
70
  std::optional<std::string> enum_values_raw;
52
71
  std::vector<std::string> enum_values_minified;
@@ -65,6 +84,8 @@ struct schema_node {
65
84
 
66
85
  std::string ref;
67
86
 
87
+ std::unordered_map<std::string, schema_node_ptr> defs;
88
+
68
89
  std::optional<bool> boolean_schema;
69
90
  };
70
91
 
@@ -73,6 +94,89 @@ struct compiled_schema_internal {
73
94
  std::unordered_map<std::string, schema_node_ptr> defs;
74
95
  };
75
96
 
97
+ // --- Fast format validators (no regex) ---
98
+
99
// ASCII-only character classifiers (locale-independent, unlike <cctype>).
static bool nb_is_digit(char c) { return c >= '0' && c <= '9'; }
static bool nb_is_alpha(char c) {
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
static bool nb_is_alnum(char c) { return nb_is_alpha(c) || nb_is_digit(c); }
static bool nb_is_hex(char c) {
  return nb_is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

// True when sv[pos, pos+10) has the shape "dddd-dd-dd".
// The caller must guarantee pos + 10 <= sv.size().
static bool nb_is_date_at(const std::string& sv, size_t pos) {
  return nb_is_digit(sv[pos + 0]) && nb_is_digit(sv[pos + 1]) &&
         nb_is_digit(sv[pos + 2]) && nb_is_digit(sv[pos + 3]) &&
         sv[pos + 4] == '-' &&
         nb_is_digit(sv[pos + 5]) && nb_is_digit(sv[pos + 6]) &&
         sv[pos + 7] == '-' &&
         nb_is_digit(sv[pos + 8]) && nb_is_digit(sv[pos + 9]);
}

// Validates everything after "hh:mm:ss": an optional fraction (".d+")
// followed by an optional zone ("Z"/"z" or "+hh:mm"/"-hh:mm"). Nothing else
// may follow, so trailing garbage is rejected.
static bool nb_is_time_suffix(const std::string& sv, size_t pos) {
  const size_t n = sv.size();
  if (pos < n && sv[pos] == '.') {
    const size_t frac_begin = ++pos;
    while (pos < n && nb_is_digit(sv[pos])) ++pos;
    if (pos == frac_begin) return false;  // "." must be followed by a digit
  }
  if (pos == n) return true;  // zone is optional
  if (sv[pos] == 'Z' || sv[pos] == 'z') return pos + 1 == n;
  if (sv[pos] == '+' || sv[pos] == '-') {
    return n - pos == 6 &&
           nb_is_digit(sv[pos + 1]) && nb_is_digit(sv[pos + 2]) &&
           sv[pos + 3] == ':' &&
           nb_is_digit(sv[pos + 4]) && nb_is_digit(sv[pos + 5]);
  }
  return false;
}

// True when sv[pos, end) is "dd:dd:dd" followed by a valid time suffix.
static bool nb_is_time_at(const std::string& sv, size_t pos) {
  if (sv.size() < pos + 8) return false;
  const bool hms =
      nb_is_digit(sv[pos + 0]) && nb_is_digit(sv[pos + 1]) && sv[pos + 2] == ':' &&
      nb_is_digit(sv[pos + 3]) && nb_is_digit(sv[pos + 4]) && sv[pos + 5] == ':' &&
      nb_is_digit(sv[pos + 6]) && nb_is_digit(sv[pos + 7]);
  return hms && nb_is_time_suffix(sv, pos + 8);
}

// Checks `sv` against the JSON Schema `format` named by `fmt` without using
// a regex engine.
//
// Recognised formats: email, date, time, date-time, ipv4, ipv6, uri,
// uri-reference, uuid, hostname. Any other format name is accepted
// unconditionally (returns true).
//
// The checks are structural only (for example, a date's month/day ranges are
// not verified, and ipv6 merely requires a ':').
static bool napi_check_format(const std::string& sv, const std::string& fmt) {
  if (fmt == "email") {
    // Needs a non-empty local part, an '@', and a domain containing a '.'
    // that is neither first nor last and is followed by at least 2 chars.
    auto at = sv.find('@');
    if (at == std::string::npos || at == 0 || at == sv.size() - 1) return false;
    auto dot = sv.find('.', at + 1);
    return dot != std::string::npos && dot != at + 1 && dot != sv.size() - 1 &&
           (sv.size() - dot - 1) >= 2;
  }
  if (fmt == "date") {
    return sv.size() == 10 && nb_is_date_at(sv, 0);
  }
  if (fmt == "time") {
    return nb_is_time_at(sv, 0);
  }
  if (fmt == "date-time") {
    // <date> <separator> <time>; the separator may be 'T', 't' or a space.
    if (sv.size() < 19) return false;
    if (!nb_is_date_at(sv, 0)) return false;
    if (sv[10] != 'T' && sv[10] != 't' && sv[10] != ' ') return false;
    return nb_is_time_at(sv, 11);
  }
  if (fmt == "ipv4") {
    // Four dot-separated groups of 1-3 digits, each with value <= 255.
    int parts = 0, val = 0, digits = 0;
    for (size_t i = 0; i <= sv.size(); ++i) {
      if (i == sv.size() || sv[i] == '.') {
        if (digits == 0 || val > 255) return false;
        ++parts; val = 0; digits = 0;
      } else if (nb_is_digit(sv[i])) {
        val = val * 10 + (sv[i] - '0'); ++digits;
        if (digits > 3) return false;
      } else {
        return false;
      }
    }
    return parts == 4;
  }
  if (fmt == "ipv6") return sv.find(':') != std::string::npos;
  if (fmt == "uri" || fmt == "uri-reference") {
    // scheme ":" rest, where scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    // and at least one character follows the colon.
    if (sv.size() < 3 || !nb_is_alpha(sv[0])) return false;
    size_t i = 1;
    while (i < sv.size() && (nb_is_alnum(sv[i]) || sv[i] == '+' || sv[i] == '-' || sv[i] == '.')) ++i;
    return i < sv.size() && sv[i] == ':' && i + 1 < sv.size();
  }
  if (fmt == "uuid") {
    // 8-4-4-4-12 hexadecimal digits.
    if (sv.size() != 36) return false;
    for (size_t i = 0; i < 36; ++i) {
      if (i == 8 || i == 13 || i == 18 || i == 23) {
        if (sv[i] != '-') return false;
      } else {
        if (!nb_is_hex(sv[i])) return false;
      }
    }
    return true;
  }
  if (fmt == "hostname") {
    // Dot-separated labels of 1-63 alphanumerics/hyphens, at most 253 chars
    // overall. A label may not start or end with a hyphen.
    if (sv.empty() || sv.size() > 253) return false;
    size_t label_len = 0;
    char prev = '.';
    for (char c : sv) {
      if (c == '.') {
        if (label_len == 0 || prev == '-') return false;
        label_len = 0;
      } else if (nb_is_alnum(c) || c == '-') {
        if (c == '-' && label_len == 0) return false;
        if (++label_len > 63) return false;
      } else {
        return false;
      }
      prev = c;
    }
    return label_len > 0 && prev != '-';
  }
  return true;
}
179
+
76
180
  // --- V8 Direct Validator ---
77
181
 
78
182
  static std::string napi_type_of(Napi::Value val) {
@@ -115,17 +219,66 @@ static uint64_t utf8_codepoint_length(const std::string& s) {
115
219
  }
116
220
 
117
221
  // Serialize a Napi::Value to a minified JSON string (for enum/const comparison)
118
- static std::string napi_to_json(Napi::Env env, Napi::Value val) {
119
- auto json = env.Global().Get("JSON").As<Napi::Object>();
120
- auto stringify = json.Get("stringify").As<Napi::Function>();
121
- auto result = stringify.Call(json, {val});
122
- if (result.IsString()) {
123
- return result.As<Napi::String>().Utf8Value();
222
+ // Canonical JSON: sort object keys for semantic equality comparison
223
+ static std::string napi_canonical_json(Napi::Env env, Napi::Value val) {
224
+ if (val.IsNull() || val.IsUndefined()) return "null";
225
+ if (val.IsBoolean()) return val.As<Napi::Boolean>().Value() ? "true" : "false";
226
+ if (val.IsNumber()) {
227
+ double d = val.As<Napi::Number>().DoubleValue();
228
+ if (d == static_cast<int64_t>(d) && std::abs(d) <= 9007199254740991.0) {
229
+ return std::to_string(static_cast<int64_t>(d));
230
+ }
231
+ auto json = env.Global().Get("JSON").As<Napi::Object>();
232
+ auto stringify = json.Get("stringify").As<Napi::Function>();
233
+ auto r = stringify.Call(json, {val});
234
+ return r.IsString() ? r.As<Napi::String>().Utf8Value() : "null";
235
+ }
236
+ if (val.IsString()) {
237
+ // JSON-encode the string
238
+ auto json = env.Global().Get("JSON").As<Napi::Object>();
239
+ auto stringify = json.Get("stringify").As<Napi::Function>();
240
+ auto r = stringify.Call(json, {val});
241
+ return r.IsString() ? r.As<Napi::String>().Utf8Value() : "null";
242
+ }
243
+ if (val.IsArray()) {
244
+ auto arr = val.As<Napi::Array>();
245
+ std::string r = "[";
246
+ for (uint32_t i = 0; i < arr.Length(); ++i) {
247
+ if (i) r += ',';
248
+ r += napi_canonical_json(env, arr.Get(i));
249
+ }
250
+ r += ']';
251
+ return r;
252
+ }
253
+ if (val.IsObject()) {
254
+ auto obj = val.As<Napi::Object>();
255
+ auto keys = obj.GetPropertyNames();
256
+ std::vector<std::string> sorted_keys;
257
+ for (uint32_t i = 0; i < keys.Length(); ++i) {
258
+ sorted_keys.push_back(keys.Get(i).As<Napi::String>().Utf8Value());
259
+ }
260
+ std::sort(sorted_keys.begin(), sorted_keys.end());
261
+ std::string r = "{";
262
+ for (size_t i = 0; i < sorted_keys.size(); ++i) {
263
+ if (i) r += ',';
264
+ // JSON-encode the key
265
+ auto json = env.Global().Get("JSON").As<Napi::Object>();
266
+ auto stringify = json.Get("stringify").As<Napi::Function>();
267
+ auto k = stringify.Call(json, {Napi::String::New(env, sorted_keys[i])});
268
+ r += k.As<Napi::String>().Utf8Value();
269
+ r += ':';
270
+ r += napi_canonical_json(env, obj.Get(sorted_keys[i]));
271
+ }
272
+ r += '}';
273
+ return r;
124
274
  }
125
- if (val.IsUndefined()) return "null";
126
275
  return "null";
127
276
  }
128
277
 
278
+ static std::string napi_to_json(Napi::Env env, Napi::Value val) {
279
+ return napi_canonical_json(env, val);
280
+ }
281
+
129
282
  static void validate_napi(const schema_node_ptr& node,
130
283
  Napi::Value value,
131
284
  Napi::Env env,
@@ -150,63 +303,80 @@ static void validate_napi(const schema_node_ptr& node,
150
303
  return;
151
304
  }
152
305
 
153
- // $ref
306
+ // $ref — Draft 2020-12: $ref is not a short-circuit, sibling keywords still apply
307
+ bool ref_resolved = false;
154
308
  if (!node->ref.empty()) {
155
- // First check defs map
156
309
  auto it = ctx.defs.find(node->ref);
157
310
  if (it != ctx.defs.end()) {
158
311
  validate_napi(it->second, value, env, path, ctx, errors);
159
- return;
312
+ ref_resolved = true;
160
313
  }
161
- // JSON Pointer resolution from root
162
- if (node->ref.size() > 1 && node->ref[0] == '#' &&
314
+ if (!ref_resolved && node->ref.size() > 1 && node->ref[0] == '#' &&
163
315
  node->ref[1] == '/') {
316
+ // Decode JSON Pointer segments
317
+ auto decode_seg = [](const std::string& seg) -> std::string {
318
+ std::string pct;
319
+ for (size_t i = 0; i < seg.size(); ++i) {
320
+ if (seg[i] == '%' && i + 2 < seg.size()) {
321
+ auto hex = [](char c) -> int {
322
+ if (c >= '0' && c <= '9') return c - '0';
323
+ if (c >= 'a' && c <= 'f') return 10 + c - 'a';
324
+ if (c >= 'A' && c <= 'F') return 10 + c - 'A';
325
+ return -1;
326
+ };
327
+ int hv = hex(seg[i+1]), lv = hex(seg[i+2]);
328
+ if (hv >= 0 && lv >= 0) { pct += static_cast<char>(hv * 16 + lv); i += 2; }
329
+ else pct += seg[i];
330
+ } else pct += seg[i];
331
+ }
332
+ std::string out;
333
+ for (size_t i = 0; i < pct.size(); ++i) {
334
+ if (pct[i] == '~' && i + 1 < pct.size()) {
335
+ if (pct[i+1] == '1') { out += '/'; ++i; }
336
+ else if (pct[i+1] == '0') { out += '~'; ++i; }
337
+ else out += pct[i];
338
+ } else out += pct[i];
339
+ }
340
+ return out;
341
+ };
164
342
  std::string pointer = node->ref.substr(2);
343
+ std::vector<std::string> segments;
344
+ size_t spos = 0;
345
+ while (spos < pointer.size()) {
346
+ size_t snext = pointer.find('/', spos);
347
+ segments.push_back(decode_seg(
348
+ pointer.substr(spos, snext == std::string::npos ? snext : snext - spos)));
349
+ spos = (snext == std::string::npos) ? pointer.size() : snext + 1;
350
+ }
165
351
  schema_node_ptr current = ctx.root;
166
352
  bool resolved = true;
167
- size_t pos = 0;
168
- while (pos < pointer.size() && current) {
169
- size_t next = pointer.find('/', pos);
170
- std::string segment =
171
- pointer.substr(pos, next == std::string::npos ? next : next - pos);
172
- std::string key;
173
- for (size_t i = 0; i < segment.size(); ++i) {
174
- if (segment[i] == '~' && i + 1 < segment.size()) {
175
- if (segment[i + 1] == '1') { key += '/'; ++i; }
176
- else if (segment[i + 1] == '0') { key += '~'; ++i; }
177
- else key += segment[i];
178
- } else {
179
- key += segment[i];
180
- }
181
- }
182
- if (key == "properties" && !current->properties.empty()) {
183
- pos = (next == std::string::npos) ? pointer.size() : next + 1;
184
- next = pointer.find('/', pos);
185
- std::string prop = pointer.substr(
186
- pos, next == std::string::npos ? next : next - pos);
187
- auto pit = current->properties.find(prop);
353
+ for (size_t si = 0; si < segments.size() && current; ++si) {
354
+ const auto& key = segments[si];
355
+ if (key == "properties" && si + 1 < segments.size()) {
356
+ auto pit = current->properties.find(segments[++si]);
188
357
  if (pit != current->properties.end()) current = pit->second;
189
358
  else { resolved = false; break; }
190
359
  } else if (key == "items" && current->items_schema) {
191
360
  current = current->items_schema;
192
361
  } else if (key == "$defs" || key == "definitions") {
193
- pos = (next == std::string::npos) ? pointer.size() : next + 1;
194
- next = pointer.find('/', pos);
195
- std::string def = pointer.substr(
196
- pos, next == std::string::npos ? next : next - pos);
197
- auto dit = ctx.defs.find("#/" + key + "/" + def);
198
- if (dit != ctx.defs.end()) current = dit->second;
199
- else { resolved = false; break; }
362
+ if (si + 1 < segments.size()) {
363
+ const auto& def_name = segments[++si];
364
+ auto dit = current->defs.find(def_name);
365
+ if (dit != current->defs.end()) current = dit->second;
366
+ else {
367
+ auto cit = ctx.defs.find("#/" + key + "/" + def_name);
368
+ if (cit != ctx.defs.end()) current = cit->second;
369
+ else { resolved = false; break; }
370
+ }
371
+ } else { resolved = false; break; }
200
372
  } else if (key == "allOf" || key == "anyOf" || key == "oneOf") {
201
- pos = (next == std::string::npos) ? pointer.size() : next + 1;
202
- next = pointer.find('/', pos);
203
- std::string idx_s = pointer.substr(
204
- pos, next == std::string::npos ? next : next - pos);
205
- size_t idx = std::stoul(idx_s);
206
- auto& vec = (key == "allOf") ? current->all_of
207
- : (key == "anyOf") ? current->any_of : current->one_of;
208
- if (idx < vec.size()) current = vec[idx];
209
- else { resolved = false; break; }
373
+ if (si + 1 < segments.size()) {
374
+ size_t idx = std::stoul(segments[++si]);
375
+ auto& vec = (key == "allOf") ? current->all_of
376
+ : (key == "anyOf") ? current->any_of : current->one_of;
377
+ if (idx < vec.size()) current = vec[idx];
378
+ else { resolved = false; break; }
379
+ } else { resolved = false; break; }
210
380
  } else if (key == "not" && current->not_schema) {
211
381
  current = current->not_schema;
212
382
  } else if (key == "if" && current->if_schema) {
@@ -219,28 +389,26 @@ static void validate_napi(const schema_node_ptr& node,
219
389
  current->additional_properties_schema) {
220
390
  current = current->additional_properties_schema;
221
391
  } else if (key == "prefixItems") {
222
- pos = (next == std::string::npos) ? pointer.size() : next + 1;
223
- next = pointer.find('/', pos);
224
- std::string idx_s = pointer.substr(
225
- pos, next == std::string::npos ? next : next - pos);
226
- size_t idx = std::stoul(idx_s);
227
- if (idx < current->prefix_items.size()) current = current->prefix_items[idx];
228
- else { resolved = false; break; }
392
+ if (si + 1 < segments.size()) {
393
+ size_t idx = std::stoul(segments[++si]);
394
+ if (idx < current->prefix_items.size()) current = current->prefix_items[idx];
395
+ else { resolved = false; break; }
396
+ } else { resolved = false; break; }
229
397
  } else { resolved = false; break; }
230
- pos = (next == std::string::npos) ? pointer.size() : next + 1;
231
398
  }
232
399
  if (resolved && current) {
233
400
  validate_napi(current, value, env, path, ctx, errors);
234
- return;
401
+ ref_resolved = true;
235
402
  }
236
403
  }
237
- if (node->ref == "#" && ctx.root) {
404
+ if (!ref_resolved && node->ref == "#" && ctx.root) {
238
405
  validate_napi(ctx.root, value, env, path, ctx, errors);
239
- return;
406
+ ref_resolved = true;
407
+ }
408
+ if (!ref_resolved) {
409
+ errors.push_back({ata::error_code::ref_not_found, path,
410
+ "cannot resolve $ref: " + node->ref});
240
411
  }
241
- errors.push_back({ata::error_code::ref_not_found, path,
242
- "cannot resolve $ref: " + node->ref});
243
- return;
244
412
  }
245
413
 
246
414
  auto actual_type = napi_type_of(value);
@@ -356,7 +524,7 @@ static void validate_napi(const schema_node_ptr& node,
356
524
  std::to_string(node->max_length.value())});
357
525
  }
358
526
  if (node->compiled_pattern) {
359
- if (!std::regex_search(sv, *node->compiled_pattern)) {
527
+ if (!re2::RE2::PartialMatch(sv, *node->compiled_pattern)) {
360
528
  errors.push_back({ata::error_code::pattern_mismatch, path,
361
529
  "string does not match pattern: " +
362
530
  node->pattern.value()});
@@ -364,40 +532,7 @@ static void validate_napi(const schema_node_ptr& node,
364
532
  }
365
533
  if (node->format.has_value()) {
366
534
  const auto& fmt = node->format.value();
367
- bool format_ok = true;
368
- if (fmt == "email") {
369
- static const std::regex email_re(
370
- R"([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})");
371
- format_ok = std::regex_match(sv, email_re);
372
- } else if (fmt == "uri" || fmt == "uri-reference") {
373
- static const std::regex uri_re(R"([a-zA-Z][a-zA-Z0-9+\-.]*:.+)");
374
- format_ok = std::regex_match(sv, uri_re);
375
- } else if (fmt == "date") {
376
- static const std::regex date_re(R"(\d{4}-\d{2}-\d{2})");
377
- format_ok = std::regex_match(sv, date_re);
378
- } else if (fmt == "date-time") {
379
- static const std::regex dt_re(
380
- R"(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{2}:\d{2})?)");
381
- format_ok = std::regex_match(sv, dt_re);
382
- } else if (fmt == "time") {
383
- static const std::regex time_re(
384
- R"(\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+\-]\d{2}:\d{2})?)");
385
- format_ok = std::regex_match(sv, time_re);
386
- } else if (fmt == "ipv4") {
387
- static const std::regex ipv4_re(
388
- R"((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3}))");
389
- format_ok = std::regex_match(sv, ipv4_re);
390
- } else if (fmt == "ipv6") {
391
- format_ok = sv.find(':') != std::string::npos;
392
- } else if (fmt == "uuid") {
393
- static const std::regex uuid_re(
394
- R"([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})");
395
- format_ok = std::regex_match(sv, uuid_re);
396
- } else if (fmt == "hostname") {
397
- static const std::regex host_re(
398
- R"([a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?)*)");
399
- format_ok = std::regex_match(sv, host_re);
400
- }
535
+ bool format_ok = napi_check_format(sv, fmt);
401
536
  if (!format_ok) {
402
537
  errors.push_back({ata::error_code::format_mismatch, path,
403
538
  "string does not match format: " + fmt});
@@ -449,6 +584,28 @@ static void validate_napi(const schema_node_ptr& node,
449
584
  path + "/" + std::to_string(i), ctx, errors);
450
585
  }
451
586
  }
587
+
588
+ // contains / minContains / maxContains
589
+ if (node->contains_schema) {
590
+ uint64_t match_count = 0;
591
+ for (uint32_t i = 0; i < arr_size; ++i) {
592
+ std::vector<ata::validation_error> tmp;
593
+ validate_napi(node->contains_schema, arr.Get(i), env, path, ctx, tmp);
594
+ if (tmp.empty()) ++match_count;
595
+ }
596
+ uint64_t min_c = node->min_contains.value_or(1);
597
+ uint64_t max_c = node->max_contains.value_or(arr_size);
598
+ if (match_count < min_c) {
599
+ errors.push_back({ata::error_code::min_items_violation, path,
600
+ "contains: " + std::to_string(match_count) +
601
+ " matches, minimum " + std::to_string(min_c)});
602
+ }
603
+ if (match_count > max_c) {
604
+ errors.push_back({ata::error_code::max_items_violation, path,
605
+ "contains: " + std::to_string(match_count) +
606
+ " matches, maximum " + std::to_string(max_c)});
607
+ }
608
+ }
452
609
  }
453
610
 
454
611
  // Object validations
@@ -493,15 +650,11 @@ static void validate_napi(const schema_node_ptr& node,
493
650
  matched = true;
494
651
  }
495
652
 
496
- for (const auto& [pat, pat_schema] : node->pattern_properties) {
497
- try {
498
- std::regex re(pat);
499
- if (std::regex_search(key_str, re)) {
500
- validate_napi(pat_schema, val, env, path + "/" + key_str, ctx,
501
- errors);
502
- matched = true;
503
- }
504
- } catch (...) {
653
+ for (const auto& pp : node->pattern_properties) {
654
+ if (pp.compiled && re2::RE2::PartialMatch(key_str, *pp.compiled)) {
655
+ validate_napi(pp.schema, val, env, path + "/" + key_str, ctx,
656
+ errors);
657
+ matched = true;
505
658
  }
506
659
  }
507
660
 
@@ -517,6 +670,35 @@ static void validate_napi(const schema_node_ptr& node,
517
670
  }
518
671
  }
519
672
  }
673
+
674
+ // propertyNames
675
+ if (node->property_names_schema) {
676
+ for (uint32_t i = 0; i < prop_count; ++i) {
677
+ Napi::Value key_val = keys.Get(i);
678
+ validate_napi(node->property_names_schema, key_val, env, path, ctx,
679
+ errors);
680
+ }
681
+ }
682
+
683
+ // dependentRequired
684
+ for (const auto& [prop, deps] : node->dependent_required) {
685
+ if (obj.HasOwnProperty(prop)) {
686
+ for (const auto& dep : deps) {
687
+ if (!obj.HasOwnProperty(dep)) {
688
+ errors.push_back({ata::error_code::required_property_missing, path,
689
+ "property '" + prop + "' requires '" + dep +
690
+ "' to be present"});
691
+ }
692
+ }
693
+ }
694
+ }
695
+
696
+ // dependentSchemas
697
+ for (const auto& [prop, schema] : node->dependent_schemas) {
698
+ if (obj.HasOwnProperty(prop)) {
699
+ validate_napi(schema, value, env, path, ctx, errors);
700
+ }
701
+ }
520
702
  }
521
703
 
522
704
  // allOf
@@ -618,7 +800,8 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
618
800
  env, "CompiledSchema",
619
801
  {InstanceMethod("validate", &CompiledSchema::Validate),
620
802
  InstanceMethod("validateJSON", &CompiledSchema::ValidateJSON),
621
- InstanceMethod("validateDirect", &CompiledSchema::ValidateDirect)});
803
+ InstanceMethod("validateDirect", &CompiledSchema::ValidateDirect),
804
+ InstanceMethod("isValidJSON", &CompiledSchema::IsValidJSON)});
622
805
  auto* constructor = new Napi::FunctionReference();
623
806
  *constructor = Napi::Persistent(func);
624
807
  env.SetInstanceData(constructor);
@@ -658,19 +841,77 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
658
841
  return ValidateDirectImpl(env, info[0]);
659
842
  }
660
843
 
844
// Size limit used by extract_string to decide when the reusable scratch
// buffer should be trimmed back after an unusually large input.
static constexpr size_t TL_BUF_SHRINK_THRESHOLD = size_t{64} * 1024;  // 64KB

// Returns this thread's reusable scratch string. Each thread sees its own
// instance, and the same instance is returned on every call from that thread.
static std::string& tl_json_buf() {
  thread_local std::string scratch;
  return scratch;
}
852
+
853
+ // Extract JS string into reusable thread-local buffer with simdjson padding.
854
+ // Returns {data, length} — data is valid until next call on same thread.
855
+ static std::pair<const char*, size_t> extract_string(napi_env env, napi_value val) {
856
+ size_t len = 0;
857
+ napi_get_value_string_utf8(env, val, nullptr, 0, &len);
858
+ auto& buf = tl_json_buf();
859
+ const size_t needed = len + 1 + ata::REQUIRED_PADDING;
860
+ if (buf.size() < needed) buf.resize(needed);
861
+ napi_get_value_string_utf8(env, val, buf.data(), len + 1, &len);
862
+ // Shrink back if a one-off large string bloated the buffer
863
+ if (buf.size() > TL_BUF_SHRINK_THRESHOLD && len < TL_BUF_SHRINK_THRESHOLD / 2) {
864
+ buf.resize(TL_BUF_SHRINK_THRESHOLD);
865
+ buf.shrink_to_fit();
866
+ }
867
+ return {buf.data(), len};
868
+ }
869
+
661
870
  // Validate via JSON string (simdjson parse path)
662
871
  Napi::Value ValidateJSON(const Napi::CallbackInfo& info) {
663
872
  Napi::Env env = info.Env();
664
- if (info.Length() < 1 || !info[0].IsString()) {
873
+ if (info.Length() < 1) {
665
874
  Napi::TypeError::New(env, "JSON string expected")
666
875
  .ThrowAsJavaScriptException();
667
876
  return env.Undefined();
668
877
  }
669
- std::string json = info[0].As<Napi::String>().Utf8Value();
670
- auto result = ata::validate(schema_, json);
878
+ // Support Buffer for zero-copy
879
+ if (info[0].IsBuffer()) {
880
+ auto buf = info[0].As<Napi::Buffer<char>>();
881
+ auto result = ata::validate(schema_, std::string_view(buf.Data(), buf.Length()));
882
+ return make_result(env, result);
883
+ }
884
+ if (!info[0].IsString()) {
885
+ Napi::TypeError::New(env, "JSON string or Buffer expected")
886
+ .ThrowAsJavaScriptException();
887
+ return env.Undefined();
888
+ }
889
+ auto [data, len] = extract_string(env, info[0]);
890
+ auto result = ata::validate(schema_, std::string_view(data, len));
671
891
  return make_result(env, result);
672
892
  }
673
893
 
894
+ // Fast boolean-only validation — no error object creation
895
+ Napi::Value IsValidJSON(const Napi::CallbackInfo& info) {
896
+ Napi::Env env = info.Env();
897
+ if (info.Length() < 1) {
898
+ return Napi::Boolean::New(env, false);
899
+ }
900
+ // Support both String and Buffer inputs
901
+ if (info[0].IsBuffer()) {
902
+ auto buf = info[0].As<Napi::Buffer<char>>();
903
+ auto result = ata::validate(schema_, std::string_view(buf.Data(), buf.Length()));
904
+ return Napi::Boolean::New(env, result.valid);
905
+ }
906
+ if (!info[0].IsString()) {
907
+ return Napi::Boolean::New(env, false);
908
+ }
909
+ auto [data, len] = extract_string(env, info[0]);
910
+ // Buffer already has REQUIRED_PADDING — use zero-copy prepadded path
911
+ bool valid = ata::is_valid_prepadded(schema_, data, len);
912
+ return Napi::Boolean::New(env, valid);
913
+ }
914
+
674
915
  // Explicit direct validation (always V8 traversal, never stringify)
675
916
  Napi::Value ValidateDirect(const Napi::CallbackInfo& info) {
676
917
  Napi::Env env = info.Env();
@@ -734,10 +975,558 @@ Napi::Value GetVersion(const Napi::CallbackInfo& info) {
734
975
  return Napi::String::New(info.Env(), std::string(ata::version()));
735
976
  }
736
977
 
978
+ // --- Thread Pool ---
979
+ class ThreadPool {
980
+ public:
981
+ ThreadPool() {
982
+ unsigned n = std::thread::hardware_concurrency();
983
+ if (n == 0) n = 4;
984
+ for (unsigned i = 0; i < n; i++) {
985
+ workers_.emplace_back([this] {
986
+ // Each thread gets its own schema cache
987
+ std::unordered_map<uint32_t, ata::schema_ref> cache;
988
+ while (true) {
989
+ std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)> task;
990
+ {
991
+ std::unique_lock<std::mutex> lock(mtx_);
992
+ cv_.wait(lock, [this] { return stop_ || !tasks_.empty(); });
993
+ if (stop_ && tasks_.empty()) return;
994
+ task = std::move(tasks_.front());
995
+ tasks_.pop();
996
+ }
997
+ task(cache);
998
+ {
999
+ std::unique_lock<std::mutex> lock(done_mtx_);
1000
+ pending_--;
1001
+ if (pending_ == 0) done_cv_.notify_all();
1002
+ }
1003
+ }
1004
+ });
1005
+ }
1006
+ }
1007
+
1008
+ void submit(std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)> task) {
1009
+ {
1010
+ std::unique_lock<std::mutex> lock(mtx_);
1011
+ tasks_.push(std::move(task));
1012
+ }
1013
+ {
1014
+ std::unique_lock<std::mutex> lock(done_mtx_);
1015
+ pending_++;
1016
+ }
1017
+ cv_.notify_one();
1018
+ }
1019
+
1020
+ void wait() {
1021
+ std::unique_lock<std::mutex> lock(done_mtx_);
1022
+ done_cv_.wait(lock, [this] { return pending_ == 0; });
1023
+ }
1024
+
1025
+ unsigned size() const { return (unsigned)workers_.size(); }
1026
+
1027
+ ~ThreadPool() {
1028
+ { std::unique_lock<std::mutex> lock(mtx_); stop_ = true; }
1029
+ cv_.notify_all();
1030
+ for (auto& w : workers_) w.join();
1031
+ }
1032
+
1033
+ private:
1034
+ std::vector<std::thread> workers_;
1035
+ std::queue<std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)>> tasks_;
1036
+ std::mutex mtx_;
1037
+ std::condition_variable cv_;
1038
+ std::mutex done_mtx_;
1039
+ std::condition_variable done_cv_;
1040
+ std::atomic<int> pending_{0};
1041
+ bool stop_ = false;
1042
+ };
1043
+
1044
+ static ThreadPool& pool() {
1045
+ static ThreadPool p;
1046
+ return p;
1047
+ }
1048
+
1049
+ // --- Fast Validation Registry ---
1050
+ // Global schema slots for V8 Fast API (bypasses NAPI overhead)
1051
// Capacity of the fast-slot tables below; FastRegister refuses to register
// more schemas than this.
static constexpr size_t MAX_FAST_SLOTS = 256;
// Compiled schema for each slot, indexed by the slot id FastRegister returns.
static ata::schema_ref g_fast_schemas[MAX_FAST_SLOTS];
// Source JSON text each slot was compiled from (same indexing).
static std::string g_fast_schema_jsons[MAX_FAST_SLOTS];
// Number of slots handed out so far; slot ids are assigned sequentially from 0.
// NOTE(review): no synchronization is visible around these globals — confirm
// they are only touched from a single thread.
static uint32_t g_fast_slot_count = 0;
1055
+
1056
+ // Register a compiled schema in a fast slot, returns slot ID
1057
+ Napi::Value FastRegister(const Napi::CallbackInfo& info) {
1058
+ Napi::Env env = info.Env();
1059
+ if (info.Length() < 1 || !info[0].IsString()) {
1060
+ Napi::TypeError::New(env, "Schema JSON string expected").ThrowAsJavaScriptException();
1061
+ return env.Undefined();
1062
+ }
1063
+ if (g_fast_slot_count >= MAX_FAST_SLOTS) {
1064
+ Napi::Error::New(env, "Max fast schema slots reached").ThrowAsJavaScriptException();
1065
+ return env.Undefined();
1066
+ }
1067
+ std::string schema_json = info[0].As<Napi::String>().Utf8Value();
1068
+ auto schema = ata::compile(schema_json);
1069
+ if (!schema) {
1070
+ Napi::Error::New(env, "Failed to compile schema").ThrowAsJavaScriptException();
1071
+ return env.Undefined();
1072
+ }
1073
+ uint32_t slot = g_fast_slot_count++;
1074
+ g_fast_schemas[slot] = std::move(schema);
1075
+ g_fast_schema_jsons[slot] = schema_json;
1076
+ return Napi::Number::New(env, slot);
1077
+ }
1078
+
1079
+ // Fast validation: slot + Uint8Array → bool (called via V8 Fast API)
1080
+ static bool FastValidateImpl(uint32_t slot, const uint8_t* data, size_t length) {
1081
+ if (slot >= g_fast_slot_count) return false;
1082
+ auto result = ata::validate(g_fast_schemas[slot],
1083
+ std::string_view(reinterpret_cast<const char*>(data), length));
1084
+ return result.valid;
1085
+ }
1086
+
1087
+ // Zero-copy validation with pre-padded buffer
1088
+ static bool FastValidatePrepadded(uint32_t slot, const uint8_t* data, size_t length) {
1089
+ if (slot >= g_fast_slot_count) return false;
1090
+ return ata::is_valid_prepadded(g_fast_schemas[slot],
1091
+ reinterpret_cast<const char*>(data), length);
1092
+ }
1093
+
1094
+ // Slow path (NAPI) — called when V8 can't use fast path
1095
+ Napi::Value FastValidateSlow(const Napi::CallbackInfo& info) {
1096
+ Napi::Env env = info.Env();
1097
+ if (info.Length() < 2 || !info[0].IsNumber()) {
1098
+ return Napi::Boolean::New(env, false);
1099
+ }
1100
+ uint32_t slot = info[0].As<Napi::Number>().Uint32Value();
1101
+ if (info[1].IsTypedArray()) {
1102
+ auto arr = info[1].As<Napi::TypedArray>();
1103
+ if (arr.TypedArrayType() == napi_uint8_array) {
1104
+ auto u8 = info[1].As<Napi::Uint8Array>();
1105
+ bool ok = FastValidateImpl(slot, u8.Data(), u8.ByteLength());
1106
+ return Napi::Boolean::New(env, ok);
1107
+ }
1108
+ }
1109
+ if (info[1].IsBuffer()) {
1110
+ auto buf = info[1].As<Napi::Buffer<uint8_t>>();
1111
+ bool ok = FastValidateImpl(slot, buf.Data(), buf.Length());
1112
+ return Napi::Boolean::New(env, ok);
1113
+ }
1114
+ if (info[1].IsString()) {
1115
+ std::string json = info[1].As<Napi::String>().Utf8Value();
1116
+ bool ok = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(json.data()), json.size());
1117
+ return Napi::Boolean::New(env, ok);
1118
+ }
1119
+ return Napi::Boolean::New(env, false);
1120
+ }
1121
+
1122
+ // --- Raw NAPI fast path (minimal overhead) ---
1123
+ static napi_value RawFastValidate(napi_env env, napi_callback_info info) {
1124
+ size_t argc = 3;
1125
+ napi_value args[3];
1126
+ napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
1127
+
1128
+ if (argc < 2) {
1129
+ napi_value result;
1130
+ napi_get_boolean(env, false, &result);
1131
+ return result;
1132
+ }
1133
+
1134
+ uint32_t slot;
1135
+ napi_get_value_uint32(env, args[0], &slot);
1136
+
1137
+ // Check if pre-padded mode (3rd arg = json length, buffer has padding)
1138
+ bool prepadded = (argc >= 3);
1139
+ uint32_t json_length = 0;
1140
+ if (prepadded) {
1141
+ napi_get_value_uint32(env, args[2], &json_length);
1142
+ }
1143
+
1144
+ bool valid = false;
1145
+
1146
+ // Try typed array first (zero-copy)
1147
+ bool is_typedarray = false;
1148
+ napi_is_typedarray(env, args[1], &is_typedarray);
1149
+
1150
+ if (is_typedarray) {
1151
+ napi_typedarray_type type;
1152
+ size_t length;
1153
+ void* data;
1154
+ napi_get_typedarray_info(env, args[1], &type, &length, &data, nullptr, nullptr);
1155
+ if (data) {
1156
+ size_t actual_len = prepadded ? json_length : length;
1157
+ if (prepadded) {
1158
+ valid = FastValidatePrepadded(slot, static_cast<const uint8_t*>(data), actual_len);
1159
+ } else {
1160
+ valid = FastValidateImpl(slot, static_cast<const uint8_t*>(data), actual_len);
1161
+ }
1162
+ }
1163
+ } else {
1164
+ bool is_buffer = false;
1165
+ napi_is_buffer(env, args[1], &is_buffer);
1166
+ if (is_buffer) {
1167
+ void* data;
1168
+ size_t length;
1169
+ napi_get_buffer_info(env, args[1], &data, &length);
1170
+ if (data) {
1171
+ size_t actual_len = prepadded ? json_length : length;
1172
+ if (prepadded) {
1173
+ valid = FastValidatePrepadded(slot, static_cast<const uint8_t*>(data), actual_len);
1174
+ } else {
1175
+ valid = FastValidateImpl(slot, static_cast<const uint8_t*>(data), actual_len);
1176
+ }
1177
+ }
1178
+ } else {
1179
+ // String — must copy (can't pre-pad strings)
1180
+ size_t len;
1181
+ napi_get_value_string_utf8(env, args[1], nullptr, 0, &len);
1182
+ if (len <= 4096) {
1183
+ char buf[4097];
1184
+ napi_get_value_string_utf8(env, args[1], buf, len + 1, &len);
1185
+ valid = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(buf), len);
1186
+ } else {
1187
+ std::string buf(len, '\0');
1188
+ napi_get_value_string_utf8(env, args[1], buf.data(), len + 1, &len);
1189
+ valid = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(buf.data()), len);
1190
+ }
1191
+ }
1192
+ }
1193
+
1194
+ napi_value result;
1195
+ napi_get_boolean(env, valid, &result);
1196
+ return result;
1197
+ }
1198
+
1199
+ // --- Batch validation: one NAPI call, N validations ---
1200
+ static napi_value RawBatchValidate(napi_env env, napi_callback_info info) {
1201
+ size_t argc = 2;
1202
+ napi_value args[2];
1203
+ napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
1204
+
1205
+ uint32_t slot;
1206
+ napi_get_value_uint32(env, args[0], &slot);
1207
+ if (slot >= g_fast_slot_count) {
1208
+ napi_value r;
1209
+ napi_get_null(env, &r);
1210
+ return r;
1211
+ }
1212
+
1213
+ uint32_t arr_len;
1214
+ napi_get_array_length(env, args[1], &arr_len);
1215
+
1216
+ napi_value result_arr;
1217
+ napi_create_array_with_length(env, arr_len, &result_arr);
1218
+
1219
+ for (uint32_t i = 0; i < arr_len; i++) {
1220
+ napi_value item;
1221
+ napi_get_element(env, args[1], i, &item);
1222
+
1223
+ bool valid = false;
1224
+ bool is_buffer = false;
1225
+ napi_is_buffer(env, item, &is_buffer);
1226
+
1227
+ if (is_buffer) {
1228
+ void* data; size_t length;
1229
+ napi_get_buffer_info(env, item, &data, &length);
1230
+ if (data && length > 0)
1231
+ valid = ata::validate(g_fast_schemas[slot],
1232
+ std::string_view(static_cast<const char*>(data), length)).valid;
1233
+ } else {
1234
+ bool is_ta = false;
1235
+ napi_is_typedarray(env, item, &is_ta);
1236
+ if (is_ta) {
1237
+ napi_typedarray_type type; size_t length; void* data;
1238
+ napi_get_typedarray_info(env, item, &type, &length, &data, nullptr, nullptr);
1239
+ if (data && length > 0)
1240
+ valid = ata::validate(g_fast_schemas[slot],
1241
+ std::string_view(static_cast<const char*>(data), length)).valid;
1242
+ } else {
1243
+ size_t len;
1244
+ napi_get_value_string_utf8(env, item, nullptr, 0, &len);
1245
+ std::string buf(len, '\0');
1246
+ napi_get_value_string_utf8(env, item, buf.data(), len + 1, &len);
1247
+ valid = ata::validate(g_fast_schemas[slot], buf).valid;
1248
+ }
1249
+ }
1250
+
1251
+ napi_value bval;
1252
+ napi_get_boolean(env, valid, &bval);
1253
+ napi_set_element(env, result_arr, i, bval);
1254
+ }
1255
+ return result_arr;
1256
+ }
1257
+
1258
+ // --- Parallel NDJSON: multi-core validation, ajv can't do this ---
1259
+ static napi_value RawParallelValidate(napi_env env, napi_callback_info info) {
1260
+ size_t argc = 2;
1261
+ napi_value args[2];
1262
+ napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
1263
+
1264
+ uint32_t slot;
1265
+ napi_get_value_uint32(env, args[0], &slot);
1266
+ if (slot >= g_fast_slot_count) {
1267
+ napi_value r; napi_get_null(env, &r); return r;
1268
+ }
1269
+
1270
+ const char* data = nullptr;
1271
+ size_t total_len = 0;
1272
+ bool is_buffer = false;
1273
+ napi_is_buffer(env, args[1], &is_buffer);
1274
+ if (is_buffer) {
1275
+ void* d; napi_get_buffer_info(env, args[1], &d, &total_len);
1276
+ data = static_cast<const char*>(d);
1277
+ } else {
1278
+ bool is_ta = false;
1279
+ napi_is_typedarray(env, args[1], &is_ta);
1280
+ if (is_ta) {
1281
+ napi_typedarray_type type; void* d;
1282
+ napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
1283
+ data = static_cast<const char*>(d);
1284
+ }
1285
+ }
1286
+ if (!data || total_len == 0) {
1287
+ napi_value r; napi_create_array_with_length(env, 0, &r); return r;
1288
+ }
1289
+
1290
+ // Split lines
1291
+ struct line { const char* ptr; size_t len; };
1292
+ std::vector<line> lines;
1293
+ const char* start = data;
1294
+ const char* end = data + total_len;
1295
+ while (start < end) {
1296
+ const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
1297
+ size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
1298
+ if (line_len > 0) lines.push_back({start, line_len});
1299
+ start += line_len + 1;
1300
+ }
1301
+
1302
+ size_t n = lines.size();
1303
+ std::vector<bool> results(n, false);
1304
+
1305
+ // Parallel validation across CPU cores
1306
+ unsigned num_threads = std::thread::hardware_concurrency();
1307
+ if (num_threads == 0) num_threads = 4;
1308
+ if (num_threads > n) num_threads = (unsigned)n;
1309
+
1310
+ // Each thread gets its own schema_ref (thread-safe: compile is one-time, validate is read-only)
1311
+ // But ata::validate uses internal parser that's NOT thread-safe
1312
+ // So each thread needs its own compiled schema
1313
+ const auto& schema_json = g_fast_schema_jsons[slot];
1314
+
1315
+ if (schema_json.empty() || n < num_threads * 2) {
1316
+ // Fallback: single-threaded for small batches
1317
+ for (size_t i = 0; i < n; i++) {
1318
+ auto r = ata::validate(g_fast_schemas[slot], std::string_view(lines[i].ptr, lines[i].len));
1319
+ results[i] = r.valid;
1320
+ }
1321
+ } else {
1322
+ auto& tp = pool();
1323
+ unsigned nworkers = tp.size();
1324
+ size_t chunk = (n + nworkers - 1) / nworkers;
1325
+
1326
+ for (unsigned t = 0; t < nworkers; t++) {
1327
+ size_t from = t * chunk;
1328
+ size_t to = std::min(from + chunk, n);
1329
+ if (from >= n) break;
1330
+
1331
+ tp.submit([&results, &lines, from, to, slot](
1332
+ std::unordered_map<uint32_t, ata::schema_ref>& cache) {
1333
+ auto it = cache.find(slot);
1334
+ if (it == cache.end()) {
1335
+ it = cache.emplace(slot, ata::compile(g_fast_schema_jsons[slot])).first;
1336
+ }
1337
+ auto& s = it->second;
1338
+ // Free padding: lines in NDJSON buffer almost always have free padding
1339
+ // (next line's data serves as padding). Only last line might need copy.
1340
+ for (size_t i = from; i < to; i++) {
1341
+ results[i] = ata::is_valid_prepadded(s, lines[i].ptr, lines[i].len);
1342
+ }
1343
+ });
1344
+ }
1345
+ tp.wait();
1346
+ }
1347
+
1348
+ napi_value result_arr;
1349
+ napi_create_array_with_length(env, n, &result_arr);
1350
+ for (size_t i = 0; i < n; i++) {
1351
+ napi_value bval;
1352
+ napi_get_boolean(env, results[i], &bval);
1353
+ napi_set_element(env, result_arr, (uint32_t)i, bval);
1354
+ }
1355
+ return result_arr;
1356
+ }
1357
+
1358
+ // --- Parallel count: returns just the number of valid items (no array overhead) ---
1359
+ static napi_value RawParallelCount(napi_env env, napi_callback_info info) {
1360
+ size_t argc = 2;
1361
+ napi_value args[2];
1362
+ napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
1363
+
1364
+ uint32_t slot;
1365
+ napi_get_value_uint32(env, args[0], &slot);
1366
+ if (slot >= g_fast_slot_count) {
1367
+ napi_value r; napi_create_uint32(env, 0, &r); return r;
1368
+ }
1369
+
1370
+ const char* data = nullptr;
1371
+ size_t total_len = 0;
1372
+ bool is_buffer = false;
1373
+ napi_is_buffer(env, args[1], &is_buffer);
1374
+ if (is_buffer) {
1375
+ void* d; napi_get_buffer_info(env, args[1], &d, &total_len);
1376
+ data = static_cast<const char*>(d);
1377
+ } else {
1378
+ bool is_ta = false;
1379
+ napi_is_typedarray(env, args[1], &is_ta);
1380
+ if (is_ta) {
1381
+ napi_typedarray_type type; void* d;
1382
+ napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
1383
+ data = static_cast<const char*>(d);
1384
+ }
1385
+ }
1386
+ if (!data || total_len == 0) {
1387
+ napi_value r; napi_create_uint32(env, 0, &r); return r;
1388
+ }
1389
+
1390
+ struct line { const char* ptr; size_t len; };
1391
+ std::vector<line> lines;
1392
+ const char* start = data;
1393
+ const char* end = data + total_len;
1394
+ while (start < end) {
1395
+ const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
1396
+ size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
1397
+ if (line_len > 0) lines.push_back({start, line_len});
1398
+ start += line_len + 1;
1399
+ }
1400
+
1401
+ size_t n = lines.size();
1402
+ std::atomic<uint32_t> valid_count{0};
1403
+
1404
+ auto& tp = pool();
1405
+ unsigned nworkers = tp.size();
1406
+ size_t chunk = (n + nworkers - 1) / nworkers;
1407
+
1408
+ if (n < nworkers * 2) {
1409
+ // Small batch — single thread
1410
+ uint32_t cnt = 0;
1411
+ for (size_t i = 0; i < n; i++) {
1412
+ if (ata::validate(g_fast_schemas[slot], std::string_view(lines[i].ptr, lines[i].len)).valid)
1413
+ cnt++;
1414
+ }
1415
+ napi_value r; napi_create_uint32(env, cnt, &r); return r;
1416
+ }
1417
+
1418
+ for (unsigned t = 0; t < nworkers; t++) {
1419
+ size_t from = t * chunk;
1420
+ size_t to = std::min(from + chunk, n);
1421
+ if (from >= n) break;
1422
+
1423
+ tp.submit([&valid_count, &lines, from, to, slot](
1424
+ std::unordered_map<uint32_t, ata::schema_ref>& cache) {
1425
+ auto it = cache.find(slot);
1426
+ if (it == cache.end()) {
1427
+ it = cache.emplace(slot, ata::compile(g_fast_schema_jsons[slot])).first;
1428
+ }
1429
+ auto& s = it->second;
1430
+ uint32_t local_cnt = 0;
1431
+ for (size_t i = from; i < to; i++) {
1432
+ if (ata::is_valid_prepadded(s, lines[i].ptr, lines[i].len))
1433
+ local_cnt++;
1434
+ }
1435
+ valid_count.fetch_add(local_cnt, std::memory_order_relaxed);
1436
+ });
1437
+ }
1438
+ tp.wait();
1439
+
1440
+ napi_value r;
1441
+ napi_create_uint32(env, valid_count.load(), &r);
1442
+ return r;
1443
+ }
1444
+
1445
+ // --- NDJSON: single buffer, newline-delimited ---
1446
+ static napi_value RawNDJSONValidate(napi_env env, napi_callback_info info) {
1447
+ size_t argc = 2;
1448
+ napi_value args[2];
1449
+ napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
1450
+
1451
+ uint32_t slot;
1452
+ napi_get_value_uint32(env, args[0], &slot);
1453
+ if (slot >= g_fast_slot_count) {
1454
+ napi_value r; napi_get_null(env, &r); return r;
1455
+ }
1456
+
1457
+ const char* data = nullptr;
1458
+ size_t total_len = 0;
1459
+ bool is_buffer = false;
1460
+ napi_is_buffer(env, args[1], &is_buffer);
1461
+ if (is_buffer) {
1462
+ void* d; napi_get_buffer_info(env, args[1], &d, &total_len);
1463
+ data = static_cast<const char*>(d);
1464
+ } else {
1465
+ bool is_ta = false;
1466
+ napi_is_typedarray(env, args[1], &is_ta);
1467
+ if (is_ta) {
1468
+ napi_typedarray_type type; void* d;
1469
+ napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
1470
+ data = static_cast<const char*>(d);
1471
+ }
1472
+ }
1473
+ if (!data || total_len == 0) {
1474
+ napi_value r; napi_create_array_with_length(env, 0, &r); return r;
1475
+ }
1476
+
1477
+ // Count lines first for array allocation
1478
+ uint32_t count = 0;
1479
+ for (size_t i = 0; i < total_len; i++) if (data[i] == '\n') count++;
1480
+ if (total_len > 0 && data[total_len-1] != '\n') count++;
1481
+
1482
+ napi_value result_arr;
1483
+ napi_create_array_with_length(env, count, &result_arr);
1484
+
1485
+ const char* start = data;
1486
+ const char* end = data + total_len;
1487
+ uint32_t idx = 0;
1488
+
1489
+ while (start < end) {
1490
+ const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
1491
+ size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
1492
+ if (line_len > 0) {
1493
+ auto r = ata::validate(g_fast_schemas[slot], std::string_view(start, line_len));
1494
+ napi_value bval;
1495
+ napi_get_boolean(env, r.valid, &bval);
1496
+ napi_set_element(env, result_arr, idx++, bval);
1497
+ }
1498
+ start += line_len + 1;
1499
+ }
1500
+ return result_arr;
1501
+ }
1502
+
737
1503
  Napi::Object Init(Napi::Env env, Napi::Object exports) {
738
1504
  CompiledSchema::Init(env, exports);
739
1505
  exports.Set("validate", Napi::Function::New(env, ValidateOneShot));
740
1506
  exports.Set("version", Napi::Function::New(env, GetVersion));
1507
+ exports.Set("fastRegister", Napi::Function::New(env, FastRegister));
1508
+ exports.Set("fastValidate", Napi::Function::New(env, FastValidateSlow));
1509
+
1510
+ napi_value raw_fn;
1511
+ napi_create_function(env, "rawFastValidate", NAPI_AUTO_LENGTH, RawFastValidate, nullptr, &raw_fn);
1512
+ exports.Set("rawFastValidate", Napi::Value(env, raw_fn));
1513
+
1514
+ napi_value batch_fn;
1515
+ napi_create_function(env, "rawBatchValidate", NAPI_AUTO_LENGTH, RawBatchValidate, nullptr, &batch_fn);
1516
+ exports.Set("rawBatchValidate", Napi::Value(env, batch_fn));
1517
+
1518
+ napi_value ndjson_fn;
1519
+ napi_create_function(env, "rawNDJSONValidate", NAPI_AUTO_LENGTH, RawNDJSONValidate, nullptr, &ndjson_fn);
1520
+ exports.Set("rawNDJSONValidate", Napi::Value(env, ndjson_fn));
1521
+
1522
+ napi_value parallel_fn;
1523
+ napi_create_function(env, "rawParallelValidate", NAPI_AUTO_LENGTH, RawParallelValidate, nullptr, &parallel_fn);
1524
+ exports.Set("rawParallelValidate", Napi::Value(env, parallel_fn));
1525
+
1526
+ napi_value pcount_fn;
1527
+ napi_create_function(env, "rawParallelCount", NAPI_AUTO_LENGTH, RawParallelCount, nullptr, &pcount_fn);
1528
+ exports.Set("rawParallelCount", Napi::Value(env, pcount_fn));
1529
+
741
1530
  return exports;
742
1531
  }
743
1532