ata-validator 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "ata-validator",
3
- "version": "0.2.0",
4
- "description": "Ultra-fast JSON Schema validator powered by simdjson, RE2, and codegen bytecode engine. 120x faster schema compilation, 98.6% spec compliant, Standard Schema V1 compatible.",
3
+ "version": "0.4.0",
4
+ "description": "Ultra-fast JSON Schema validator. Beats ajv on every valid-path benchmark: 1.1x–2.7x faster validate(obj), 151x faster compilation, 5.9x faster parallel batch. Speculative validation with V8-optimized JS codegen, simdjson, multi-core. Standard Schema V1 compatible.",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
7
7
  "scripts": {
@@ -46,6 +46,7 @@
46
46
  "files": [
47
47
  "index.js",
48
48
  "index.d.ts",
49
+ "lib/",
49
50
  "compat.js",
50
51
  "compat.d.ts",
51
52
  "binding.gyp",
package/src/ata.cpp CHANGED
@@ -6,6 +6,13 @@
6
6
  #include <set>
7
7
  #include <unordered_map>
8
8
 
9
+ #ifdef _WIN32
10
+ #include <windows.h>
11
+ #include <sysinfoapi.h>
12
+ #else
13
+ #include <unistd.h>
14
+ #endif
15
+
9
16
  #include "simdjson.h"
10
17
 
11
18
  // --- Fast format validators (no std::regex) ---
@@ -279,11 +286,20 @@ struct compiled_schema {
279
286
  schema_node_ptr root;
280
287
  std::unordered_map<std::string, schema_node_ptr> defs;
281
288
  std::string raw_schema;
282
- dom::parser parser;
283
- dom::parser doc_parser;
284
- simdjson::ondemand::parser od_parser; // On Demand parser for fast path
285
- cg::plan gen_plan; // codegen validation plan
286
- bool use_ondemand = false; // true if codegen plan supports On Demand
289
+ dom::parser parser; // used only at compile time
290
+ cg::plan gen_plan; // codegen validation plan
291
+ bool use_ondemand = false; // true if codegen plan supports On Demand
292
+ };
293
+
294
+ // Thread-local persistent parsers — reused across all validate calls on the
295
+ // same thread. Keeps internal buffers hot in cache and avoids re-allocation.
296
+ static dom::parser& tl_dom_parser() {
297
+ thread_local dom::parser p;
298
+ return p;
299
+ }
300
+ static simdjson::ondemand::parser& tl_od_parser() {
301
+ thread_local simdjson::ondemand::parser p;
302
+ return p;
287
303
  };
288
304
 
289
305
  // --- Schema compilation ---
@@ -1517,6 +1533,36 @@ static bool plan_supports_ondemand(const cg::plan& p) {
1517
1533
  return true;
1518
1534
  }
1519
1535
 
1536
// Free padding: the page-boundary check below needs the system page size.
// On modern systems, pages are at least 4096 bytes. If a buffer ends far
// enough from the end of a page, 64 bytes beyond it can be read without a
// fault.
//
// Returns the virtual-memory page size in bytes. The OS is queried once (on
// both Windows and POSIX) and the answer is cached for the lifetime of the
// process. If the query fails or reports a non-positive value, 4096 is
// returned: callers use the result as a modulus, so it must always be
// positive, and the smallest common page size is the conservative choice for
// the boundary check.
static long get_page_size() {
  static const long cached_size = []() -> long {
#ifdef _WIN32
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    const long queried = static_cast<long>(si.dwPageSize);
#else
    // sysconf returns -1 on failure.
    const long queried = sysconf(_SC_PAGESIZE);
#endif
    return queried > 0 ? queried : 4096L;
  }();
  return cached_size;
}
1547
+
1548
+ static bool near_page_boundary(const char* buf, size_t len) {
1549
+ return ((reinterpret_cast<uintptr_t>(buf + len - 1) % get_page_size())
1550
+ + REQUIRED_PADDING >= static_cast<uintptr_t>(get_page_size()));
1551
+ }
1552
+
1553
+ // Zero-copy validate with free padding (Lemire's trick).
1554
+ // Almost never allocates — only if buffer is near a page boundary.
1555
+ static simdjson::padded_string_view get_free_padded_view(
1556
+ const char* data, size_t length, simdjson::padded_string& fallback) {
1557
+ if (near_page_boundary(data, length)) {
1558
+ // Rare: near page boundary, must copy
1559
+ fallback = simdjson::padded_string(data, length);
1560
+ return fallback;
1561
+ }
1562
+ // Common: free padding available, zero-copy
1563
+ return simdjson::padded_string_view(data, length, length + REQUIRED_PADDING);
1564
+ }
1565
+
1520
1566
  schema_ref compile(std::string_view schema_json) {
1521
1567
  auto ctx = std::make_shared<compiled_schema>();
1522
1568
  ctx->raw_schema = std::string(schema_json);
@@ -1546,14 +1592,15 @@ validation_result validate(const schema_ref& schema, std::string_view json,
1546
1592
  return {false, {{error_code::invalid_schema, "", "schema not compiled"}}};
1547
1593
  }
1548
1594
 
1549
- auto padded = simdjson::padded_string(json);
1595
+ // Free padding trick: avoid padded_string copy when possible
1596
+ simdjson::padded_string fallback;
1597
+ auto psv = get_free_padded_view(json.data(), json.size(), fallback);
1550
1598
 
1551
1599
  // Ultra-fast path: On Demand (no DOM materialization)
1552
- // Only beneficial for larger documents where DOM materialization cost dominates
1553
1600
  static constexpr size_t OD_THRESHOLD = 32;
1554
1601
  if (schema.impl->use_ondemand && !schema.impl->gen_plan.code.empty() &&
1555
1602
  json.size() >= OD_THRESHOLD) {
1556
- auto od_result = schema.impl->od_parser.iterate(padded);
1603
+ auto od_result = tl_od_parser().iterate(psv);
1557
1604
  if (!od_result.error()) {
1558
1605
  simdjson::ondemand::value root_val;
1559
1606
  if (od_result.get_value().get(root_val) == SUCCESS) {
@@ -1562,10 +1609,12 @@ validation_result validate(const schema_ref& schema, std::string_view json,
1562
1609
  }
1563
1610
  }
1564
1611
  }
1565
- // On Demand said invalid fall through to DOM for error details
1612
+ // Need fresh view for DOM parse (On Demand consumed it)
1613
+ psv = get_free_padded_view(json.data(), json.size(), fallback);
1566
1614
  }
1567
1615
 
1568
- auto result = schema.impl->doc_parser.parse(padded);
1616
+ auto& dom_p = tl_dom_parser();
1617
+ auto result = dom_p.parse(psv);
1569
1618
  if (result.error()) {
1570
1619
  return {false, {{error_code::invalid_json, "", "invalid JSON document"}}};
1571
1620
  }
@@ -1580,7 +1629,7 @@ validation_result validate(const schema_ref& schema, std::string_view json,
1580
1629
  }
1581
1630
 
1582
1631
  // Slow path: re-parse + tree walker with error details
1583
- auto result2 = schema.impl->doc_parser.parse(padded);
1632
+ auto result2 = dom_p.parse(psv);
1584
1633
  std::vector<validation_error> errors;
1585
1634
  validate_node(schema.impl->root, result2.value(), "", *schema.impl, errors,
1586
1635
  opts.all_errors);
@@ -1598,4 +1647,22 @@ validation_result validate(std::string_view schema_json,
1598
1647
  return validate(s, json, opts);
1599
1648
  }
1600
1649
 
1650
+
1651
+ bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t length) {
1652
+ if (!schema.impl || !schema.impl->root) return false;
1653
+
1654
+ simdjson::padded_string fallback;
1655
+ auto psv = get_free_padded_view(data, length, fallback);
1656
+ auto result = tl_dom_parser().parse(psv);
1657
+ if (result.error()) return false;
1658
+
1659
+ if (!schema.impl->gen_plan.code.empty()) {
1660
+ return cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, result.value());
1661
+ }
1662
+
1663
+ std::vector<validation_error> errors;
1664
+ validate_node(schema.impl->root, result.value(), "", *schema.impl, errors, false);
1665
+ return errors.empty();
1666
+ }
1667
+
1601
1668
  } // namespace ata