ata-validator 0.2.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -185
- package/binding/ata_napi.cpp +610 -7
- package/binding.gyp +1 -1
- package/include/ata.h +11 -3
- package/index.js +169 -3
- package/lib/js-compiler.js +845 -0
- package/package.json +3 -2
- package/prebuilds/darwin-arm64/ata-validator.node +0 -0
- package/src/ata.cpp +78 -11
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ata-validator",
|
|
3
|
-
"version": "0.2.0",
|
|
4
|
-
"description": "Ultra-fast JSON Schema validator
|
|
3
|
+
"version": "0.4.1",
|
|
4
|
+
"description": "Ultra-fast JSON Schema validator. Beats ajv on every valid-path benchmark: 1.1x–2.7x faster validate(obj), 151x faster compilation, 5.9x faster parallel batch. Speculative validation with V8-optimized JS codegen, simdjson, multi-core. Standard Schema V1 compatible.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
7
7
|
"scripts": {
|
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
"files": [
|
|
47
47
|
"index.js",
|
|
48
48
|
"index.d.ts",
|
|
49
|
+
"lib/",
|
|
49
50
|
"compat.js",
|
|
50
51
|
"compat.d.ts",
|
|
51
52
|
"binding.gyp",
|
|
Binary file
|
package/src/ata.cpp
CHANGED
|
@@ -6,6 +6,13 @@
|
|
|
6
6
|
#include <set>
|
|
7
7
|
#include <unordered_map>
|
|
8
8
|
|
|
9
|
+
#ifdef _WIN32
|
|
10
|
+
#include <windows.h>
|
|
11
|
+
#include <sysinfoapi.h>
|
|
12
|
+
#else
|
|
13
|
+
#include <unistd.h>
|
|
14
|
+
#endif
|
|
15
|
+
|
|
9
16
|
#include "simdjson.h"
|
|
10
17
|
|
|
11
18
|
// --- Fast format validators (no std::regex) ---
|
|
@@ -279,11 +286,20 @@ struct compiled_schema {
|
|
|
279
286
|
schema_node_ptr root;
|
|
280
287
|
std::unordered_map<std::string, schema_node_ptr> defs;
|
|
281
288
|
std::string raw_schema;
|
|
282
|
-
dom::parser parser;
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
289
|
+
dom::parser parser; // used only at compile time
|
|
290
|
+
cg::plan gen_plan; // codegen validation plan
|
|
291
|
+
bool use_ondemand = false; // true if codegen plan supports On Demand
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
// Thread-local persistent parsers — reused across all validate calls on the
|
|
295
|
+
// same thread. Keeps internal buffers hot in cache and avoids re-allocation.
|
|
296
|
+
static dom::parser& tl_dom_parser() {
|
|
297
|
+
thread_local dom::parser p;
|
|
298
|
+
return p;
|
|
299
|
+
}
|
|
300
|
+
static simdjson::ondemand::parser& tl_od_parser() {
|
|
301
|
+
thread_local simdjson::ondemand::parser p;
|
|
302
|
+
return p;
|
|
287
303
|
};
|
|
288
304
|
|
|
289
305
|
// --- Schema compilation ---
|
|
@@ -1517,6 +1533,36 @@ static bool plan_supports_ondemand(const cg::plan& p) {
|
|
|
1517
1533
|
return true;
|
|
1518
1534
|
}
|
|
1519
1535
|
|
|
1536
|
+
// Free padding: check if buffer is near a page boundary
|
|
1537
|
+
// On modern systems, pages are at least 4096 bytes. If we're far enough
|
|
1538
|
+
// from the end of a page, we can read 64 bytes beyond without a fault.
|
|
1539
|
+
static long get_page_size() {
|
|
1540
|
+
#ifdef _WIN32
|
|
1541
|
+
SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize;
|
|
1542
|
+
#else
|
|
1543
|
+
static long ps = sysconf(_SC_PAGESIZE);
|
|
1544
|
+
return ps;
|
|
1545
|
+
#endif
|
|
1546
|
+
}
|
|
1547
|
+
|
|
1548
|
+
static bool near_page_boundary(const char* buf, size_t len) {
|
|
1549
|
+
return ((reinterpret_cast<uintptr_t>(buf + len - 1) % get_page_size())
|
|
1550
|
+
+ REQUIRED_PADDING >= static_cast<uintptr_t>(get_page_size()));
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
// Zero-copy validate with free padding (Lemire's trick).
|
|
1554
|
+
// Almost never allocates — only if buffer is near a page boundary.
|
|
1555
|
+
static simdjson::padded_string_view get_free_padded_view(
|
|
1556
|
+
const char* data, size_t length, simdjson::padded_string& fallback) {
|
|
1557
|
+
if (near_page_boundary(data, length)) {
|
|
1558
|
+
// Rare: near page boundary, must copy
|
|
1559
|
+
fallback = simdjson::padded_string(data, length);
|
|
1560
|
+
return fallback;
|
|
1561
|
+
}
|
|
1562
|
+
// Common: free padding available, zero-copy
|
|
1563
|
+
return simdjson::padded_string_view(data, length, length + REQUIRED_PADDING);
|
|
1564
|
+
}
|
|
1565
|
+
|
|
1520
1566
|
schema_ref compile(std::string_view schema_json) {
|
|
1521
1567
|
auto ctx = std::make_shared<compiled_schema>();
|
|
1522
1568
|
ctx->raw_schema = std::string(schema_json);
|
|
@@ -1546,14 +1592,15 @@ validation_result validate(const schema_ref& schema, std::string_view json,
|
|
|
1546
1592
|
return {false, {{error_code::invalid_schema, "", "schema not compiled"}}};
|
|
1547
1593
|
}
|
|
1548
1594
|
|
|
1549
|
-
|
|
1595
|
+
// Free padding trick: avoid padded_string copy when possible
|
|
1596
|
+
simdjson::padded_string fallback;
|
|
1597
|
+
auto psv = get_free_padded_view(json.data(), json.size(), fallback);
|
|
1550
1598
|
|
|
1551
1599
|
// Ultra-fast path: On Demand (no DOM materialization)
|
|
1552
|
-
// Only beneficial for larger documents where DOM materialization cost dominates
|
|
1553
1600
|
static constexpr size_t OD_THRESHOLD = 32;
|
|
1554
1601
|
if (schema.impl->use_ondemand && !schema.impl->gen_plan.code.empty() &&
|
|
1555
1602
|
json.size() >= OD_THRESHOLD) {
|
|
1556
|
-
auto od_result =
|
|
1603
|
+
auto od_result = tl_od_parser().iterate(psv);
|
|
1557
1604
|
if (!od_result.error()) {
|
|
1558
1605
|
simdjson::ondemand::value root_val;
|
|
1559
1606
|
if (od_result.get_value().get(root_val) == SUCCESS) {
|
|
@@ -1562,10 +1609,12 @@ validation_result validate(const schema_ref& schema, std::string_view json,
|
|
|
1562
1609
|
}
|
|
1563
1610
|
}
|
|
1564
1611
|
}
|
|
1565
|
-
//
|
|
1612
|
+
// Need fresh view for DOM parse (On Demand consumed it)
|
|
1613
|
+
psv = get_free_padded_view(json.data(), json.size(), fallback);
|
|
1566
1614
|
}
|
|
1567
1615
|
|
|
1568
|
-
auto
|
|
1616
|
+
auto& dom_p = tl_dom_parser();
|
|
1617
|
+
auto result = dom_p.parse(psv);
|
|
1569
1618
|
if (result.error()) {
|
|
1570
1619
|
return {false, {{error_code::invalid_json, "", "invalid JSON document"}}};
|
|
1571
1620
|
}
|
|
@@ -1580,7 +1629,7 @@ validation_result validate(const schema_ref& schema, std::string_view json,
|
|
|
1580
1629
|
}
|
|
1581
1630
|
|
|
1582
1631
|
// Slow path: re-parse + tree walker with error details
|
|
1583
|
-
auto result2 =
|
|
1632
|
+
auto result2 = dom_p.parse(psv);
|
|
1584
1633
|
std::vector<validation_error> errors;
|
|
1585
1634
|
validate_node(schema.impl->root, result2.value(), "", *schema.impl, errors,
|
|
1586
1635
|
opts.all_errors);
|
|
@@ -1598,4 +1647,22 @@ validation_result validate(std::string_view schema_json,
|
|
|
1598
1647
|
return validate(s, json, opts);
|
|
1599
1648
|
}
|
|
1600
1649
|
|
|
1650
|
+
|
|
1651
|
+
bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t length) {
|
|
1652
|
+
if (!schema.impl || !schema.impl->root) return false;
|
|
1653
|
+
|
|
1654
|
+
simdjson::padded_string fallback;
|
|
1655
|
+
auto psv = get_free_padded_view(data, length, fallback);
|
|
1656
|
+
auto result = tl_dom_parser().parse(psv);
|
|
1657
|
+
if (result.error()) return false;
|
|
1658
|
+
|
|
1659
|
+
if (!schema.impl->gen_plan.code.empty()) {
|
|
1660
|
+
return cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, result.value());
|
|
1661
|
+
}
|
|
1662
|
+
|
|
1663
|
+
std::vector<validation_error> errors;
|
|
1664
|
+
validate_node(schema.impl->root, result.value(), "", *schema.impl, errors, false);
|
|
1665
|
+
return errors.empty();
|
|
1666
|
+
}
|
|
1667
|
+
|
|
1601
1668
|
} // namespace ata
|