ata-validator 0.2.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -185
- package/binding/ata_napi.cpp +610 -7
- package/binding.gyp +1 -1
- package/include/ata.h +11 -3
- package/index.js +169 -3
- package/lib/js-compiler.js +845 -0
- package/package.json +3 -2
- package/prebuilds/darwin-arm64/ata-validator.node +0 -0
- package/src/ata.cpp +78 -11
package/binding/ata_napi.cpp
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
#include <napi.h>
|
|
2
|
+
#include <node_api.h>
|
|
2
3
|
|
|
3
4
|
#include <cmath>
|
|
5
|
+
#include <thread>
|
|
6
|
+
#include <future>
|
|
7
|
+
#include <mutex>
|
|
8
|
+
#include <condition_variable>
|
|
9
|
+
#include <functional>
|
|
10
|
+
#include <queue>
|
|
11
|
+
#include <atomic>
|
|
4
12
|
#include <re2/re2.h>
|
|
5
13
|
#include <set>
|
|
6
14
|
#include <string>
|
|
@@ -833,28 +841,75 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
|
|
|
833
841
|
return ValidateDirectImpl(env, info[0]);
|
|
834
842
|
}
|
|
835
843
|
|
|
844
|
+
// Thread-local reusable buffer for string extraction — avoids per-call allocation.
|
|
845
|
+
// Sized with SIMDJSON_PADDING so simdjson can read safely beyond the JSON.
|
|
846
|
+
static constexpr size_t TL_BUF_SHRINK_THRESHOLD = 64 * 1024; // 64KB
|
|
847
|
+
|
|
848
|
+
// Returns the calling thread's scratch string.
// The object is thread_local, so every thread sees its own instance and the
// same instance is handed back on each call from that thread.
static std::string& tl_json_buf() {
  thread_local std::string scratch{};
  return scratch;
}
|
|
852
|
+
|
|
853
|
+
// Extract JS string into reusable thread-local buffer with simdjson padding.
|
|
854
|
+
// Returns {data, length} — data is valid until next call on same thread.
|
|
855
|
+
// Copies the UTF-8 bytes of a JS string into the per-thread scratch buffer.
//
// The buffer is grown so that at least ata::REQUIRED_PADDING bytes (plus the
// NUL terminator) follow the copied text. Returns {pointer, byte length}; the
// pointer refers to the scratch buffer, so it is only valid until the next
// call made on the same thread.
static std::pair<const char*, size_t> extract_string(napi_env env, napi_value val) {
  // First pass: ask N-API only for the encoded length.
  size_t utf8_len = 0;
  napi_get_value_string_utf8(env, val, nullptr, 0, &utf8_len);

  std::string& scratch = tl_json_buf();
  const size_t required = utf8_len + 1 + ata::REQUIRED_PADDING;
  if (required > scratch.size()) {
    scratch.resize(required);
  }

  // Second pass: copy text + NUL terminator into the scratch storage.
  napi_get_value_string_utf8(env, val, scratch.data(), utf8_len + 1, &utf8_len);

  // A single huge payload should not pin a huge buffer forever: once the
  // buffer exceeds the threshold and the current payload is well below it,
  // trim back down to the threshold size.
  const bool oversized = scratch.size() > TL_BUF_SHRINK_THRESHOLD;
  const bool small_payload = utf8_len < TL_BUF_SHRINK_THRESHOLD / 2;
  if (oversized && small_payload) {
    scratch.resize(TL_BUF_SHRINK_THRESHOLD);
    scratch.shrink_to_fit();
  }

  return {scratch.data(), utf8_len};
}
|
|
869
|
+
|
|
836
870
|
// Validate via JSON string (simdjson parse path)
|
|
837
871
|
// Validates a JSON document given either as a Buffer or as a JS string and
// returns the full result object built by make_result().
// Throws a JS TypeError (and returns undefined) when no argument is passed or
// when the argument is neither a Buffer nor a string.
Napi::Value ValidateJSON(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();

  if (info.Length() == 0) {
    Napi::TypeError::New(env, "JSON string expected")
        .ThrowAsJavaScriptException();
    return env.Undefined();
  }

  const Napi::Value input = info[0];

  // Buffer input: validate the bytes in place, no string extraction.
  if (input.IsBuffer()) {
    auto bytes = input.As<Napi::Buffer<char>>();
    const std::string_view json(bytes.Data(), bytes.Length());
    auto result = ata::validate(schema_, json);
    return make_result(env, result);
  }

  if (!input.IsString()) {
    Napi::TypeError::New(env, "JSON string or Buffer expected")
        .ThrowAsJavaScriptException();
    return env.Undefined();
  }

  // String input: copy into the reusable thread-local buffer first.
  const auto extracted = extract_string(env, input);
  auto result = ata::validate(
      schema_, std::string_view(extracted.first, extracted.second));
  return make_result(env, result);
}
|
|
848
893
|
|
|
849
894
|
// Fast boolean-only validation — no error object creation
|
|
850
895
|
// Boolean-only validation of a JSON document passed as a Buffer or a string.
// Never throws: a missing argument or an argument of any other type simply
// yields `false`.
Napi::Value IsValidJSON(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  bool ok = false;

  if (info.Length() >= 1) {
    const Napi::Value input = info[0];
    if (input.IsBuffer()) {
      auto bytes = input.As<Napi::Buffer<char>>();
      ok = ata::validate(schema_,
                         std::string_view(bytes.Data(), bytes.Length()))
               .valid;
    } else if (input.IsString()) {
      // extract_string() leaves ata::REQUIRED_PADDING bytes after the text,
      // which is what the prepadded entry point requires.
      const auto extracted = extract_string(env, input);
      ok = ata::is_valid_prepadded(schema_, extracted.first, extracted.second);
    }
  }

  return Napi::Boolean::New(env, ok);
}
|
|
859
914
|
|
|
860
915
|
// Explicit direct validation (always V8 traversal, never stringify)
|
|
@@ -920,10 +975,558 @@ Napi::Value GetVersion(const Napi::CallbackInfo& info) {
|
|
|
920
975
|
return Napi::String::New(info.Env(), std::string(ata::version()));
|
|
921
976
|
}
|
|
922
977
|
|
|
978
|
+
// --- Thread Pool ---
|
|
979
|
+
class ThreadPool {
|
|
980
|
+
public:
|
|
981
|
+
ThreadPool() {
|
|
982
|
+
unsigned n = std::thread::hardware_concurrency();
|
|
983
|
+
if (n == 0) n = 4;
|
|
984
|
+
for (unsigned i = 0; i < n; i++) {
|
|
985
|
+
workers_.emplace_back([this] {
|
|
986
|
+
// Each thread gets its own schema cache
|
|
987
|
+
std::unordered_map<uint32_t, ata::schema_ref> cache;
|
|
988
|
+
while (true) {
|
|
989
|
+
std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)> task;
|
|
990
|
+
{
|
|
991
|
+
std::unique_lock<std::mutex> lock(mtx_);
|
|
992
|
+
cv_.wait(lock, [this] { return stop_ || !tasks_.empty(); });
|
|
993
|
+
if (stop_ && tasks_.empty()) return;
|
|
994
|
+
task = std::move(tasks_.front());
|
|
995
|
+
tasks_.pop();
|
|
996
|
+
}
|
|
997
|
+
task(cache);
|
|
998
|
+
{
|
|
999
|
+
std::unique_lock<std::mutex> lock(done_mtx_);
|
|
1000
|
+
pending_--;
|
|
1001
|
+
if (pending_ == 0) done_cv_.notify_all();
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
});
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
|
|
1008
|
+
void submit(std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)> task) {
|
|
1009
|
+
{
|
|
1010
|
+
std::unique_lock<std::mutex> lock(mtx_);
|
|
1011
|
+
tasks_.push(std::move(task));
|
|
1012
|
+
}
|
|
1013
|
+
{
|
|
1014
|
+
std::unique_lock<std::mutex> lock(done_mtx_);
|
|
1015
|
+
pending_++;
|
|
1016
|
+
}
|
|
1017
|
+
cv_.notify_one();
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
void wait() {
|
|
1021
|
+
std::unique_lock<std::mutex> lock(done_mtx_);
|
|
1022
|
+
done_cv_.wait(lock, [this] { return pending_ == 0; });
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
unsigned size() const { return (unsigned)workers_.size(); }
|
|
1026
|
+
|
|
1027
|
+
~ThreadPool() {
|
|
1028
|
+
{ std::unique_lock<std::mutex> lock(mtx_); stop_ = true; }
|
|
1029
|
+
cv_.notify_all();
|
|
1030
|
+
for (auto& w : workers_) w.join();
|
|
1031
|
+
}
|
|
1032
|
+
|
|
1033
|
+
private:
|
|
1034
|
+
std::vector<std::thread> workers_;
|
|
1035
|
+
std::queue<std::function<void(std::unordered_map<uint32_t, ata::schema_ref>&)>> tasks_;
|
|
1036
|
+
std::mutex mtx_;
|
|
1037
|
+
std::condition_variable cv_;
|
|
1038
|
+
std::mutex done_mtx_;
|
|
1039
|
+
std::condition_variable done_cv_;
|
|
1040
|
+
std::atomic<int> pending_{0};
|
|
1041
|
+
bool stop_ = false;
|
|
1042
|
+
};
|
|
1043
|
+
|
|
1044
|
+
// Accessor for the process-wide ThreadPool.
// The pool is a function-local static, so it is constructed on the first call
// and the same instance is returned afterwards.
static ThreadPool& pool() {
  static ThreadPool shared_pool;
  return shared_pool;
}
|
|
1048
|
+
|
|
1049
|
+
// --- Fast Validation Registry ---
|
|
1050
|
+
// Global schema slots for V8 Fast API (bypasses NAPI overhead)
|
|
1051
|
+
static constexpr size_t MAX_FAST_SLOTS = 256;
|
|
1052
|
+
static ata::schema_ref g_fast_schemas[MAX_FAST_SLOTS];
|
|
1053
|
+
static std::string g_fast_schema_jsons[MAX_FAST_SLOTS];
|
|
1054
|
+
static uint32_t g_fast_slot_count = 0;
|
|
1055
|
+
|
|
1056
|
+
// Register a compiled schema in a fast slot, returns slot ID
|
|
1057
|
+
// Compiles a schema (JSON text) and stores it in the next free fast slot.
// Returns the slot id as a JS number. Throws a JS exception and returns
// undefined when the argument is not a string, when all MAX_FAST_SLOTS slots
// are taken, or when the schema fails to compile.
Napi::Value FastRegister(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();

  const bool has_schema_arg = info.Length() >= 1 && info[0].IsString();
  if (!has_schema_arg) {
    Napi::TypeError::New(env, "Schema JSON string expected").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  if (!(g_fast_slot_count < MAX_FAST_SLOTS)) {
    Napi::Error::New(env, "Max fast schema slots reached").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  std::string source = info[0].As<Napi::String>().Utf8Value();
  auto compiled = ata::compile(source);
  if (!compiled) {
    Napi::Error::New(env, "Failed to compile schema").ThrowAsJavaScriptException();
    return env.Undefined();
  }

  // Claim the slot only after compilation succeeded. The schema text is kept
  // next to the compiled form in g_fast_schema_jsons.
  const uint32_t id = g_fast_slot_count;
  g_fast_slot_count = id + 1;
  g_fast_schemas[id] = std::move(compiled);
  g_fast_schema_jsons[id] = source;
  return Napi::Number::New(env, id);
}
|
|
1078
|
+
|
|
1079
|
+
// Fast validation: slot + Uint8Array → bool (called via V8 Fast API)
|
|
1080
|
+
static bool FastValidateImpl(uint32_t slot, const uint8_t* data, size_t length) {
|
|
1081
|
+
if (slot >= g_fast_slot_count) return false;
|
|
1082
|
+
auto result = ata::validate(g_fast_schemas[slot],
|
|
1083
|
+
std::string_view(reinterpret_cast<const char*>(data), length));
|
|
1084
|
+
return result.valid;
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
// Zero-copy validation with pre-padded buffer
|
|
1088
|
+
static bool FastValidatePrepadded(uint32_t slot, const uint8_t* data, size_t length) {
|
|
1089
|
+
if (slot >= g_fast_slot_count) return false;
|
|
1090
|
+
return ata::is_valid_prepadded(g_fast_schemas[slot],
|
|
1091
|
+
reinterpret_cast<const char*>(data), length);
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
// Slow path (NAPI) — called when V8 can't use fast path
|
|
1095
|
+
// node-addon-api entry point: fastValidate(slot, payload) -> boolean.
// `payload` may be a Uint8Array, a Buffer or a string; anything else (or a
// missing / non-numeric slot) yields false.
Napi::Value FastValidateSlow(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();
  bool verdict = false;

  if (info.Length() >= 2 && info[0].IsNumber()) {
    const uint32_t slot = info[0].As<Napi::Number>().Uint32Value();
    const Napi::Value payload = info[1];

    if (payload.IsTypedArray() &&
        payload.As<Napi::TypedArray>().TypedArrayType() == napi_uint8_array) {
      auto bytes = payload.As<Napi::Uint8Array>();
      verdict = FastValidateImpl(slot, bytes.Data(), bytes.ByteLength());
    } else if (payload.IsBuffer()) {
      auto bytes = payload.As<Napi::Buffer<uint8_t>>();
      verdict = FastValidateImpl(slot, bytes.Data(), bytes.Length());
    } else if (payload.IsString()) {
      // Strings have to be copied out of the JS heap as UTF-8 first.
      std::string text = payload.As<Napi::String>().Utf8Value();
      verdict = FastValidateImpl(
          slot, reinterpret_cast<const uint8_t*>(text.data()), text.size());
    }
  }

  return Napi::Boolean::New(env, verdict);
}
|
|
1121
|
+
|
|
1122
|
+
// --- Raw NAPI fast path (minimal overhead) ---
|
|
1123
|
+
static napi_value RawFastValidate(napi_env env, napi_callback_info info) {
|
|
1124
|
+
size_t argc = 3;
|
|
1125
|
+
napi_value args[3];
|
|
1126
|
+
napi_get_cb_info(env, info, &argc, args, nullptr, nullptr);
|
|
1127
|
+
|
|
1128
|
+
if (argc < 2) {
|
|
1129
|
+
napi_value result;
|
|
1130
|
+
napi_get_boolean(env, false, &result);
|
|
1131
|
+
return result;
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
uint32_t slot;
|
|
1135
|
+
napi_get_value_uint32(env, args[0], &slot);
|
|
1136
|
+
|
|
1137
|
+
// Check if pre-padded mode (3rd arg = json length, buffer has padding)
|
|
1138
|
+
bool prepadded = (argc >= 3);
|
|
1139
|
+
uint32_t json_length = 0;
|
|
1140
|
+
if (prepadded) {
|
|
1141
|
+
napi_get_value_uint32(env, args[2], &json_length);
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
bool valid = false;
|
|
1145
|
+
|
|
1146
|
+
// Try typed array first (zero-copy)
|
|
1147
|
+
bool is_typedarray = false;
|
|
1148
|
+
napi_is_typedarray(env, args[1], &is_typedarray);
|
|
1149
|
+
|
|
1150
|
+
if (is_typedarray) {
|
|
1151
|
+
napi_typedarray_type type;
|
|
1152
|
+
size_t length;
|
|
1153
|
+
void* data;
|
|
1154
|
+
napi_get_typedarray_info(env, args[1], &type, &length, &data, nullptr, nullptr);
|
|
1155
|
+
if (data) {
|
|
1156
|
+
size_t actual_len = prepadded ? json_length : length;
|
|
1157
|
+
if (prepadded) {
|
|
1158
|
+
valid = FastValidatePrepadded(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1159
|
+
} else {
|
|
1160
|
+
valid = FastValidateImpl(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
} else {
|
|
1164
|
+
bool is_buffer = false;
|
|
1165
|
+
napi_is_buffer(env, args[1], &is_buffer);
|
|
1166
|
+
if (is_buffer) {
|
|
1167
|
+
void* data;
|
|
1168
|
+
size_t length;
|
|
1169
|
+
napi_get_buffer_info(env, args[1], &data, &length);
|
|
1170
|
+
if (data) {
|
|
1171
|
+
size_t actual_len = prepadded ? json_length : length;
|
|
1172
|
+
if (prepadded) {
|
|
1173
|
+
valid = FastValidatePrepadded(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1174
|
+
} else {
|
|
1175
|
+
valid = FastValidateImpl(slot, static_cast<const uint8_t*>(data), actual_len);
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
} else {
|
|
1179
|
+
// String — must copy (can't pre-pad strings)
|
|
1180
|
+
size_t len;
|
|
1181
|
+
napi_get_value_string_utf8(env, args[1], nullptr, 0, &len);
|
|
1182
|
+
if (len <= 4096) {
|
|
1183
|
+
char buf[4097];
|
|
1184
|
+
napi_get_value_string_utf8(env, args[1], buf, len + 1, &len);
|
|
1185
|
+
valid = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(buf), len);
|
|
1186
|
+
} else {
|
|
1187
|
+
std::string buf(len, '\0');
|
|
1188
|
+
napi_get_value_string_utf8(env, args[1], buf.data(), len + 1, &len);
|
|
1189
|
+
valid = FastValidateImpl(slot, reinterpret_cast<const uint8_t*>(buf.data()), len);
|
|
1190
|
+
}
|
|
1191
|
+
}
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
napi_value result;
|
|
1195
|
+
napi_get_boolean(env, valid, &result);
|
|
1196
|
+
return result;
|
|
1197
|
+
}
|
|
1198
|
+
|
|
1199
|
+
// --- Batch validation: one NAPI call, N validations ---
|
|
1200
|
+
// Raw Node-API entry point: rawBatchValidate(slot, items) -> boolean[].
//
// `items` is a JS array whose elements may each be a Buffer, a typed array or
// a string. Returns one boolean per element, or null when the slot is not
// registered or the arguments cannot be read as (number, array). Elements of
// an unsupported type, and empty byte payloads, validate as false.
static napi_value RawBatchValidate(napi_env env, napi_callback_info info) {
  const auto js_null = [env]() {
    napi_value out = nullptr;
    napi_get_null(env, &out);
    return out;
  };

  size_t argc = 2;
  napi_value args[2];
  if (napi_get_cb_info(env, info, &argc, args, nullptr, nullptr) != napi_ok) {
    return js_null();
  }

  // Checking the status keeps `slot` from being read uninitialised when the
  // first argument is not a number.
  uint32_t slot = 0;
  if (napi_get_value_uint32(env, args[0], &slot) != napi_ok || slot >= g_fast_slot_count) {
    return js_null();
  }

  // Same for the length: a non-array second argument must not produce a
  // garbage iteration count.
  uint32_t arr_len = 0;
  if (napi_get_array_length(env, args[1], &arr_len) != napi_ok) {
    return js_null();
  }

  napi_value result_arr;
  napi_create_array_with_length(env, arr_len, &result_arr);

  for (uint32_t i = 0; i < arr_len; i++) {
    bool valid = false;
    napi_value item;

    if (napi_get_element(env, args[1], i, &item) == napi_ok) {
      void* data = nullptr;
      size_t length = 0;
      bool have_bytes = false;   // item is a Buffer or typed array
      bool bytes_ok = false;     // ...and its storage could be read

      bool is_buffer = false;
      napi_is_buffer(env, item, &is_buffer);
      if (is_buffer) {
        have_bytes = true;
        bytes_ok = napi_get_buffer_info(env, item, &data, &length) == napi_ok;
      } else {
        bool is_ta = false;
        napi_is_typedarray(env, item, &is_ta);
        if (is_ta) {
          have_bytes = true;
          napi_typedarray_type type;
          bytes_ok = napi_get_typedarray_info(env, item, &type, &length, &data,
                                              nullptr, nullptr) == napi_ok;
        }
      }

      if (have_bytes) {
        if (bytes_ok && data && length > 0) {
          valid = ata::validate(g_fast_schemas[slot],
                                std::string_view(static_cast<const char*>(data), length)).valid;
        }
      } else {
        // String item: query the length, then copy. Anything that is not a
        // string fails the length query and stays invalid.
        size_t len = 0;
        if (napi_get_value_string_utf8(env, item, nullptr, 0, &len) == napi_ok) {
          std::string buf(len, '\0');
          if (napi_get_value_string_utf8(env, item, buf.data(), len + 1, &len) == napi_ok) {
            valid = ata::validate(g_fast_schemas[slot],
                                  std::string_view(buf.data(), len)).valid;
          }
        }
      }
    }

    napi_value bval;
    napi_get_boolean(env, valid, &bval);
    napi_set_element(env, result_arr, i, bval);
  }
  return result_arr;
}
|
|
1257
|
+
|
|
1258
|
+
// --- Parallel NDJSON: multi-core validation, ajv can't do this ---
|
|
1259
|
+
// Raw Node-API entry point: rawParallelValidate(slot, ndjson) -> boolean[].
//
// `ndjson` is a Buffer or typed array holding newline-delimited JSON. Blank
// lines are skipped; the result has one boolean per non-empty line. Returns
// null for an unregistered / unreadable slot and an empty array for missing or
// empty input. Large batches are spread over the shared thread pool; small
// ones are validated on the calling thread.
static napi_value RawParallelValidate(napi_env env, napi_callback_info info) {
  size_t argc = 2;
  napi_value args[2];

  // Checking the status keeps `slot` from being read uninitialised.
  uint32_t slot = 0;
  if (napi_get_cb_info(env, info, &argc, args, nullptr, nullptr) != napi_ok ||
      napi_get_value_uint32(env, args[0], &slot) != napi_ok ||
      slot >= g_fast_slot_count) {
    napi_value r; napi_get_null(env, &r); return r;
  }

  const char* data = nullptr;
  size_t total_len = 0;
  bool is_buffer = false;
  napi_is_buffer(env, args[1], &is_buffer);
  if (is_buffer) {
    void* d = nullptr;
    napi_get_buffer_info(env, args[1], &d, &total_len);
    data = static_cast<const char*>(d);
  } else {
    bool is_ta = false;
    napi_is_typedarray(env, args[1], &is_ta);
    if (is_ta) {
      napi_typedarray_type type; void* d = nullptr;
      napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
      data = static_cast<const char*>(d);
    }
  }
  if (!data || total_len == 0) {
    napi_value r; napi_create_array_with_length(env, 0, &r); return r;
  }

  // Split lines
  struct line { const char* ptr; size_t len; };
  std::vector<line> lines;
  const char* start = data;
  const char* const end = data + total_len;
  while (start < end) {
    const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
    size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
    if (line_len > 0) lines.push_back({start, line_len});
    // Never step beyond one-past-the-end of the buffer.
    start = nl ? nl + 1 : end;
  }

  const size_t n = lines.size();
  // One byte per line. std::vector<bool> packs several elements into one word,
  // so workers writing neighbouring indices would race on the same memory.
  std::vector<uint8_t> results(n, 0);

  // Parallel validation across CPU cores
  unsigned num_threads = std::thread::hardware_concurrency();
  if (num_threads == 0) num_threads = 4;
  if (num_threads > n) num_threads = (unsigned)n;

  // Workers validate with their own compiled copy of the schema, rebuilt from
  // the stored schema text, rather than sharing g_fast_schemas[slot].
  const auto& schema_json = g_fast_schema_jsons[slot];

  if (n == 0 || schema_json.empty() || n < num_threads * 2) {
    // Fallback: single-threaded for small batches
    for (size_t i = 0; i < n; i++) {
      auto r = ata::validate(g_fast_schemas[slot], std::string_view(lines[i].ptr, lines[i].len));
      results[i] = r.valid ? 1 : 0;
    }
  } else {
    auto& tp = pool();
    const unsigned nworkers = tp.size();
    const size_t chunk = (n + nworkers - 1) / nworkers;

    for (unsigned t = 0; t < nworkers; t++) {
      const size_t from = t * chunk;
      if (from >= n) break;
      const size_t to = std::min(from + chunk, n);

      // Reference captures are safe: tp.wait() below blocks until every task
      // submitted here has finished.
      tp.submit([&results, &lines, from, to, slot, end](
          std::unordered_map<uint32_t, ata::schema_ref>& cache) {
        auto it = cache.find(slot);
        if (it == cache.end()) {
          it = cache.emplace(slot, ata::compile(g_fast_schema_jsons[slot])).first;
        }
        auto& s = it->second;
        for (size_t i = from; i < to; i++) {
          const line& ln = lines[i];
          // Bytes of the input buffer that follow this line act as padding.
          // Lines near the end of the buffer do not have enough of it, so
          // they go through ata::validate instead of the prepadded path.
          const size_t tail = static_cast<size_t>(end - (ln.ptr + ln.len));
          bool ok;
          if (tail >= ata::REQUIRED_PADDING) {
            ok = ata::is_valid_prepadded(s, ln.ptr, ln.len);
          } else {
            ok = ata::validate(s, std::string_view(ln.ptr, ln.len)).valid;
          }
          results[i] = ok ? 1 : 0;
        }
      });
    }
    tp.wait();
  }

  napi_value result_arr;
  napi_create_array_with_length(env, n, &result_arr);
  for (size_t i = 0; i < n; i++) {
    napi_value bval;
    napi_get_boolean(env, results[i] != 0, &bval);
    napi_set_element(env, result_arr, (uint32_t)i, bval);
  }
  return result_arr;
}
|
|
1357
|
+
|
|
1358
|
+
// --- Parallel count: returns just the number of valid items (no array overhead) ---
|
|
1359
|
+
// Raw Node-API entry point: rawParallelCount(slot, ndjson) -> number.
//
// Counts the non-empty lines of a newline-delimited JSON Buffer / typed array
// that validate against the schema in `slot`. Returns 0 for an unregistered or
// unreadable slot and for missing or empty input. Large batches are spread
// over the shared thread pool; small ones run on the calling thread.
static napi_value RawParallelCount(napi_env env, napi_callback_info info) {
  const auto js_uint = [env](uint32_t value) {
    napi_value out = nullptr;
    napi_create_uint32(env, value, &out);
    return out;
  };

  size_t argc = 2;
  napi_value args[2];

  // Checking the status keeps `slot` from being read uninitialised.
  uint32_t slot = 0;
  if (napi_get_cb_info(env, info, &argc, args, nullptr, nullptr) != napi_ok ||
      napi_get_value_uint32(env, args[0], &slot) != napi_ok ||
      slot >= g_fast_slot_count) {
    return js_uint(0);
  }

  const char* data = nullptr;
  size_t total_len = 0;
  bool is_buffer = false;
  napi_is_buffer(env, args[1], &is_buffer);
  if (is_buffer) {
    void* d = nullptr;
    napi_get_buffer_info(env, args[1], &d, &total_len);
    data = static_cast<const char*>(d);
  } else {
    bool is_ta = false;
    napi_is_typedarray(env, args[1], &is_ta);
    if (is_ta) {
      napi_typedarray_type type; void* d = nullptr;
      napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
      data = static_cast<const char*>(d);
    }
  }
  if (!data || total_len == 0) {
    return js_uint(0);
  }

  // Split the buffer into non-empty lines.
  struct line { const char* ptr; size_t len; };
  std::vector<line> lines;
  const char* start = data;
  const char* const end = data + total_len;
  while (start < end) {
    const char* nl = static_cast<const char*>(memchr(start, '\n', end - start));
    size_t line_len = nl ? (size_t)(nl - start) : (size_t)(end - start);
    if (line_len > 0) lines.push_back({start, line_len});
    // Never step beyond one-past-the-end of the buffer.
    start = nl ? nl + 1 : end;
  }

  const size_t n = lines.size();
  std::atomic<uint32_t> valid_count{0};

  auto& tp = pool();
  const unsigned nworkers = tp.size();
  const size_t chunk = (n + nworkers - 1) / nworkers;

  if (n < nworkers * 2) {
    // Small batch — single thread
    uint32_t cnt = 0;
    for (size_t i = 0; i < n; i++) {
      if (ata::validate(g_fast_schemas[slot], std::string_view(lines[i].ptr, lines[i].len)).valid)
        cnt++;
    }
    return js_uint(cnt);
  }

  for (unsigned t = 0; t < nworkers; t++) {
    const size_t from = t * chunk;
    if (from >= n) break;
    const size_t to = std::min(from + chunk, n);

    // Reference captures are safe: tp.wait() below blocks until every task
    // submitted here has finished.
    tp.submit([&valid_count, &lines, from, to, slot, end](
        std::unordered_map<uint32_t, ata::schema_ref>& cache) {
      auto it = cache.find(slot);
      if (it == cache.end()) {
        it = cache.emplace(slot, ata::compile(g_fast_schema_jsons[slot])).first;
      }
      auto& s = it->second;
      uint32_t local_cnt = 0;
      for (size_t i = from; i < to; i++) {
        const line& ln = lines[i];
        // Bytes of the input buffer that follow this line act as padding.
        // Lines near the end of the buffer do not have enough of it, so they
        // go through ata::validate instead of the prepadded path.
        const size_t tail = static_cast<size_t>(end - (ln.ptr + ln.len));
        const bool ok = tail >= ata::REQUIRED_PADDING
                            ? ata::is_valid_prepadded(s, ln.ptr, ln.len)
                            : ata::validate(s, std::string_view(ln.ptr, ln.len)).valid;
        if (ok) local_cnt++;
      }
      // One atomic add per task instead of one per line.
      valid_count.fetch_add(local_cnt, std::memory_order_relaxed);
    });
  }
  tp.wait();

  return js_uint(valid_count.load());
}
|
|
1444
|
+
|
|
1445
|
+
// --- NDJSON: single buffer, newline-delimited ---
|
|
1446
|
+
// Raw Node-API entry point: rawNDJSONValidate(slot, ndjson) -> boolean[].
//
// Validates every non-empty line of a newline-delimited JSON Buffer / typed
// array on the calling thread. The result holds exactly one boolean per
// non-empty line (blank lines produce no entry). Returns null for an
// unregistered or unreadable slot and an empty array for missing/empty input.
static napi_value RawNDJSONValidate(napi_env env, napi_callback_info info) {
  size_t argc = 2;
  napi_value args[2];

  // Checking the status keeps `slot` from being read uninitialised.
  uint32_t slot = 0;
  if (napi_get_cb_info(env, info, &argc, args, nullptr, nullptr) != napi_ok ||
      napi_get_value_uint32(env, args[0], &slot) != napi_ok ||
      slot >= g_fast_slot_count) {
    napi_value r; napi_get_null(env, &r); return r;
  }

  const char* data = nullptr;
  size_t total_len = 0;
  bool is_buffer = false;
  napi_is_buffer(env, args[1], &is_buffer);
  if (is_buffer) {
    void* d = nullptr;
    napi_get_buffer_info(env, args[1], &d, &total_len);
    data = static_cast<const char*>(d);
  } else {
    bool is_ta = false;
    napi_is_typedarray(env, args[1], &is_ta);
    if (is_ta) {
      napi_typedarray_type type; void* d = nullptr;
      napi_get_typedarray_info(env, args[1], &type, &total_len, &d, nullptr, nullptr);
      data = static_cast<const char*>(d);
    }
  }
  if (!data || total_len == 0) {
    napi_value r; napi_create_array_with_length(env, 0, &r); return r;
  }

  // Walks the buffer and calls visit(ptr, len) for every non-empty line.
  const auto for_each_line = [data, total_len](auto&& visit) {
    const char* cursor = data;
    const char* const end = data + total_len;
    while (cursor < end) {
      const char* nl = static_cast<const char*>(memchr(cursor, '\n', end - cursor));
      const size_t line_len = nl ? (size_t)(nl - cursor) : (size_t)(end - cursor);
      if (line_len > 0) visit(cursor, line_len);
      // Never step beyond one-past-the-end of the buffer.
      cursor = nl ? nl + 1 : end;
    }
  };

  // Count only the lines that will actually be validated, so the array has no
  // trailing holes when the input contains blank lines.
  uint32_t count = 0;
  for_each_line([&count](const char*, size_t) { count++; });

  napi_value result_arr;
  napi_create_array_with_length(env, count, &result_arr);

  uint32_t idx = 0;
  for_each_line([&](const char* ptr, size_t len) {
    auto r = ata::validate(g_fast_schemas[slot], std::string_view(ptr, len));
    napi_value bval;
    napi_get_boolean(env, r.valid, &bval);
    napi_set_element(env, result_arr, idx++, bval);
  });
  return result_arr;
}
|
|
1502
|
+
|
|
923
1503
|
// Module initialiser: registers the CompiledSchema class and every exported
// function on `exports`, then returns it.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  CompiledSchema::Init(env, exports);
  exports.Set("validate", Napi::Function::New(env, ValidateOneShot));
  exports.Set("version", Napi::Function::New(env, GetVersion));
  exports.Set("fastRegister", Napi::Function::New(env, FastRegister));
  exports.Set("fastValidate", Napi::Function::New(env, FastValidateSlow));

  // The raw* entry points use the plain C Node-API signature, so they are
  // created with napi_create_function instead of Napi::Function::New. A
  // function is exported only if it was actually created.
  const auto export_raw = [&](const char* name, napi_callback callback) {
    napi_value fn = nullptr;
    if (napi_create_function(env, name, NAPI_AUTO_LENGTH, callback, nullptr, &fn) == napi_ok) {
      exports.Set(name, Napi::Value(env, fn));
    }
  };

  export_raw("rawFastValidate", RawFastValidate);
  export_raw("rawBatchValidate", RawBatchValidate);
  export_raw("rawNDJSONValidate", RawNDJSONValidate);
  export_raw("rawParallelValidate", RawParallelValidate);
  export_raw("rawParallelCount", RawParallelCount);

  return exports;
}
|
|
929
1532
|
|
package/binding.gyp
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
"cflags!": ["-fno-exceptions"],
|
|
23
23
|
"cflags_cc!": ["-fno-exceptions"],
|
|
24
24
|
"cflags_cc": ["-std=c++20"],
|
|
25
|
-
"defines": ["NAPI_DISABLE_CPP_EXCEPTIONS"],
|
|
25
|
+
"defines": ["NAPI_DISABLE_CPP_EXCEPTIONS", "NDEBUG"],
|
|
26
26
|
"conditions": [
|
|
27
27
|
["OS=='mac'", {
|
|
28
28
|
"xcode_settings": {
|
package/include/ata.h
CHANGED
|
@@ -11,11 +11,11 @@
|
|
|
11
11
|
namespace ata {
|
|
12
12
|
|
|
13
13
|
inline constexpr uint32_t VERSION_MAJOR = 0;
|
|
14
|
-
inline constexpr uint32_t VERSION_MINOR =
|
|
15
|
-
inline constexpr uint32_t VERSION_REVISION =
|
|
14
|
+
inline constexpr uint32_t VERSION_MINOR = 4;
|
|
15
|
+
inline constexpr uint32_t VERSION_REVISION = 1;
|
|
16
16
|
|
|
17
17
|
inline constexpr std::string_view version() noexcept {
|
|
18
|
-
return "0.1
|
|
18
|
+
return "0.4.1";
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
enum class error_code : uint8_t {
|
|
@@ -86,4 +86,12 @@ validation_result validate(std::string_view schema_json,
|
|
|
86
86
|
std::string_view json,
|
|
87
87
|
const validate_options& opts = {});
|
|
88
88
|
|
|
89
|
+
// Ultra-fast boolean validation — no error collection, no allocation.
|
|
90
|
+
// Input MUST have at least 64 bytes of padding after data (simdjson requirement).
|
|
91
|
+
// Use this when you only need true/false and can provide pre-padded input.
|
|
92
|
+
bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t length);
|
|
93
|
+
|
|
94
|
+
// Required padding size for is_valid_prepadded
|
|
95
|
+
inline constexpr size_t REQUIRED_PADDING = 64;
|
|
96
|
+
|
|
89
97
|
} // namespace ata
|