ata-validator 0.1.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -187
- package/binding/ata_napi.cpp +903 -114
- package/binding.gyp +13 -2
- package/compat.d.ts +23 -0
- package/include/ata.h +10 -2
- package/index.d.ts +37 -0
- package/index.js +150 -5
- package/lib/js-compiler.js +845 -0
- package/package.json +15 -8
- package/prebuilds/darwin-arm64/ata-validator.node +0 -0
- package/src/ata.cpp +776 -125
package/src/ata.cpp
CHANGED
|
@@ -2,10 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
#include <algorithm>
|
|
4
4
|
#include <cmath>
|
|
5
|
-
#include <
|
|
5
|
+
#include <re2/re2.h>
|
|
6
6
|
#include <set>
|
|
7
7
|
#include <unordered_map>
|
|
8
8
|
|
|
9
|
+
#ifdef _WIN32
|
|
10
|
+
#include <windows.h>
|
|
11
|
+
#include <sysinfoapi.h>
|
|
12
|
+
#else
|
|
13
|
+
#include <unistd.h>
|
|
14
|
+
#endif
|
|
15
|
+
|
|
9
16
|
#include "simdjson.h"
|
|
10
17
|
|
|
11
18
|
// --- Fast format validators (no std::regex) ---
|
|
@@ -133,6 +140,43 @@ namespace ata {
|
|
|
133
140
|
|
|
134
141
|
using namespace simdjson;
|
|
135
142
|
|
|
143
|
+
// Canonical JSON: sort object keys for semantic equality comparison
|
|
144
|
+
static std::string canonical_json(dom::element el) {
|
|
145
|
+
switch (el.type()) {
|
|
146
|
+
case dom::element_type::OBJECT: {
|
|
147
|
+
dom::object obj; el.get(obj);
|
|
148
|
+
std::vector<std::pair<std::string_view, dom::element>> entries;
|
|
149
|
+
for (auto [k, v] : obj) entries.push_back({k, v});
|
|
150
|
+
std::sort(entries.begin(), entries.end(),
|
|
151
|
+
[](const auto& a, const auto& b) { return a.first < b.first; });
|
|
152
|
+
std::string r = "{";
|
|
153
|
+
for (size_t i = 0; i < entries.size(); ++i) {
|
|
154
|
+
if (i) r += ',';
|
|
155
|
+
r += '"';
|
|
156
|
+
r += entries[i].first;
|
|
157
|
+
r += "\":";
|
|
158
|
+
r += canonical_json(entries[i].second);
|
|
159
|
+
}
|
|
160
|
+
r += '}';
|
|
161
|
+
return r;
|
|
162
|
+
}
|
|
163
|
+
case dom::element_type::ARRAY: {
|
|
164
|
+
dom::array arr; el.get(arr);
|
|
165
|
+
std::string r = "[";
|
|
166
|
+
bool first = true;
|
|
167
|
+
for (auto v : arr) {
|
|
168
|
+
if (!first) r += ',';
|
|
169
|
+
first = false;
|
|
170
|
+
r += canonical_json(v);
|
|
171
|
+
}
|
|
172
|
+
r += ']';
|
|
173
|
+
return r;
|
|
174
|
+
}
|
|
175
|
+
default:
|
|
176
|
+
return std::string(minify(el));
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
136
180
|
// Forward declarations
|
|
137
181
|
struct schema_node;
|
|
138
182
|
using schema_node_ptr = std::shared_ptr<schema_node>;
|
|
@@ -153,7 +197,7 @@ struct schema_node {
|
|
|
153
197
|
std::optional<uint64_t> min_length;
|
|
154
198
|
std::optional<uint64_t> max_length;
|
|
155
199
|
std::optional<std::string> pattern;
|
|
156
|
-
std::shared_ptr<
|
|
200
|
+
std::shared_ptr<re2::RE2> compiled_pattern; // cached compiled regex (RE2)
|
|
157
201
|
|
|
158
202
|
// array
|
|
159
203
|
std::optional<uint64_t> min_items;
|
|
@@ -161,6 +205,9 @@ struct schema_node {
|
|
|
161
205
|
bool unique_items = false;
|
|
162
206
|
schema_node_ptr items_schema;
|
|
163
207
|
std::vector<schema_node_ptr> prefix_items;
|
|
208
|
+
schema_node_ptr contains_schema;
|
|
209
|
+
std::optional<uint64_t> min_contains;
|
|
210
|
+
std::optional<uint64_t> max_contains;
|
|
164
211
|
|
|
165
212
|
// object
|
|
166
213
|
std::unordered_map<std::string, schema_node_ptr> properties;
|
|
@@ -169,9 +216,17 @@ struct schema_node {
|
|
|
169
216
|
schema_node_ptr additional_properties_schema;
|
|
170
217
|
std::optional<uint64_t> min_properties;
|
|
171
218
|
std::optional<uint64_t> max_properties;
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
std::
|
|
219
|
+
schema_node_ptr property_names_schema;
|
|
220
|
+
std::unordered_map<std::string, std::vector<std::string>> dependent_required;
|
|
221
|
+
std::unordered_map<std::string, schema_node_ptr> dependent_schemas;
|
|
222
|
+
|
|
223
|
+
// patternProperties — each entry: (pattern_string, schema, compiled_regex)
|
|
224
|
+
struct pattern_prop {
|
|
225
|
+
std::string pattern;
|
|
226
|
+
schema_node_ptr schema;
|
|
227
|
+
std::shared_ptr<re2::RE2> compiled;
|
|
228
|
+
};
|
|
229
|
+
std::vector<pattern_prop> pattern_properties;
|
|
175
230
|
|
|
176
231
|
// enum / const
|
|
177
232
|
std::optional<std::string> enum_values_raw; // raw JSON array string
|
|
@@ -195,16 +250,56 @@ struct schema_node {
|
|
|
195
250
|
// $ref
|
|
196
251
|
std::string ref;
|
|
197
252
|
|
|
253
|
+
// $defs — stored on node for pointer navigation
|
|
254
|
+
std::unordered_map<std::string, schema_node_ptr> defs;
|
|
255
|
+
|
|
198
256
|
// boolean schema
|
|
199
257
|
std::optional<bool> boolean_schema;
|
|
200
258
|
};
|
|
201
259
|
|
|
260
|
+
// --- Codegen: flat bytecode plan ---
|
|
261
|
+
namespace cg {
|
|
262
|
+
enum class op : uint8_t {
|
|
263
|
+
END=0, EXPECT_OBJECT, EXPECT_ARRAY, EXPECT_STRING, EXPECT_NUMBER,
|
|
264
|
+
EXPECT_INTEGER, EXPECT_BOOLEAN, EXPECT_NULL, EXPECT_TYPE_MULTI,
|
|
265
|
+
CHECK_MINIMUM, CHECK_MAXIMUM, CHECK_EX_MINIMUM, CHECK_EX_MAXIMUM,
|
|
266
|
+
CHECK_MULTIPLE_OF, CHECK_MIN_LENGTH, CHECK_MAX_LENGTH, CHECK_PATTERN,
|
|
267
|
+
CHECK_FORMAT, CHECK_MIN_ITEMS, CHECK_MAX_ITEMS, CHECK_UNIQUE_ITEMS,
|
|
268
|
+
ARRAY_ITEMS, CHECK_REQUIRED, CHECK_MIN_PROPS, CHECK_MAX_PROPS,
|
|
269
|
+
OBJ_PROPS_START, OBJ_PROP, OBJ_PROPS_END, CHECK_NO_ADDITIONAL,
|
|
270
|
+
CHECK_ENUM_STR, CHECK_ENUM, CHECK_CONST, COMPOSITION,
|
|
271
|
+
};
|
|
272
|
+
struct ins { op o; uint32_t a=0, b=0; };
|
|
273
|
+
struct plan {
|
|
274
|
+
std::vector<ins> code;
|
|
275
|
+
std::vector<double> doubles;
|
|
276
|
+
std::vector<std::string> strings;
|
|
277
|
+
std::vector<std::shared_ptr<re2::RE2>> regexes;
|
|
278
|
+
std::vector<std::vector<std::string>> enum_sets;
|
|
279
|
+
std::vector<std::vector<std::string>> type_sets;
|
|
280
|
+
std::vector<uint8_t> format_ids;
|
|
281
|
+
std::vector<std::vector<ins>> subs;
|
|
282
|
+
};
|
|
283
|
+
} // namespace cg
|
|
284
|
+
|
|
202
285
|
struct compiled_schema {
|
|
203
286
|
schema_node_ptr root;
|
|
204
287
|
std::unordered_map<std::string, schema_node_ptr> defs;
|
|
205
288
|
std::string raw_schema;
|
|
206
|
-
dom::parser parser;
|
|
207
|
-
|
|
289
|
+
dom::parser parser; // used only at compile time
|
|
290
|
+
cg::plan gen_plan; // codegen validation plan
|
|
291
|
+
bool use_ondemand = false; // true if codegen plan supports On Demand
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
// Thread-local persistent parsers — reused across all validate calls on the
|
|
295
|
+
// same thread. Keeps internal buffers hot in cache and avoids re-allocation.
|
|
296
|
+
static dom::parser& tl_dom_parser() {
|
|
297
|
+
thread_local dom::parser p;
|
|
298
|
+
return p;
|
|
299
|
+
}
|
|
300
|
+
// Per-thread On Demand parser. Returns a reference to a thread_local
// instance, so every call on the same thread gets the same parser object.
static simdjson::ondemand::parser& tl_od_parser() {
  thread_local simdjson::ondemand::parser parser_for_this_thread;
  return parser_for_this_thread;
}
|
|
209
304
|
|
|
210
305
|
// --- Schema compilation ---
|
|
@@ -218,7 +313,9 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
218
313
|
|
|
219
314
|
// Boolean schema
|
|
220
315
|
if (el.is<bool>()) {
|
|
221
|
-
|
|
316
|
+
bool bval;
|
|
317
|
+
el.get(bval);
|
|
318
|
+
node->boolean_schema = bval;
|
|
222
319
|
return node;
|
|
223
320
|
}
|
|
224
321
|
|
|
@@ -226,7 +323,8 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
226
323
|
return node;
|
|
227
324
|
}
|
|
228
325
|
|
|
229
|
-
|
|
326
|
+
dom::object obj;
|
|
327
|
+
el.get(obj);
|
|
230
328
|
|
|
231
329
|
// $ref
|
|
232
330
|
dom::element ref_el;
|
|
@@ -245,7 +343,7 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
245
343
|
type_el.get(sv);
|
|
246
344
|
node->types.emplace_back(sv);
|
|
247
345
|
} else if (type_el.is<dom::array>()) {
|
|
248
|
-
for (auto t :
|
|
346
|
+
dom::array type_arr; type_el.get(type_arr); for (auto t : type_arr) {
|
|
249
347
|
std::string_view sv;
|
|
250
348
|
if (t.get(sv) == SUCCESS) {
|
|
251
349
|
node->types.emplace_back(sv);
|
|
@@ -291,11 +389,9 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
291
389
|
std::string_view sv;
|
|
292
390
|
if (str_el.get(sv) == SUCCESS) {
|
|
293
391
|
node->pattern = std::string(sv);
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
} catch (...) {
|
|
298
|
-
// Invalid regex — leave compiled_pattern null
|
|
392
|
+
auto re = std::make_shared<re2::RE2>(node->pattern.value());
|
|
393
|
+
if (re->ok()) {
|
|
394
|
+
node->compiled_pattern = std::move(re);
|
|
299
395
|
}
|
|
300
396
|
}
|
|
301
397
|
}
|
|
@@ -317,7 +413,7 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
317
413
|
// prefixItems (Draft 2020-12)
|
|
318
414
|
dom::element pi_el;
|
|
319
415
|
if (obj["prefixItems"].get(pi_el) == SUCCESS && pi_el.is<dom::array>()) {
|
|
320
|
-
for (auto item :
|
|
416
|
+
dom::array pi_arr; pi_el.get(pi_arr); for (auto item : pi_arr) {
|
|
321
417
|
node->prefix_items.push_back(compile_node(item, ctx));
|
|
322
418
|
}
|
|
323
419
|
}
|
|
@@ -327,17 +423,32 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
327
423
|
node->items_schema = compile_node(items_el, ctx);
|
|
328
424
|
}
|
|
329
425
|
|
|
426
|
+
// contains
|
|
427
|
+
dom::element contains_el;
|
|
428
|
+
if (obj["contains"].get(contains_el) == SUCCESS) {
|
|
429
|
+
node->contains_schema = compile_node(contains_el, ctx);
|
|
430
|
+
}
|
|
431
|
+
dom::element mc_el;
|
|
432
|
+
if (obj["minContains"].get(mc_el) == SUCCESS) {
|
|
433
|
+
uint64_t v;
|
|
434
|
+
if (mc_el.get(v) == SUCCESS) node->min_contains = v;
|
|
435
|
+
}
|
|
436
|
+
if (obj["maxContains"].get(mc_el) == SUCCESS) {
|
|
437
|
+
uint64_t v;
|
|
438
|
+
if (mc_el.get(v) == SUCCESS) node->max_contains = v;
|
|
439
|
+
}
|
|
440
|
+
|
|
330
441
|
// object constraints
|
|
331
442
|
dom::element props_el;
|
|
332
443
|
if (obj["properties"].get(props_el) == SUCCESS && props_el.is<dom::object>()) {
|
|
333
|
-
for (auto [key, val] :
|
|
444
|
+
dom::object props_obj; props_el.get(props_obj); for (auto [key, val] : props_obj) {
|
|
334
445
|
node->properties[std::string(key)] = compile_node(val, ctx);
|
|
335
446
|
}
|
|
336
447
|
}
|
|
337
448
|
|
|
338
449
|
dom::element req_el;
|
|
339
450
|
if (obj["required"].get(req_el) == SUCCESS && req_el.is<dom::array>()) {
|
|
340
|
-
for (auto r :
|
|
451
|
+
dom::array req_arr; req_el.get(req_arr); for (auto r : req_arr) {
|
|
341
452
|
std::string_view sv;
|
|
342
453
|
if (r.get(sv) == SUCCESS) {
|
|
343
454
|
node->required.emplace_back(sv);
|
|
@@ -348,7 +459,7 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
348
459
|
dom::element ap_el;
|
|
349
460
|
if (obj["additionalProperties"].get(ap_el) == SUCCESS) {
|
|
350
461
|
if (ap_el.is<bool>()) {
|
|
351
|
-
node->additional_properties_bool =
|
|
462
|
+
bool ap_bool; ap_el.get(ap_bool); node->additional_properties_bool = ap_bool;
|
|
352
463
|
} else {
|
|
353
464
|
node->additional_properties_schema = compile_node(ap_el, ctx);
|
|
354
465
|
}
|
|
@@ -363,13 +474,51 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
363
474
|
if (str_el.get(v) == SUCCESS) node->max_properties = v;
|
|
364
475
|
}
|
|
365
476
|
|
|
366
|
-
//
|
|
477
|
+
// propertyNames
|
|
478
|
+
dom::element pn_el;
|
|
479
|
+
if (obj["propertyNames"].get(pn_el) == SUCCESS) {
|
|
480
|
+
node->property_names_schema = compile_node(pn_el, ctx);
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
// dependentRequired
|
|
484
|
+
dom::element dr_el;
|
|
485
|
+
if (obj["dependentRequired"].get(dr_el) == SUCCESS &&
|
|
486
|
+
dr_el.is<dom::object>()) {
|
|
487
|
+
dom::object dr_obj; dr_el.get(dr_obj); for (auto [key, val] : dr_obj) {
|
|
488
|
+
std::vector<std::string> deps;
|
|
489
|
+
if (val.is<dom::array>()) {
|
|
490
|
+
dom::array val_arr; val.get(val_arr); for (auto d : val_arr) {
|
|
491
|
+
std::string_view sv;
|
|
492
|
+
if (d.get(sv) == SUCCESS) deps.emplace_back(sv);
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
node->dependent_required[std::string(key)] = std::move(deps);
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
// dependentSchemas
|
|
500
|
+
dom::element ds_el;
|
|
501
|
+
if (obj["dependentSchemas"].get(ds_el) == SUCCESS &&
|
|
502
|
+
ds_el.is<dom::object>()) {
|
|
503
|
+
dom::object ds_obj; ds_el.get(ds_obj); for (auto [key, val] : ds_obj) {
|
|
504
|
+
node->dependent_schemas[std::string(key)] = compile_node(val, ctx);
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
// patternProperties — compile regex at schema compile time
|
|
367
509
|
dom::element pp_el;
|
|
368
510
|
if (obj["patternProperties"].get(pp_el) == SUCCESS &&
|
|
369
511
|
pp_el.is<dom::object>()) {
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
512
|
+
dom::object pp_obj; pp_el.get(pp_obj);
|
|
513
|
+
for (auto [key, val] : pp_obj) {
|
|
514
|
+
schema_node::pattern_prop pp;
|
|
515
|
+
pp.pattern = std::string(key);
|
|
516
|
+
pp.schema = compile_node(val, ctx);
|
|
517
|
+
auto re = std::make_shared<re2::RE2>(pp.pattern);
|
|
518
|
+
if (re->ok()) {
|
|
519
|
+
pp.compiled = std::move(re);
|
|
520
|
+
}
|
|
521
|
+
node->pattern_properties.push_back(std::move(pp));
|
|
373
522
|
}
|
|
374
523
|
}
|
|
375
524
|
|
|
@@ -392,10 +541,10 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
392
541
|
// enum — pre-minify each value at compile time
|
|
393
542
|
dom::element enum_el;
|
|
394
543
|
if (obj["enum"].get(enum_el) == SUCCESS) {
|
|
395
|
-
node->enum_values_raw =
|
|
544
|
+
node->enum_values_raw = canonical_json(enum_el);
|
|
396
545
|
if (enum_el.is<dom::array>()) {
|
|
397
|
-
for (auto e :
|
|
398
|
-
node->enum_values_minified.push_back(
|
|
546
|
+
dom::array enum_arr; enum_el.get(enum_arr); for (auto e : enum_arr) {
|
|
547
|
+
node->enum_values_minified.push_back(canonical_json(e));
|
|
399
548
|
}
|
|
400
549
|
}
|
|
401
550
|
}
|
|
@@ -403,23 +552,26 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
403
552
|
// const
|
|
404
553
|
dom::element const_el;
|
|
405
554
|
if (obj["const"].get(const_el) == SUCCESS) {
|
|
406
|
-
node->const_value_raw =
|
|
555
|
+
node->const_value_raw = canonical_json(const_el);
|
|
407
556
|
}
|
|
408
557
|
|
|
409
558
|
// composition
|
|
410
559
|
dom::element comp_el;
|
|
411
560
|
if (obj["allOf"].get(comp_el) == SUCCESS && comp_el.is<dom::array>()) {
|
|
412
|
-
|
|
561
|
+
dom::array comp_arr; comp_el.get(comp_arr);
|
|
562
|
+
for (auto s : comp_arr) {
|
|
413
563
|
node->all_of.push_back(compile_node(s, ctx));
|
|
414
564
|
}
|
|
415
565
|
}
|
|
416
566
|
if (obj["anyOf"].get(comp_el) == SUCCESS && comp_el.is<dom::array>()) {
|
|
417
|
-
|
|
567
|
+
dom::array comp_arr2; comp_el.get(comp_arr2);
|
|
568
|
+
for (auto s : comp_arr2) {
|
|
418
569
|
node->any_of.push_back(compile_node(s, ctx));
|
|
419
570
|
}
|
|
420
571
|
}
|
|
421
572
|
if (obj["oneOf"].get(comp_el) == SUCCESS && comp_el.is<dom::array>()) {
|
|
422
|
-
|
|
573
|
+
dom::array comp_arr3; comp_el.get(comp_arr3);
|
|
574
|
+
for (auto s : comp_arr3) {
|
|
423
575
|
node->one_of.push_back(compile_node(s, ctx));
|
|
424
576
|
}
|
|
425
577
|
}
|
|
@@ -445,16 +597,20 @@ static schema_node_ptr compile_node(dom::element el,
|
|
|
445
597
|
// $defs / definitions
|
|
446
598
|
dom::element defs_el;
|
|
447
599
|
if (obj["$defs"].get(defs_el) == SUCCESS && defs_el.is<dom::object>()) {
|
|
448
|
-
for (auto [key, val] :
|
|
600
|
+
dom::object defs_obj; defs_el.get(defs_obj); for (auto [key, val] : defs_obj) {
|
|
449
601
|
std::string def_path = "#/$defs/" + std::string(key);
|
|
450
|
-
|
|
602
|
+
auto compiled = compile_node(val, ctx);
|
|
603
|
+
ctx.defs[def_path] = compiled;
|
|
604
|
+
node->defs[std::string(key)] = compiled;
|
|
451
605
|
}
|
|
452
606
|
}
|
|
453
607
|
if (obj["definitions"].get(defs_el) == SUCCESS &&
|
|
454
608
|
defs_el.is<dom::object>()) {
|
|
455
|
-
for (auto [key, val] :
|
|
609
|
+
dom::object defs_obj; defs_el.get(defs_obj); for (auto [key, val] : defs_obj) {
|
|
456
610
|
std::string def_path = "#/definitions/" + std::string(key);
|
|
457
|
-
|
|
611
|
+
auto compiled = compile_node(val, ctx);
|
|
612
|
+
ctx.defs[def_path] = compiled;
|
|
613
|
+
node->defs[std::string(key)] = compiled;
|
|
458
614
|
}
|
|
459
615
|
}
|
|
460
616
|
|
|
@@ -538,79 +694,106 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
538
694
|
return;
|
|
539
695
|
}
|
|
540
696
|
|
|
541
|
-
// $ref
|
|
697
|
+
// $ref — Draft 2020-12: $ref is not a short-circuit, sibling keywords still apply
|
|
698
|
+
bool ref_resolved = false;
|
|
542
699
|
if (!node->ref.empty()) {
|
|
543
700
|
// First check defs map
|
|
544
701
|
auto it = ctx.defs.find(node->ref);
|
|
545
702
|
if (it != ctx.defs.end()) {
|
|
546
703
|
validate_node(it->second, value, path, ctx, errors, all_errors);
|
|
547
|
-
|
|
704
|
+
ref_resolved = true;
|
|
548
705
|
}
|
|
549
706
|
// Try JSON Pointer resolution from root (e.g., "#/properties/foo")
|
|
550
707
|
if (node->ref.size() > 1 && node->ref[0] == '#' &&
|
|
551
708
|
node->ref[1] == '/') {
|
|
552
|
-
//
|
|
553
|
-
std::string
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
709
|
+
// Decode JSON Pointer segments
|
|
710
|
+
auto decode_pointer_segment = [](const std::string& seg) -> std::string {
|
|
711
|
+
// Percent-decode first
|
|
712
|
+
std::string pct;
|
|
713
|
+
for (size_t i = 0; i < seg.size(); ++i) {
|
|
714
|
+
if (seg[i] == '%' && i + 2 < seg.size()) {
|
|
715
|
+
char h = seg[i+1], l = seg[i+2];
|
|
716
|
+
auto hex = [](char c) -> int {
|
|
717
|
+
if (c >= '0' && c <= '9') return c - '0';
|
|
718
|
+
if (c >= 'a' && c <= 'f') return 10 + c - 'a';
|
|
719
|
+
if (c >= 'A' && c <= 'F') return 10 + c - 'A';
|
|
720
|
+
return -1;
|
|
721
|
+
};
|
|
722
|
+
int hv = hex(h), lv = hex(l);
|
|
723
|
+
if (hv >= 0 && lv >= 0) {
|
|
724
|
+
pct += static_cast<char>(hv * 16 + lv);
|
|
725
|
+
i += 2;
|
|
726
|
+
} else {
|
|
727
|
+
pct += seg[i];
|
|
728
|
+
}
|
|
568
729
|
} else {
|
|
569
|
-
|
|
730
|
+
pct += seg[i];
|
|
570
731
|
}
|
|
571
732
|
}
|
|
572
|
-
//
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
733
|
+
// Then JSON Pointer unescape: ~1 -> /, ~0 -> ~
|
|
734
|
+
std::string out;
|
|
735
|
+
for (size_t i = 0; i < pct.size(); ++i) {
|
|
736
|
+
if (pct[i] == '~' && i + 1 < pct.size()) {
|
|
737
|
+
if (pct[i + 1] == '1') { out += '/'; ++i; }
|
|
738
|
+
else if (pct[i + 1] == '0') { out += '~'; ++i; }
|
|
739
|
+
else out += pct[i];
|
|
740
|
+
} else {
|
|
741
|
+
out += pct[i];
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
return out;
|
|
745
|
+
};
|
|
746
|
+
|
|
747
|
+
// Split pointer into segments
|
|
748
|
+
std::string pointer = node->ref.substr(2);
|
|
749
|
+
std::vector<std::string> segments;
|
|
750
|
+
size_t spos = 0;
|
|
751
|
+
while (spos < pointer.size()) {
|
|
752
|
+
size_t snext = pointer.find('/', spos);
|
|
753
|
+
segments.push_back(decode_pointer_segment(
|
|
754
|
+
pointer.substr(spos, snext == std::string::npos ? snext : snext - spos)));
|
|
755
|
+
spos = (snext == std::string::npos) ? pointer.size() : snext + 1;
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
// Walk the schema tree
|
|
759
|
+
schema_node_ptr current = ctx.root;
|
|
760
|
+
bool resolved = true;
|
|
761
|
+
for (size_t si = 0; si < segments.size() && current; ++si) {
|
|
762
|
+
const auto& key = segments[si];
|
|
763
|
+
|
|
764
|
+
if (key == "properties" && si + 1 < segments.size()) {
|
|
765
|
+
const auto& prop_name = segments[++si];
|
|
579
766
|
auto pit = current->properties.find(prop_name);
|
|
580
767
|
if (pit != current->properties.end()) {
|
|
581
768
|
current = pit->second;
|
|
582
|
-
} else {
|
|
583
|
-
resolved = false; break;
|
|
584
|
-
}
|
|
769
|
+
} else { resolved = false; break; }
|
|
585
770
|
} else if (key == "items" && current->items_schema) {
|
|
586
771
|
current = current->items_schema;
|
|
587
772
|
} else if (key == "$defs" || key == "definitions") {
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
773
|
+
if (si + 1 < segments.size()) {
|
|
774
|
+
const auto& def_name = segments[++si];
|
|
775
|
+
// Navigate into node's defs map
|
|
776
|
+
auto dit = current->defs.find(def_name);
|
|
777
|
+
if (dit != current->defs.end()) {
|
|
778
|
+
current = dit->second;
|
|
779
|
+
} else {
|
|
780
|
+
// Fallback: try ctx.defs with full path
|
|
781
|
+
std::string full_ref = "#/" + key + "/" + def_name;
|
|
782
|
+
auto cit = ctx.defs.find(full_ref);
|
|
783
|
+
if (cit != ctx.defs.end()) {
|
|
784
|
+
current = cit->second;
|
|
785
|
+
} else { resolved = false; break; }
|
|
786
|
+
}
|
|
787
|
+
} else { resolved = false; break; }
|
|
600
788
|
} else if (key == "allOf" || key == "anyOf" || key == "oneOf") {
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
if (idx < vec.size()) {
|
|
610
|
-
current = vec[idx];
|
|
611
|
-
} else {
|
|
612
|
-
resolved = false; break;
|
|
613
|
-
}
|
|
789
|
+
if (si + 1 < segments.size()) {
|
|
790
|
+
size_t idx = std::stoul(segments[++si]);
|
|
791
|
+
auto& vec = (key == "allOf") ? current->all_of
|
|
792
|
+
: (key == "anyOf") ? current->any_of
|
|
793
|
+
: current->one_of;
|
|
794
|
+
if (idx < vec.size()) { current = vec[idx]; }
|
|
795
|
+
else { resolved = false; break; }
|
|
796
|
+
} else { resolved = false; break; }
|
|
614
797
|
} else if (key == "not" && current->not_schema) {
|
|
615
798
|
current = current->not_schema;
|
|
616
799
|
} else if (key == "if" && current->if_schema) {
|
|
@@ -623,34 +806,29 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
623
806
|
current->additional_properties_schema) {
|
|
624
807
|
current = current->additional_properties_schema;
|
|
625
808
|
} else if (key == "prefixItems") {
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
if (idx < current->prefix_items.size()) {
|
|
632
|
-
current = current->prefix_items[idx];
|
|
633
|
-
} else {
|
|
634
|
-
resolved = false; break;
|
|
635
|
-
}
|
|
809
|
+
if (si + 1 < segments.size()) {
|
|
810
|
+
size_t idx = std::stoul(segments[++si]);
|
|
811
|
+
if (idx < current->prefix_items.size()) { current = current->prefix_items[idx]; }
|
|
812
|
+
else { resolved = false; break; }
|
|
813
|
+
} else { resolved = false; break; }
|
|
636
814
|
} else {
|
|
637
815
|
resolved = false; break;
|
|
638
816
|
}
|
|
639
|
-
pos = (next == std::string::npos) ? pointer.size() : next + 1;
|
|
640
817
|
}
|
|
641
818
|
if (resolved && current) {
|
|
642
819
|
validate_node(current, value, path, ctx, errors, all_errors);
|
|
643
|
-
|
|
820
|
+
ref_resolved = true;
|
|
644
821
|
}
|
|
645
822
|
}
|
|
646
823
|
// Self-reference: "#"
|
|
647
|
-
if (node->ref == "#" && ctx.root) {
|
|
824
|
+
if (!ref_resolved && node->ref == "#" && ctx.root) {
|
|
648
825
|
validate_node(ctx.root, value, path, ctx, errors, all_errors);
|
|
649
|
-
|
|
826
|
+
ref_resolved = true;
|
|
827
|
+
}
|
|
828
|
+
if (!ref_resolved) {
|
|
829
|
+
errors.push_back({error_code::ref_not_found, path,
|
|
830
|
+
"cannot resolve $ref: " + node->ref});
|
|
650
831
|
}
|
|
651
|
-
errors.push_back({error_code::ref_not_found, path,
|
|
652
|
-
"cannot resolve $ref: " + node->ref});
|
|
653
|
-
return;
|
|
654
832
|
}
|
|
655
833
|
|
|
656
834
|
// type
|
|
@@ -676,7 +854,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
676
854
|
|
|
677
855
|
// enum — use pre-minified values (no re-parsing)
|
|
678
856
|
if (!node->enum_values_minified.empty()) {
|
|
679
|
-
std::string val_str =
|
|
857
|
+
std::string val_str = canonical_json(value);
|
|
680
858
|
bool found = false;
|
|
681
859
|
for (const auto& ev : node->enum_values_minified) {
|
|
682
860
|
if (ev == val_str) {
|
|
@@ -692,7 +870,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
692
870
|
|
|
693
871
|
// const
|
|
694
872
|
if (node->const_value_raw.has_value()) {
|
|
695
|
-
std::string val_str =
|
|
873
|
+
std::string val_str = canonical_json(value);
|
|
696
874
|
if (val_str != node->const_value_raw.value()) {
|
|
697
875
|
errors.push_back({error_code::const_mismatch, path,
|
|
698
876
|
"value does not match const"});
|
|
@@ -758,7 +936,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
758
936
|
std::to_string(node->max_length.value())});
|
|
759
937
|
}
|
|
760
938
|
if (node->compiled_pattern) {
|
|
761
|
-
if (!
|
|
939
|
+
if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()), *node->compiled_pattern)) {
|
|
762
940
|
errors.push_back({error_code::pattern_mismatch, path,
|
|
763
941
|
"string does not match pattern: " +
|
|
764
942
|
node->pattern.value()});
|
|
@@ -776,7 +954,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
776
954
|
|
|
777
955
|
// Array validations
|
|
778
956
|
if (actual_type == "array" && value.is<dom::array>()) {
|
|
779
|
-
|
|
957
|
+
dom::array arr; value.get(arr);
|
|
780
958
|
uint64_t arr_size = 0;
|
|
781
959
|
for ([[maybe_unused]] auto _ : arr) ++arr_size;
|
|
782
960
|
|
|
@@ -797,7 +975,7 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
797
975
|
std::set<std::string> seen;
|
|
798
976
|
bool has_dup = false;
|
|
799
977
|
for (auto item : arr) {
|
|
800
|
-
auto s =
|
|
978
|
+
auto s = canonical_json(item);
|
|
801
979
|
if (!seen.insert(s).second) {
|
|
802
980
|
has_dup = true;
|
|
803
981
|
break;
|
|
@@ -815,19 +993,41 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
815
993
|
for (auto item : arr) {
|
|
816
994
|
if (idx < node->prefix_items.size()) {
|
|
817
995
|
validate_node(node->prefix_items[idx], item,
|
|
818
|
-
path + "/" + std::to_string(idx), ctx, errors);
|
|
996
|
+
path + "/" + std::to_string(idx), ctx, errors, all_errors);
|
|
819
997
|
} else if (node->items_schema) {
|
|
820
998
|
validate_node(node->items_schema, item,
|
|
821
|
-
path + "/" + std::to_string(idx), ctx, errors);
|
|
999
|
+
path + "/" + std::to_string(idx), ctx, errors, all_errors);
|
|
822
1000
|
}
|
|
823
1001
|
++idx;
|
|
824
1002
|
}
|
|
825
1003
|
}
|
|
1004
|
+
|
|
1005
|
+
// contains / minContains / maxContains
|
|
1006
|
+
if (node->contains_schema) {
|
|
1007
|
+
uint64_t match_count = 0;
|
|
1008
|
+
for (auto item : arr) {
|
|
1009
|
+
std::vector<validation_error> tmp;
|
|
1010
|
+
validate_node(node->contains_schema, item, path, ctx, tmp, false);
|
|
1011
|
+
if (tmp.empty()) ++match_count;
|
|
1012
|
+
}
|
|
1013
|
+
uint64_t min_c = node->min_contains.value_or(1);
|
|
1014
|
+
uint64_t max_c = node->max_contains.value_or(arr_size);
|
|
1015
|
+
if (match_count < min_c) {
|
|
1016
|
+
errors.push_back({error_code::min_items_violation, path,
|
|
1017
|
+
"contains: " + std::to_string(match_count) +
|
|
1018
|
+
" matches, minimum " + std::to_string(min_c)});
|
|
1019
|
+
}
|
|
1020
|
+
if (match_count > max_c) {
|
|
1021
|
+
errors.push_back({error_code::max_items_violation, path,
|
|
1022
|
+
"contains: " + std::to_string(match_count) +
|
|
1023
|
+
" matches, maximum " + std::to_string(max_c)});
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
826
1026
|
}
|
|
827
1027
|
|
|
828
1028
|
// Object validations
|
|
829
1029
|
if (actual_type == "object" && value.is<dom::object>()) {
|
|
830
|
-
|
|
1030
|
+
dom::object obj; value.get(obj);
|
|
831
1031
|
uint64_t prop_count = 0;
|
|
832
1032
|
for ([[maybe_unused]] auto _ : obj) ++prop_count;
|
|
833
1033
|
|
|
@@ -867,15 +1067,11 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
867
1067
|
matched = true;
|
|
868
1068
|
}
|
|
869
1069
|
|
|
870
|
-
// Check patternProperties
|
|
871
|
-
for (const auto&
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
validate_node(pat_schema, val, path + "/" + key_str, ctx, errors, all_errors);
|
|
876
|
-
matched = true;
|
|
877
|
-
}
|
|
878
|
-
} catch (...) {
|
|
1070
|
+
// Check patternProperties (use cached compiled regex)
|
|
1071
|
+
for (const auto& pp : node->pattern_properties) {
|
|
1072
|
+
if (pp.compiled && re2::RE2::PartialMatch(key_str, *pp.compiled)) {
|
|
1073
|
+
validate_node(pp.schema, val, path + "/" + key_str, ctx, errors, all_errors);
|
|
1074
|
+
matched = true;
|
|
879
1075
|
}
|
|
880
1076
|
}
|
|
881
1077
|
|
|
@@ -892,6 +1088,43 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
892
1088
|
}
|
|
893
1089
|
}
|
|
894
1090
|
}
|
|
1091
|
+
|
|
1092
|
+
// propertyNames
|
|
1093
|
+
if (node->property_names_schema) {
|
|
1094
|
+
for (auto [key, val] : obj) {
|
|
1095
|
+
// Create a string element to validate the key
|
|
1096
|
+
std::string key_json = "\"" + std::string(key) + "\"";
|
|
1097
|
+
dom::parser key_parser;
|
|
1098
|
+
auto key_result = key_parser.parse(key_json);
|
|
1099
|
+
if (!key_result.error()) {
|
|
1100
|
+
validate_node(node->property_names_schema, key_result.value(),
|
|
1101
|
+
path, ctx, errors, all_errors);
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
// dependentRequired
|
|
1107
|
+
for (const auto& [prop, deps] : node->dependent_required) {
|
|
1108
|
+
dom::element dummy;
|
|
1109
|
+
if (obj[prop].get(dummy) == SUCCESS) {
|
|
1110
|
+
for (const auto& dep : deps) {
|
|
1111
|
+
dom::element dep_dummy;
|
|
1112
|
+
if (obj[dep].get(dep_dummy) != SUCCESS) {
|
|
1113
|
+
errors.push_back({error_code::required_property_missing, path,
|
|
1114
|
+
"property '" + prop + "' requires '" + dep +
|
|
1115
|
+
"' to be present"});
|
|
1116
|
+
}
|
|
1117
|
+
}
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
|
|
1121
|
+
// dependentSchemas
|
|
1122
|
+
for (const auto& [prop, schema] : node->dependent_schemas) {
|
|
1123
|
+
dom::element dummy;
|
|
1124
|
+
if (obj[prop].get(dummy) == SUCCESS) {
|
|
1125
|
+
validate_node(schema, value, path, ctx, errors, all_errors);
|
|
1126
|
+
}
|
|
1127
|
+
}
|
|
895
1128
|
}
|
|
896
1129
|
|
|
897
1130
|
// allOf
|
|
@@ -967,6 +1200,369 @@ static void validate_node(const schema_node_ptr& node,
|
|
|
967
1200
|
}
|
|
968
1201
|
}
|
|
969
1202
|
|
|
1203
|
+
// --- Codegen compiler ---
|
|
1204
|
+
static void cg_compile(const schema_node* n, cg::plan& p,
|
|
1205
|
+
std::vector<cg::ins>& out) {
|
|
1206
|
+
if (!n) return;
|
|
1207
|
+
if (n->boolean_schema.has_value()) {
|
|
1208
|
+
if (!*n->boolean_schema) out.push_back({cg::op::EXPECT_NULL});
|
|
1209
|
+
return;
|
|
1210
|
+
}
|
|
1211
|
+
// Composition fallback
|
|
1212
|
+
if (!n->ref.empty() || !n->all_of.empty() || !n->any_of.empty() ||
|
|
1213
|
+
!n->one_of.empty() || n->not_schema || n->if_schema) {
|
|
1214
|
+
uintptr_t ptr = reinterpret_cast<uintptr_t>(n);
|
|
1215
|
+
out.push_back({cg::op::COMPOSITION, (uint32_t)(ptr & 0xFFFFFFFF),
|
|
1216
|
+
(uint32_t)((ptr >> 32) & 0xFFFFFFFF)});
|
|
1217
|
+
return;
|
|
1218
|
+
}
|
|
1219
|
+
// Type
|
|
1220
|
+
if (!n->types.empty()) {
|
|
1221
|
+
if (n->types.size() == 1) {
|
|
1222
|
+
auto& t = n->types[0];
|
|
1223
|
+
if (t=="object") out.push_back({cg::op::EXPECT_OBJECT});
|
|
1224
|
+
else if (t=="array") out.push_back({cg::op::EXPECT_ARRAY});
|
|
1225
|
+
else if (t=="string") out.push_back({cg::op::EXPECT_STRING});
|
|
1226
|
+
else if (t=="number") out.push_back({cg::op::EXPECT_NUMBER});
|
|
1227
|
+
else if (t=="integer") out.push_back({cg::op::EXPECT_INTEGER});
|
|
1228
|
+
else if (t=="boolean") out.push_back({cg::op::EXPECT_BOOLEAN});
|
|
1229
|
+
else if (t=="null") out.push_back({cg::op::EXPECT_NULL});
|
|
1230
|
+
} else {
|
|
1231
|
+
uint32_t i = (uint32_t)p.type_sets.size();
|
|
1232
|
+
p.type_sets.push_back(n->types);
|
|
1233
|
+
out.push_back({cg::op::EXPECT_TYPE_MULTI, i});
|
|
1234
|
+
}
|
|
1235
|
+
}
|
|
1236
|
+
// Enum
|
|
1237
|
+
if (!n->enum_values_minified.empty()) {
|
|
1238
|
+
bool all_str = true;
|
|
1239
|
+
for (auto& e : n->enum_values_minified)
|
|
1240
|
+
if (e.empty() || e[0]!='"') { all_str=false; break; }
|
|
1241
|
+
uint32_t i = (uint32_t)p.enum_sets.size();
|
|
1242
|
+
p.enum_sets.push_back(n->enum_values_minified);
|
|
1243
|
+
out.push_back({all_str ? cg::op::CHECK_ENUM_STR : cg::op::CHECK_ENUM, i});
|
|
1244
|
+
}
|
|
1245
|
+
if (n->const_value_raw.has_value()) {
|
|
1246
|
+
uint32_t i=(uint32_t)p.strings.size();
|
|
1247
|
+
p.strings.push_back(*n->const_value_raw);
|
|
1248
|
+
out.push_back({cg::op::CHECK_CONST, i});
|
|
1249
|
+
}
|
|
1250
|
+
// Numeric
|
|
1251
|
+
if (n->minimum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->minimum); out.push_back({cg::op::CHECK_MINIMUM,i}); }
|
|
1252
|
+
if (n->maximum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->maximum); out.push_back({cg::op::CHECK_MAXIMUM,i}); }
|
|
1253
|
+
if (n->exclusive_minimum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->exclusive_minimum); out.push_back({cg::op::CHECK_EX_MINIMUM,i}); }
|
|
1254
|
+
if (n->exclusive_maximum.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->exclusive_maximum); out.push_back({cg::op::CHECK_EX_MAXIMUM,i}); }
|
|
1255
|
+
if (n->multiple_of.has_value()) { uint32_t i=(uint32_t)p.doubles.size(); p.doubles.push_back(*n->multiple_of); out.push_back({cg::op::CHECK_MULTIPLE_OF,i}); }
|
|
1256
|
+
// String
|
|
1257
|
+
if (n->min_length.has_value()) out.push_back({cg::op::CHECK_MIN_LENGTH,(uint32_t)*n->min_length});
|
|
1258
|
+
if (n->max_length.has_value()) out.push_back({cg::op::CHECK_MAX_LENGTH,(uint32_t)*n->max_length});
|
|
1259
|
+
if (n->compiled_pattern) { uint32_t i=(uint32_t)p.regexes.size(); p.regexes.push_back(n->compiled_pattern); out.push_back({cg::op::CHECK_PATTERN,i}); }
|
|
1260
|
+
if (n->format.has_value()) {
|
|
1261
|
+
uint32_t i=(uint32_t)p.format_ids.size();
|
|
1262
|
+
uint8_t fid=255;
|
|
1263
|
+
auto& f=*n->format;
|
|
1264
|
+
if(f=="email")fid=0;else if(f=="date")fid=1;else if(f=="date-time")fid=2;
|
|
1265
|
+
else if(f=="time")fid=3;else if(f=="ipv4")fid=4;else if(f=="ipv6")fid=5;
|
|
1266
|
+
else if(f=="uri"||f=="uri-reference")fid=6;else if(f=="uuid")fid=7;
|
|
1267
|
+
else if(f=="hostname")fid=8;
|
|
1268
|
+
p.format_ids.push_back(fid);
|
|
1269
|
+
out.push_back({cg::op::CHECK_FORMAT,i});
|
|
1270
|
+
}
|
|
1271
|
+
// Array
|
|
1272
|
+
if (n->min_items.has_value()) out.push_back({cg::op::CHECK_MIN_ITEMS,(uint32_t)*n->min_items});
|
|
1273
|
+
if (n->max_items.has_value()) out.push_back({cg::op::CHECK_MAX_ITEMS,(uint32_t)*n->max_items});
|
|
1274
|
+
if (n->unique_items) out.push_back({cg::op::CHECK_UNIQUE_ITEMS});
|
|
1275
|
+
if (n->items_schema) {
|
|
1276
|
+
uint32_t si=(uint32_t)p.subs.size();
|
|
1277
|
+
p.subs.emplace_back();
|
|
1278
|
+
std::vector<cg::ins> sub_code;
|
|
1279
|
+
cg_compile(n->items_schema.get(), p, sub_code);
|
|
1280
|
+
sub_code.push_back({cg::op::END});
|
|
1281
|
+
p.subs[si] = std::move(sub_code);
|
|
1282
|
+
out.push_back({cg::op::ARRAY_ITEMS, si});
|
|
1283
|
+
}
|
|
1284
|
+
// Object
|
|
1285
|
+
for (auto& r : n->required) { uint32_t i=(uint32_t)p.strings.size(); p.strings.push_back(r); out.push_back({cg::op::CHECK_REQUIRED,i}); }
|
|
1286
|
+
if (n->min_properties.has_value()) out.push_back({cg::op::CHECK_MIN_PROPS,(uint32_t)*n->min_properties});
|
|
1287
|
+
if (n->max_properties.has_value()) out.push_back({cg::op::CHECK_MAX_PROPS,(uint32_t)*n->max_properties});
|
|
1288
|
+
// additional_properties_schema requires tree walker — bail out to COMPOSITION
|
|
1289
|
+
if (n->additional_properties_schema) {
|
|
1290
|
+
out.push_back({cg::op::COMPOSITION, 0, 0});
|
|
1291
|
+
return;
|
|
1292
|
+
}
|
|
1293
|
+
if (!n->properties.empty() || (n->additional_properties_bool.has_value() && !*n->additional_properties_bool)) {
|
|
1294
|
+
out.push_back({cg::op::OBJ_PROPS_START});
|
|
1295
|
+
if (n->additional_properties_bool.has_value() && !*n->additional_properties_bool)
|
|
1296
|
+
out.push_back({cg::op::CHECK_NO_ADDITIONAL});
|
|
1297
|
+
for (auto& [name, schema] : n->properties) {
|
|
1298
|
+
uint32_t ni=(uint32_t)p.strings.size(); p.strings.push_back(name);
|
|
1299
|
+
uint32_t si=(uint32_t)p.subs.size();
|
|
1300
|
+
p.subs.emplace_back();
|
|
1301
|
+
std::vector<cg::ins> sub_code;
|
|
1302
|
+
cg_compile(schema.get(), p, sub_code);
|
|
1303
|
+
sub_code.push_back({cg::op::END});
|
|
1304
|
+
p.subs[si] = std::move(sub_code);
|
|
1305
|
+
out.push_back({cg::op::OBJ_PROP, ni, si});
|
|
1306
|
+
}
|
|
1307
|
+
out.push_back({cg::op::OBJ_PROPS_END});
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
// --- Codegen executor ---
|
|
1312
|
+
// Format names indexed by the numeric format id that cg_compile stores in
// plan.format_ids (0=email ... 8=hostname). The order here must stay in sync
// with that mapping; the executors only index this table for ids below 9.
static const char* fmt_names[]={"email","date","date-time","time","ipv4","ipv6","uri","uuid","hostname"};
|
|
1313
|
+
|
|
1314
|
+
// Runs a compiled instruction list against a DOM element.
//
//   p     - plan owning the constant pools and sub-programs the code indexes.
//   code  - instruction list to run (the root program or one of p.subs).
//   value - element under test.
//
// Returns true when every instruction accepts `value`. Returns false when a
// check fails and also when a COMPOSITION instruction is reached, so a false
// result means "not proven valid here", not necessarily "invalid".
// Type-specific checks are skipped when `value` is of a different JSON type.
static bool cg_exec(const cg::plan& p, const std::vector<cg::ins>& code,
                    dom::element value) {
  const auto t = type_of_sv(value);
  const bool is_num = (t == "integer" || t == "number");

  // True when `set` contains the minified string literal "<sv>". This is a
  // raw byte comparison; the canonical_json comparison below is the fallback.
  const auto has_quoted = [](const auto& set, std::string_view sv) {
    for (const auto& e : set) {
      if (e.size() == sv.size() + 2 && e[0] == '"' && e.back() == '"' &&
          e.compare(1, sv.size(), sv) == 0)
        return true;
    }
    return false;
  };
  // True when `set` contains the canonical serialization of `value`.
  const auto has_canonical = [&](const auto& set) {
    const std::string v = canonical_json(value);
    for (const auto& e : set) {
      if (e == v) return true;
    }
    return false;
  };

  for (size_t i = 0; i < code.size(); ++i) {
    const auto& c = code[i];
    switch (c.o) {
      case cg::op::END:
        return true;

      case cg::op::EXPECT_OBJECT:  if (t != "object") return false; break;
      case cg::op::EXPECT_ARRAY:   if (t != "array") return false; break;
      case cg::op::EXPECT_STRING:  if (t != "string") return false; break;
      case cg::op::EXPECT_NUMBER:  if (!is_num) return false; break;
      case cg::op::EXPECT_INTEGER: if (t != "integer") return false; break;
      case cg::op::EXPECT_BOOLEAN: if (t != "boolean") return false; break;
      case cg::op::EXPECT_NULL:    if (t != "null") return false; break;
      case cg::op::EXPECT_TYPE_MULTI: {
        const auto& ts = p.type_sets[c.a];
        bool m = false;
        for (const auto& ty : ts) {
          // "number" in the schema also admits integers.
          if (t == ty || (ty == "number" && is_num)) { m = true; break; }
        }
        if (!m) return false;
        break;
      }

      case cg::op::CHECK_MINIMUM:
        if (is_num && to_double(value) < p.doubles[c.a]) return false;
        break;
      case cg::op::CHECK_MAXIMUM:
        if (is_num && to_double(value) > p.doubles[c.a]) return false;
        break;
      case cg::op::CHECK_EX_MINIMUM:
        if (is_num && to_double(value) <= p.doubles[c.a]) return false;
        break;
      case cg::op::CHECK_EX_MAXIMUM:
        if (is_num && to_double(value) >= p.doubles[c.a]) return false;
        break;
      case cg::op::CHECK_MULTIPLE_OF:
        if (is_num) {
          // fmod's result carries the sign of the dividend, so compare
          // magnitudes; otherwise negative multiples (e.g. -0.3 vs 0.1) whose
          // remainder lands near -d are wrongly rejected.
          const double d = std::abs(p.doubles[c.a]);
          const double r = std::abs(std::fmod(to_double(value), d));
          if (r > 1e-8 && std::abs(r - d) > 1e-8) return false;
        }
        break;

      case cg::op::CHECK_MIN_LENGTH:
        if (t == "string") {
          std::string_view sv;
          value.get(sv);
          if (utf8_length(sv) < c.a) return false;
        }
        break;
      case cg::op::CHECK_MAX_LENGTH:
        if (t == "string") {
          std::string_view sv;
          value.get(sv);
          if (utf8_length(sv) > c.a) return false;
        }
        break;
      case cg::op::CHECK_PATTERN:
        if (t == "string") {
          std::string_view sv;
          value.get(sv);
          if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()),
                                      *p.regexes[c.a]))
            return false;
        }
        break;
      case cg::op::CHECK_FORMAT:
        if (t == "string") {
          std::string_view sv;
          value.get(sv);
          const uint8_t f = p.format_ids[c.a];
          // Ids outside the name table are not checked.
          if (f < 9 && !check_format(sv, fmt_names[f])) return false;
        }
        break;

      case cg::op::CHECK_MIN_ITEMS:
        if (t == "array") {
          dom::array a;
          value.get(a);
          uint64_t s = 0;
          for ([[maybe_unused]] auto _ : a) ++s;
          if (s < c.a) return false;
        }
        break;
      case cg::op::CHECK_MAX_ITEMS:
        if (t == "array") {
          dom::array a;
          value.get(a);
          uint64_t s = 0;
          for ([[maybe_unused]] auto _ : a) ++s;
          if (s > c.a) return false;
        }
        break;
      case cg::op::CHECK_UNIQUE_ITEMS:
        if (t == "array") {
          dom::array a;
          value.get(a);
          // Items are compared by their canonical serialization.
          std::set<std::string> seen;
          for (auto x : a) {
            if (!seen.insert(canonical_json(x)).second) return false;
          }
        }
        break;
      case cg::op::ARRAY_ITEMS:
        if (t == "array") {
          dom::array a;
          value.get(a);
          for (auto x : a) {
            if (!cg_exec(p, p.subs[c.a], x)) return false;
          }
        }
        break;

      case cg::op::CHECK_REQUIRED:
        if (t == "object") {
          dom::object o;
          value.get(o);
          dom::element d;
          if (o[p.strings[c.a]].get(d) != SUCCESS) return false;
        }
        break;
      case cg::op::CHECK_MIN_PROPS:
        if (t == "object") {
          dom::object o;
          value.get(o);
          uint64_t n = 0;
          for ([[maybe_unused]] auto _ : o) ++n;
          if (n < c.a) return false;
        }
        break;
      case cg::op::CHECK_MAX_PROPS:
        if (t == "object") {
          dom::object o;
          value.get(o);
          uint64_t n = 0;
          for ([[maybe_unused]] auto _ : o) ++n;
          if (n > c.a) return false;
        }
        break;

      case cg::op::OBJ_PROPS_START:
        if (t == "object") {
          dom::object o;
          value.get(o);
          // Collect the property definitions that follow, up to OBJ_PROPS_END.
          struct pd { std::string_view nm; uint32_t si; };
          std::vector<pd> props;
          bool no_add = false;
          size_t j = i + 1;
          for (; j < code.size() && code[j].o != cg::op::OBJ_PROPS_END; ++j) {
            if (code[j].o == cg::op::OBJ_PROP)
              props.push_back({p.strings[code[j].a], code[j].b});
            else if (code[j].o == cg::op::CHECK_NO_ADDITIONAL)
              no_add = true;
          }
          for (auto [key, val] : o) {
            bool matched = false;
            for (const auto& pp : props) {
              if (key == pp.nm) {
                if (!cg_exec(p, p.subs[pp.si], val)) return false;
                matched = true;
                break;
              }
            }
            if (!matched && no_add) return false;
          }
          i = j;  // resume after OBJ_PROPS_END (the loop increments i)
        } else {
          // Not an object: skip to OBJ_PROPS_END.
          size_t j = i + 1;
          for (; j < code.size() && code[j].o != cg::op::OBJ_PROPS_END; ++j) {}
          i = j;
        }
        break;
      case cg::op::OBJ_PROP:
      case cg::op::OBJ_PROPS_END:
      case cg::op::CHECK_NO_ADDITIONAL:
        // Consumed by OBJ_PROPS_START; nothing to do if reached directly.
        break;

      case cg::op::CHECK_ENUM_STR: {
        const auto& es = p.enum_sets[c.a];
        bool f = false;
        if (t == "string") {
          std::string_view sv;
          value.get(sv);
          f = has_quoted(es, sv);
        }
        if (!f) f = has_canonical(es);
        if (!f) return false;
        break;
      }
      case cg::op::CHECK_ENUM: {
        const auto& es = p.enum_sets[c.a];
        bool f = false;
        if (t == "string") {
          std::string_view sv;
          value.get(sv);
          f = has_quoted(es, sv);
        }
        if (!f && value.is<int64_t>()) {
          int64_t v;
          value.get(v);
          const auto s = std::to_string(v);
          for (const auto& e : es) {
            if (e == s) { f = true; break; }
          }
        }
        if (!f) f = has_canonical(es);
        if (!f) return false;
        break;
      }
      case cg::op::CHECK_CONST:
        if (canonical_json(value) != p.strings[c.a]) return false;
        break;

      case cg::op::COMPOSITION:
        return false;  // fallback to tree walker
    }
  }
  return true;
}
|
|
1386
|
+
|
|
1387
|
+
// --- On Demand fast path executor ---
|
|
1388
|
+
// Uses simdjson On Demand API to avoid materializing the full DOM tree.
|
|
1389
|
+
// Returns: true = valid, false = invalid OR unsupported (fallback to DOM).
|
|
1390
|
+
|
|
1391
|
+
// Maps an On Demand value to the JSON Schema type name used by the executors.
// Numbers reported as floating point become "number"; every other number
// (including the case where the number type cannot be determined) becomes
// "integer". Falls back to "unknown" for an unrecognized json_type.
static std::string_view od_type(simdjson::ondemand::value& v) {
  using jt = simdjson::ondemand::json_type;
  using nt = simdjson::ondemand::number_type;

  const jt kind = v.type();
  if (kind == jt::object) return "object";
  if (kind == jt::array) return "array";
  if (kind == jt::string) return "string";
  if (kind == jt::boolean) return "boolean";
  if (kind == jt::null) return "null";
  if (kind == jt::number) {
    nt flavor;
    const bool is_float = v.get_number_type().get(flavor) == SUCCESS &&
                          flavor == nt::floating_point_number;
    return is_float ? "number" : "integer";
  }
  return "unknown";
}
|
|
1408
|
+
|
|
1409
|
+
// Runs a compiled instruction list against a simdjson On Demand value.
//
//   p     - plan owning the constant pools and sub-programs the code indexes.
//   code  - instruction list to run (the root program or one of p.subs).
//   value - On Demand value under test.
//
// Returns true when every instruction accepts `value`. Returns false when a
// check fails, when an On Demand accessor reports an error, or when an
// instruction this executor does not support is reached; the caller is
// expected to retry on the DOM path in all of those cases.
//
// NOTE(review): several ops re-acquire the same array/object from `value`
// after an earlier op may already have iterated it (e.g. CHECK_MIN_ITEMS
// followed by ARRAY_ITEMS, or CHECK_REQUIRED followed by OBJ_PROPS_START).
// Confirm the simdjson version in use permits this, or rewind explicitly.
// NOTE(review): values this function never visits are presumably skipped
// without being fully validated by On Demand — confirm that a `true` result
// is acceptable for documents with malformed unvisited parts.
static bool od_exec(const cg::plan& p, const std::vector<cg::ins>& code,
                    simdjson::ondemand::value value) {
  const auto t = od_type(value);
  for (size_t i = 0; i < code.size(); ++i) {
    const auto& c = code[i];
    switch (c.o) {
      case cg::op::END:
        return true;

      case cg::op::EXPECT_OBJECT:  if (t != "object") return false; break;
      case cg::op::EXPECT_ARRAY:   if (t != "array") return false; break;
      case cg::op::EXPECT_STRING:  if (t != "string") return false; break;
      case cg::op::EXPECT_NUMBER:  if (t != "number" && t != "integer") return false; break;
      case cg::op::EXPECT_INTEGER: if (t != "integer") return false; break;
      case cg::op::EXPECT_BOOLEAN: if (t != "boolean") return false; break;
      case cg::op::EXPECT_NULL:    if (t != "null") return false; break;
      case cg::op::EXPECT_TYPE_MULTI: {
        const auto& ts = p.type_sets[c.a];
        bool m = false;
        for (const auto& ty : ts) {
          // "number" in the schema also admits integers.
          if (t == ty || (ty == "number" && (t == "integer" || t == "number"))) {
            m = true;
            break;
          }
        }
        if (!m) return false;
        break;
      }

      case cg::op::CHECK_MINIMUM:
      case cg::op::CHECK_MAXIMUM:
      case cg::op::CHECK_EX_MINIMUM:
      case cg::op::CHECK_EX_MAXIMUM:
      case cg::op::CHECK_MULTIPLE_OF: {
        if (t == "integer" || t == "number") {
          double v;
          if (t == "integer") {
            // A value that does not fit int64_t fails here and is retried on
            // the DOM path by the caller.
            int64_t iv;
            if (value.get(iv) != SUCCESS) return false;
            v = (double)iv;
          } else {
            if (value.get(v) != SUCCESS) return false;
          }
          const double d = p.doubles[c.a];
          if (c.o == cg::op::CHECK_MINIMUM && v < d) return false;
          if (c.o == cg::op::CHECK_MAXIMUM && v > d) return false;
          if (c.o == cg::op::CHECK_EX_MINIMUM && v <= d) return false;
          if (c.o == cg::op::CHECK_EX_MAXIMUM && v >= d) return false;
          if (c.o == cg::op::CHECK_MULTIPLE_OF) {
            // fmod's result carries the sign of the dividend, so compare
            // magnitudes; otherwise negative multiples are wrongly rejected.
            const double step = std::abs(d);
            const double r = std::abs(std::fmod(v, step));
            if (r > 1e-8 && std::abs(r - step) > 1e-8) return false;
          }
        }
        break;
      }

      case cg::op::CHECK_MIN_LENGTH:
        if (t == "string") {
          std::string_view sv;
          if (value.get(sv) != SUCCESS) return false;
          if (utf8_length(sv) < c.a) return false;
        }
        break;
      case cg::op::CHECK_MAX_LENGTH:
        if (t == "string") {
          std::string_view sv;
          if (value.get(sv) != SUCCESS) return false;
          if (utf8_length(sv) > c.a) return false;
        }
        break;
      case cg::op::CHECK_PATTERN:
        if (t == "string") {
          std::string_view sv;
          if (value.get(sv) != SUCCESS) return false;
          if (!re2::RE2::PartialMatch(re2::StringPiece(sv.data(), sv.size()),
                                      *p.regexes[c.a]))
            return false;
        }
        break;
      case cg::op::CHECK_FORMAT:
        if (t == "string") {
          std::string_view sv;
          if (value.get(sv) != SUCCESS) return false;
          const uint8_t f = p.format_ids[c.a];
          // Ids outside the name table are not checked.
          if (f < 9 && !check_format(sv, fmt_names[f])) return false;
        }
        break;

      case cg::op::CHECK_MIN_ITEMS:
        if (t == "array") {
          simdjson::ondemand::array a;
          if (value.get(a) != SUCCESS) return false;
          uint64_t s = 0;
          for (auto x : a) { (void)x; ++s; }
          if (s < c.a) return false;
        }
        break;
      case cg::op::CHECK_MAX_ITEMS:
        if (t == "array") {
          simdjson::ondemand::array a;
          if (value.get(a) != SUCCESS) return false;
          uint64_t s = 0;
          for (auto x : a) { (void)x; ++s; }
          if (s > c.a) return false;
        }
        break;
      case cg::op::ARRAY_ITEMS:
        if (t == "array") {
          simdjson::ondemand::array a;
          if (value.get(a) != SUCCESS) return false;
          for (auto elem : a) {
            simdjson::ondemand::value v;
            if (elem.get(v) != SUCCESS) return false;
            if (!od_exec(p, p.subs[c.a], v)) return false;
          }
        }
        break;

      case cg::op::CHECK_REQUIRED:
        if (t == "object") {
          simdjson::ondemand::object o;
          if (value.get(o) != SUCCESS) return false;
          auto f = o.find_field_unordered(p.strings[c.a]);
          if (f.error()) return false;
        }
        break;
      case cg::op::CHECK_MIN_PROPS:
        if (t == "object") {
          simdjson::ondemand::object o;
          if (value.get(o) != SUCCESS) return false;
          uint64_t n = 0;
          for (auto f : o) { (void)f; ++n; }
          if (n < c.a) return false;
        }
        break;
      case cg::op::CHECK_MAX_PROPS:
        if (t == "object") {
          simdjson::ondemand::object o;
          if (value.get(o) != SUCCESS) return false;
          uint64_t n = 0;
          for (auto f : o) { (void)f; ++n; }
          if (n > c.a) return false;
        }
        break;

      case cg::op::OBJ_PROPS_START:
        if (t == "object") {
          simdjson::ondemand::object o;
          if (value.get(o) != SUCCESS) return false;
          // Collect the property definitions that follow, up to OBJ_PROPS_END.
          struct pd { std::string_view nm; uint32_t si; };
          std::vector<pd> props;
          bool no_add = false;
          size_t j = i + 1;
          for (; j < code.size() && code[j].o != cg::op::OBJ_PROPS_END; ++j) {
            if (code[j].o == cg::op::OBJ_PROP)
              props.push_back({p.strings[code[j].a], code[j].b});
            else if (code[j].o == cg::op::CHECK_NO_ADDITIONAL)
              no_add = true;
          }
          for (auto field : o) {
            simdjson::ondemand::raw_json_string rk;
            if (field.key().get(rk) != SUCCESS) return false;
            std::string_view key = field.unescaped_key();
            bool matched = false;
            for (const auto& pp : props) {
              if (key == pp.nm) {
                simdjson::ondemand::value fv;
                if (field.value().get(fv) != SUCCESS) return false;
                if (!od_exec(p, p.subs[pp.si], fv)) return false;
                matched = true;
                break;
              }
            }
            if (!matched && no_add) return false;
          }
          i = j;  // resume after OBJ_PROPS_END (the loop increments i)
        } else {
          // Not an object: skip to OBJ_PROPS_END.
          size_t j = i + 1;
          for (; j < code.size() && code[j].o != cg::op::OBJ_PROPS_END; ++j) {}
          i = j;
        }
        break;
      case cg::op::OBJ_PROP:
      case cg::op::OBJ_PROPS_END:
      case cg::op::CHECK_NO_ADDITIONAL:
        // Consumed by OBJ_PROPS_START; nothing to do if reached directly.
        break;

      // These require full materialization — bail to DOM path
      case cg::op::CHECK_UNIQUE_ITEMS:
      case cg::op::CHECK_ENUM_STR:
      case cg::op::CHECK_ENUM:
      case cg::op::CHECK_CONST:
      case cg::op::COMPOSITION:
        return false;
    }
  }
  return true;
}
|
|
1515
|
+
|
|
1516
|
+
// Determine if a codegen plan can use On Demand (no enum/const/uniqueItems and no COMPOSITION fallback)
|
|
1517
|
+
// Reports whether the On Demand executor can run the whole plan: neither the
// root program nor any sub-program may contain an instruction that od_exec
// hands back to the DOM path.
static bool plan_supports_ondemand(const cg::plan& p) {
  const auto needs_dom = [](const auto& in) {
    switch (in.o) {
      case cg::op::CHECK_UNIQUE_ITEMS:
      case cg::op::CHECK_ENUM_STR:
      case cg::op::CHECK_ENUM:
      case cg::op::CHECK_CONST:
      case cg::op::COMPOSITION:
        return true;
      default:
        return false;
    }
  };

  if (std::any_of(p.code.begin(), p.code.end(), needs_dom)) return false;
  // Also check sub-plans
  for (const auto& sub : p.subs) {
    if (std::any_of(sub.begin(), sub.end(), needs_dom)) return false;
  }
  return true;
}
|
|
1535
|
+
|
|
1536
|
+
// Free padding: check if buffer is near a page boundary
|
|
1537
|
+
// On modern systems, pages are at least 4096 bytes. If we're far enough
|
|
1538
|
+
// from the end of a page, we can read 64 bytes beyond without a fault.
|
|
1539
|
+
// Returns the system memory page size in bytes.
// The OS is queried once and the result cached on both platforms. If the
// query fails or reports a non-positive size, 4096 is returned instead so
// callers never do modulo arithmetic with a zero or negative page size.
static long get_page_size() {
  static const long cached = [] {
#ifdef _WIN32
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    const long ps = static_cast<long>(si.dwPageSize);
#else
    const long ps = sysconf(_SC_PAGESIZE);
#endif
    return ps > 0 ? ps : 4096L;
  }();
  return cached;
}
|
|
1547
|
+
|
|
1548
|
+
static bool near_page_boundary(const char* buf, size_t len) {
|
|
1549
|
+
return ((reinterpret_cast<uintptr_t>(buf + len - 1) % get_page_size())
|
|
1550
|
+
+ REQUIRED_PADDING >= static_cast<uintptr_t>(get_page_size()));
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
// Zero-copy validate with free padding (Lemire's trick).
|
|
1554
|
+
// Almost never allocates — only if buffer is near a page boundary.
|
|
1555
|
+
// Builds a padded view over [data, data + length) for the simdjson parsers.
// When the buffer is not near a page boundary the view aliases the caller's
// memory and declares REQUIRED_PADDING extra bytes of capacity (no copy).
// Otherwise the bytes are copied into `fallback`, which must outlive the
// returned view, and the view refers to that copy.
static simdjson::padded_string_view get_free_padded_view(
    const char* data, size_t length, simdjson::padded_string& fallback) {
  const bool can_read_in_place = !near_page_boundary(data, length);
  if (can_read_in_place) {
    // Common: free padding available, zero-copy
    return simdjson::padded_string_view(data, length, length + REQUIRED_PADDING);
  }
  // Rare: near page boundary, must copy
  fallback = simdjson::padded_string(data, length);
  return fallback;
}
|
|
1565
|
+
|
|
970
1566
|
schema_ref compile(std::string_view schema_json) {
|
|
971
1567
|
auto ctx = std::make_shared<compiled_schema>();
|
|
972
1568
|
ctx->raw_schema = std::string(schema_json);
|
|
@@ -980,6 +1576,11 @@ schema_ref compile(std::string_view schema_json) {
|
|
|
980
1576
|
|
|
981
1577
|
ctx->root = compile_node(doc, *ctx);
|
|
982
1578
|
|
|
1579
|
+
// Generate codegen plan
|
|
1580
|
+
cg_compile(ctx->root.get(), ctx->gen_plan, ctx->gen_plan.code);
|
|
1581
|
+
ctx->gen_plan.code.push_back({cg::op::END});
|
|
1582
|
+
ctx->use_ondemand = plan_supports_ondemand(ctx->gen_plan);
|
|
1583
|
+
|
|
983
1584
|
schema_ref ref;
|
|
984
1585
|
ref.impl = ctx;
|
|
985
1586
|
return ref;
|
|
@@ -991,14 +1592,46 @@ validation_result validate(const schema_ref& schema, std::string_view json,
|
|
|
991
1592
|
return {false, {{error_code::invalid_schema, "", "schema not compiled"}}};
|
|
992
1593
|
}
|
|
993
1594
|
|
|
994
|
-
|
|
995
|
-
|
|
1595
|
+
// Free padding trick: avoid padded_string copy when possible
|
|
1596
|
+
simdjson::padded_string fallback;
|
|
1597
|
+
auto psv = get_free_padded_view(json.data(), json.size(), fallback);
|
|
1598
|
+
|
|
1599
|
+
// Ultra-fast path: On Demand (no DOM materialization)
|
|
1600
|
+
static constexpr size_t OD_THRESHOLD = 32;
|
|
1601
|
+
if (schema.impl->use_ondemand && !schema.impl->gen_plan.code.empty() &&
|
|
1602
|
+
json.size() >= OD_THRESHOLD) {
|
|
1603
|
+
auto od_result = tl_od_parser().iterate(psv);
|
|
1604
|
+
if (!od_result.error()) {
|
|
1605
|
+
simdjson::ondemand::value root_val;
|
|
1606
|
+
if (od_result.get_value().get(root_val) == SUCCESS) {
|
|
1607
|
+
if (od_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, root_val)) {
|
|
1608
|
+
return {true, {}};
|
|
1609
|
+
}
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1612
|
+
// Need fresh view for DOM parse (On Demand consumed it)
|
|
1613
|
+
psv = get_free_padded_view(json.data(), json.size(), fallback);
|
|
1614
|
+
}
|
|
1615
|
+
|
|
1616
|
+
auto& dom_p = tl_dom_parser();
|
|
1617
|
+
auto result = dom_p.parse(psv);
|
|
996
1618
|
if (result.error()) {
|
|
997
1619
|
return {false, {{error_code::invalid_json, "", "invalid JSON document"}}};
|
|
998
1620
|
}
|
|
999
1621
|
|
|
1622
|
+
// Fast path: codegen bytecode execution (DOM)
|
|
1623
|
+
if (!schema.impl->use_ondemand && !schema.impl->gen_plan.code.empty()) {
|
|
1624
|
+
if (cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code,
|
|
1625
|
+
result.value())) {
|
|
1626
|
+
return {true, {}};
|
|
1627
|
+
}
|
|
1628
|
+
// Codegen said invalid OR hit COMPOSITION — fall through to tree walker
|
|
1629
|
+
}
|
|
1630
|
+
|
|
1631
|
+
// Slow path: re-parse + tree walker with error details
|
|
1632
|
+
auto result2 = dom_p.parse(psv);
|
|
1000
1633
|
std::vector<validation_error> errors;
|
|
1001
|
-
validate_node(schema.impl->root,
|
|
1634
|
+
validate_node(schema.impl->root, result2.value(), "", *schema.impl, errors,
|
|
1002
1635
|
opts.all_errors);
|
|
1003
1636
|
|
|
1004
1637
|
return {errors.empty(), std::move(errors)};
|
|
@@ -1014,4 +1647,22 @@ validation_result validate(std::string_view schema_json,
|
|
|
1014
1647
|
return validate(s, json, opts);
|
|
1015
1648
|
}
|
|
1016
1649
|
|
|
1650
|
+
|
|
1651
|
+
bool is_valid_prepadded(const schema_ref& schema, const char* data, size_t length) {
|
|
1652
|
+
if (!schema.impl || !schema.impl->root) return false;
|
|
1653
|
+
|
|
1654
|
+
simdjson::padded_string fallback;
|
|
1655
|
+
auto psv = get_free_padded_view(data, length, fallback);
|
|
1656
|
+
auto result = tl_dom_parser().parse(psv);
|
|
1657
|
+
if (result.error()) return false;
|
|
1658
|
+
|
|
1659
|
+
if (!schema.impl->gen_plan.code.empty()) {
|
|
1660
|
+
return cg_exec(schema.impl->gen_plan, schema.impl->gen_plan.code, result.value());
|
|
1661
|
+
}
|
|
1662
|
+
|
|
1663
|
+
std::vector<validation_error> errors;
|
|
1664
|
+
validate_node(schema.impl->root, result.value(), "", *schema.impl, errors, false);
|
|
1665
|
+
return errors.empty();
|
|
1666
|
+
}
|
|
1667
|
+
|
|
1017
1668
|
} // namespace ata
|