ata-validator 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -102,7 +102,7 @@ Self-recursive named functions for $dynamicRef, compile-time cross-schema resolu
102
102
 
103
103
  ### JSON Schema Test Suite
104
104
 
105
- **95.3%** pass rate (1170/1227) on official [JSON Schema Test Suite](https://github.com/json-schema-org/JSON-Schema-Test-Suite) (Draft 2020-12). **95.3%** on [@exodus/schemasafe](https://github.com/ExodusMovement/schemasafe) test suite.
105
+ **98.5%** pass rate (1172/1190) on official [JSON Schema Test Suite](https://github.com/json-schema-org/JSON-Schema-Test-Suite) (Draft 2020-12), excluding remote refs and vocabulary (intentionally unsupported). **95.3%** on [@exodus/schemasafe](https://github.com/ExodusMovement/schemasafe) test suite.
106
106
 
107
107
  ## When to use ata
108
108
 
@@ -135,7 +135,7 @@ Self-recursive named functions for $dynamicRef, compile-time cross-schema resolu
135
135
  - **Zero-copy paths**: Buffer and pre-padded input support - no unnecessary copies
136
136
  - **Defaults + coercion**: `default` values, `coerceTypes`, `removeAdditional` support
137
137
  - **C/C++ library**: Native API for non-Node.js environments
138
- - **96.9% spec compliant**: Draft 2020-12
138
+ - **98.5% spec compliant**: Draft 2020-12 (excluding remote refs and vocabulary, which are intentionally unsupported)
139
139
 
140
140
  ## Installation
141
141
 
@@ -17,7 +17,6 @@
17
17
  #include <vector>
18
18
 
19
19
  #include "ata.h"
20
- #include <simdjson.h>
21
20
 
22
21
  // ============================================================================
23
22
  // V8 Direct Object Traversal Engine
@@ -798,67 +797,6 @@ static void validate_napi(const schema_node_ptr& node,
798
797
  // N-API Binding
799
798
  // ============================================================================
800
799
 
801
- // ============================================================================
802
- // simdjson DOM to V8 JS Object conversion
803
- // ============================================================================
804
-
805
- static Napi::Value dom_to_napi(Napi::Env env, simdjson::dom::element el) {
806
- using namespace simdjson;
807
- switch (el.type()) {
808
- case dom::element_type::OBJECT: {
809
- auto obj = Napi::Object::New(env);
810
- for (auto [key, val] : dom::object(el)) {
811
- obj.Set(std::string(key), dom_to_napi(env, val));
812
- }
813
- return obj;
814
- }
815
- case dom::element_type::ARRAY: {
816
- dom::array arr = el;
817
- auto jsArr = Napi::Array::New(env, arr.size());
818
- uint32_t i = 0;
819
- for (auto val : arr) {
820
- jsArr.Set(i++, dom_to_napi(env, val));
821
- }
822
- return jsArr;
823
- }
824
- case dom::element_type::STRING: {
825
- std::string_view sv;
826
- el.get(sv);
827
- return Napi::String::New(env, sv.data(), sv.length());
828
- }
829
- case dom::element_type::INT64: {
830
- int64_t v;
831
- el.get(v);
832
- return Napi::Number::New(env, static_cast<double>(v));
833
- }
834
- case dom::element_type::UINT64: {
835
- uint64_t v;
836
- el.get(v);
837
- return Napi::Number::New(env, static_cast<double>(v));
838
- }
839
- case dom::element_type::DOUBLE: {
840
- double v;
841
- el.get(v);
842
- return Napi::Number::New(env, v);
843
- }
844
- case dom::element_type::BOOL: {
845
- bool v;
846
- el.get(v);
847
- return Napi::Boolean::New(env, v);
848
- }
849
- case dom::element_type::NULL_VALUE:
850
- return env.Null();
851
- default:
852
- return env.Undefined();
853
- }
854
- }
855
-
856
- // Thread-local simdjson DOM parser for parseJSON / validateAndParse
857
- static simdjson::dom::parser& tl_dom_parser() {
858
- thread_local simdjson::dom::parser parser;
859
- return parser;
860
- }
861
-
862
800
  static Napi::Object make_result(Napi::Env env,
863
801
  const ata::validation_result& result) {
864
802
  Napi::Object obj = Napi::Object::New(env);
@@ -884,8 +822,7 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
884
822
  {InstanceMethod("validate", &CompiledSchema::Validate),
885
823
  InstanceMethod("validateJSON", &CompiledSchema::ValidateJSON),
886
824
  InstanceMethod("validateDirect", &CompiledSchema::ValidateDirect),
887
- InstanceMethod("isValidJSON", &CompiledSchema::IsValidJSON),
888
- InstanceMethod("validateAndParse", &CompiledSchema::ValidateAndParse)});
825
+ InstanceMethod("isValidJSON", &CompiledSchema::IsValidJSON)});
889
826
  auto* constructor = new Napi::FunctionReference();
890
827
  *constructor = Napi::Persistent(func);
891
828
  env.SetInstanceData(constructor);
@@ -1007,77 +944,6 @@ class CompiledSchema : public Napi::ObjectWrap<CompiledSchema> {
1007
944
  return ValidateDirectImpl(env, info[0]);
1008
945
  }
1009
946
 
1010
- // Parse JSON with simdjson, validate against schema, return parsed JS object
1011
- Napi::Value ValidateAndParse(const Napi::CallbackInfo& info) {
1012
- Napi::Env env = info.Env();
1013
- if (info.Length() < 1) {
1014
- Napi::TypeError::New(env, "JSON string or Buffer expected")
1015
- .ThrowAsJavaScriptException();
1016
- return env.Undefined();
1017
- }
1018
-
1019
- const char* data;
1020
- size_t len;
1021
-
1022
- if (info[0].IsBuffer()) {
1023
- auto buf = info[0].As<Napi::Buffer<char>>();
1024
- data = buf.Data();
1025
- len = buf.Length();
1026
- } else if (info[0].IsString()) {
1027
- auto [d, l] = extract_string(env, info[0]);
1028
- data = d;
1029
- len = l;
1030
- } else {
1031
- Napi::TypeError::New(env, "JSON string or Buffer expected")
1032
- .ThrowAsJavaScriptException();
1033
- return env.Undefined();
1034
- }
1035
-
1036
- // Parse with simdjson
1037
- simdjson::padded_string padded(data, len);
1038
- auto& parser = tl_dom_parser();
1039
- auto doc_result = parser.parse(padded);
1040
- if (doc_result.error()) {
1041
- auto obj = Napi::Object::New(env);
1042
- obj.Set("valid", false);
1043
- obj.Set("value", env.Null());
1044
- auto errors = Napi::Array::New(env, 1);
1045
- auto err = Napi::Object::New(env);
1046
- err.Set("code", Napi::Number::New(env, static_cast<int>(ata::error_code::invalid_json)));
1047
- err.Set("path", Napi::String::New(env, ""));
1048
- err.Set("message", Napi::String::New(env, "Invalid JSON"));
1049
- errors[0u] = err;
1050
- obj.Set("errors", errors);
1051
- return obj;
1052
- }
1053
-
1054
- // Validate
1055
- auto valResult = ata::validate(schema_, std::string_view(data, len));
1056
-
1057
- // Convert DOM to JS object
1058
- Napi::Value jsValue = dom_to_napi(env, doc_result.value());
1059
-
1060
- // Build result
1061
- auto obj = Napi::Object::New(env);
1062
- obj.Set("valid", valResult.valid);
1063
- obj.Set("value", jsValue);
1064
- if (valResult.valid) {
1065
- obj.Set("errors", Napi::Array::New(env, 0));
1066
- } else {
1067
- Napi::Array errors = Napi::Array::New(env, valResult.errors.size());
1068
- for (size_t i = 0; i < valResult.errors.size(); ++i) {
1069
- Napi::Object err = Napi::Object::New(env);
1070
- err.Set("code",
1071
- Napi::Number::New(env, static_cast<int>(valResult.errors[i].code)));
1072
- err.Set("path", Napi::String::New(env, valResult.errors[i].path));
1073
- err.Set("message", Napi::String::New(env, valResult.errors[i].message));
1074
- errors[i] = err;
1075
- }
1076
- obj.Set("errors", errors);
1077
- }
1078
- return obj;
1079
- }
1080
-
1081
947
  private:
1082
948
  Napi::Value ValidateDirectImpl(Napi::Env env, Napi::Value value) {
1083
949
  compiled_schema_internal ctx;
@@ -1130,44 +996,6 @@ Napi::Value GetVersion(const Napi::CallbackInfo& info) {
1130
996
  return Napi::String::New(info.Env(), std::string(ata::version()));
1131
997
  }
1132
998
 
1133
- // Standalone JSON parser using simdjson — returns parsed JS object
1134
- Napi::Value ParseJSON(const Napi::CallbackInfo& info) {
1135
- Napi::Env env = info.Env();
1136
- if (info.Length() < 1) {
1137
- Napi::TypeError::New(env, "JSON string or Buffer expected")
1138
- .ThrowAsJavaScriptException();
1139
- return env.Undefined();
1140
- }
1141
-
1142
- const char* data;
1143
- size_t len;
1144
-
1145
- if (info[0].IsBuffer()) {
1146
- auto buf = info[0].As<Napi::Buffer<char>>();
1147
- data = buf.Data();
1148
- len = buf.Length();
1149
- } else if (info[0].IsString()) {
1150
- auto [d, l] = CompiledSchema::extract_string(env, info[0]);
1151
- data = d;
1152
- len = l;
1153
- } else {
1154
- Napi::TypeError::New(env, "JSON string or Buffer expected")
1155
- .ThrowAsJavaScriptException();
1156
- return env.Undefined();
1157
- }
1158
-
1159
- // Parse with simdjson using thread-local parser
1160
- simdjson::padded_string padded(data, len);
1161
- auto& parser = tl_dom_parser();
1162
- auto result = parser.parse(padded);
1163
- if (result.error()) {
1164
- Napi::Error::New(env, "Invalid JSON").ThrowAsJavaScriptException();
1165
- return env.Undefined();
1166
- }
1167
-
1168
- return dom_to_napi(env, result.value());
1169
- }
1170
-
1171
999
  // --- Thread Pool ---
1172
1000
  class ThreadPool {
1173
1001
  public:
@@ -1703,7 +1531,6 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
1703
1531
  CompiledSchema::Init(env, exports);
1704
1532
  exports.Set("validate", Napi::Function::New(env, ValidateOneShot));
1705
1533
  exports.Set("version", Napi::Function::New(env, GetVersion));
1706
- exports.Set("parseJSON", Napi::Function::New(env, ParseJSON));
1707
1534
  exports.Set("fastRegister", Napi::Function::New(env, FastRegister));
1708
1535
  exports.Set("fastValidate", Napi::Function::New(env, FastValidateSlow));
1709
1536
 
package/index.d.ts CHANGED
@@ -11,6 +11,12 @@ export interface ValidationResult {
11
11
  errors: ValidationError[];
12
12
  }
13
13
 
14
+ export interface ValidateAndParseResult {
15
+ valid: boolean;
16
+ value: unknown;
17
+ errors: ValidationError[];
18
+ }
19
+
14
20
  export interface ValidatorOptions {
15
21
  coerceTypes?: boolean;
16
22
  removeAdditional?: boolean;
@@ -27,56 +33,93 @@ export interface StandardSchemaV1Props {
27
33
  | { issues: Array<{ message: string; path?: ReadonlyArray<{ key: PropertyKey }> }> };
28
34
  }
29
35
 
36
+ export interface StandaloneModule {
37
+ boolFn: (data: unknown) => boolean;
38
+ hybridFactory: (validResult: object, errFn: Function) => (data: unknown) => ValidationResult;
39
+ errFn: ((data: unknown, allErrors?: boolean) => ValidationResult) | null;
40
+ }
41
+
30
42
  export class Validator {
31
43
  constructor(schema: object | string, options?: ValidatorOptions);
32
44
 
33
- /** Add a schema to the validator */
45
+ /** Add a schema to the registry for cross-schema $ref resolution */
34
46
  addSchema(schema: object): void;
35
47
 
36
- /** Validate data returns result with errors. Applies defaults, coerceTypes, removeAdditional. */
48
+ /** Validate data, returns result with errors. Applies defaults, coerceTypes, removeAdditional. */
37
49
  validate(data: unknown): ValidationResult;
38
50
 
39
- /** Fast boolean check JS codegen, no error collection */
51
+ /** Fast boolean check via JS codegen or tier 0 interpreter. No error collection. */
40
52
  isValidObject(data: unknown): boolean;
41
53
 
42
- /** Validate JSON string simdjson fast path for large docs */
54
+ /** Validate a JSON string. Uses simdjson fast path for large documents. */
43
55
  validateJSON(jsonString: string): ValidationResult;
44
56
 
45
- /** Fast boolean check for JSON string */
57
+ /** Fast boolean check for a JSON string */
46
58
  isValidJSON(jsonString: string): boolean;
47
59
 
48
- /** Ultra-fast buffer validation via V8 CFunction zero NAPI overhead */
60
+ /** Parse JSON with simdjson + validate against schema. Returns parsed value and validation result. Requires native addon. */
61
+ validateAndParse(jsonString: string | Buffer): ValidateAndParseResult;
62
+
63
+ /** Ultra-fast buffer validation via native addon */
49
64
  isValid(input: Buffer | Uint8Array | string): boolean;
50
65
 
51
- /** Count valid documents in an NDJSON buffer */
66
+ /** Count valid documents in an NDJSON buffer. Requires native addon. */
52
67
  countValid(ndjsonBuf: Buffer | Uint8Array | string): number;
53
68
 
54
- /** Count valid documents from an array of buffers */
69
+ /** Validate an array of buffers, returns count of valid ones. Requires native addon. */
55
70
  batchIsValid(buffers: (Buffer | Uint8Array)[]): number;
56
71
 
57
- /** Zero-copy validation with pre-padded buffer */
72
+ /** Zero-copy validation with pre-padded buffer. Requires native addon. */
58
73
  isValidPrepadded(paddedBuffer: Buffer, jsonLength: number): boolean;
59
74
 
60
- /** Multi-core parallel NDJSON validation returns boolean per line */
75
+ /** Multi-core parallel NDJSON validation. Returns boolean per line. Requires native addon. */
61
76
  isValidParallel(ndjsonBuffer: Buffer): boolean[];
62
77
 
63
- /** Multi-core parallel NDJSON count returns number of valid items */
64
- countValid(ndjsonBuffer: Buffer): number;
65
-
66
- /** Single-thread NDJSON batch validation */
78
+ /** Single-thread NDJSON batch validation. Requires native addon. */
67
79
  isValidNDJSON(ndjsonBuffer: Buffer): boolean[];
68
80
 
69
- /** Standard Schema V1 interface compatible with Fastify, tRPC, TanStack, etc. */
81
+ /** Generate a standalone JS module string for zero-compile loading. Returns null if schema can't be standalone-compiled. */
82
+ toStandalone(): string | null;
83
+
84
+ /** Load a pre-compiled standalone module. Zero schema compilation at startup. */
85
+ static fromStandalone(mod: StandaloneModule, schema: object | string, options?: ValidatorOptions): Validator;
86
+
87
+ /** Bundle multiple schemas into a single JS module string. Load with Validator.loadBundle(). */
88
+ static bundle(schemas: object[], options?: ValidatorOptions): string;
89
+
90
+ /** Bundle multiple schemas into a self-contained JS module. No ata-validator import needed at runtime. */
91
+ static bundleStandalone(schemas: object[], options?: ValidatorOptions): string;
92
+
93
+ /** Bundle multiple schemas with deduplicated shared templates. Smaller output than bundle(). */
94
+ static bundleCompact(schemas: object[], options?: ValidatorOptions): string;
95
+
96
+ /** Load a bundle created by Validator.bundle(). Returns array of Validator instances. */
97
+ static loadBundle(mods: object[], schemas: object[], options?: ValidatorOptions): Validator[];
98
+
99
+ /** Standard Schema V1 interface, compatible with Fastify, tRPC, TanStack, etc. */
70
100
  readonly "~standard": StandardSchemaV1Props;
71
101
  }
72
102
 
103
+ /** One-shot validate: creates a Validator, validates data, returns result. */
73
104
  export function validate(
74
105
  schema: object | string,
75
106
  data: unknown
76
107
  ): ValidationResult;
77
108
 
109
+ /** Fast compile: returns a validate function directly. WeakMap cached, second call with same schema is near-zero cost. */
110
+ export function compile(
111
+ schema: object | string,
112
+ options?: ValidatorOptions
113
+ ): (data: unknown) => ValidationResult;
114
+
115
+ /** Parse JSON using simdjson (native addon) or JSON.parse (fallback). */
116
+ export function parseJSON(jsonString: string | Buffer): unknown;
117
+
118
+ /** Returns ata-validator version string. */
78
119
  export function version(): string;
79
120
 
121
+ /** Create a simdjson-compatible padded buffer from a JSON string. */
80
122
  export function createPaddedBuffer(jsonStr: string): { buffer: Buffer; length: number };
81
123
 
124
+ /** Required padding size for simdjson buffers. */
82
125
  export const SIMDJSON_PADDING: number;
@@ -4,6 +4,10 @@
4
4
  // Closure-based validator — no new Function() or eval().
5
5
  // Returns null if the schema is too complex for JS compilation.
6
6
 
7
+ // Count Unicode code points, not UTF-16 code units (a surrogate pair counts as one character).
8
+ // JSON Schema: minLength/maxLength count characters as defined by RFC 8259.
9
+ function _cpLen(s) { let n = 0; for (const _ of s) n++; return n; }
10
+
7
11
  // AJV-compatible error message templates (compile-time, not runtime)
8
12
  const AJV_MESSAGES = {
9
13
  type: (p) => `must be ${p.type}`,
@@ -271,11 +275,11 @@ function compileToJS(schema, defs, schemaMap) {
271
275
  // string
272
276
  if (schema.minLength !== undefined) {
273
277
  const min = schema.minLength
274
- checks.push((d) => typeof d !== 'string' || d.length >= min)
278
+ checks.push((d) => typeof d !== 'string' || _cpLen(d) >= min)
275
279
  }
276
280
  if (schema.maxLength !== undefined) {
277
281
  const max = schema.maxLength
278
- checks.push((d) => typeof d !== 'string' || d.length <= max)
282
+ checks.push((d) => typeof d !== 'string' || _cpLen(d) <= max)
279
283
  }
280
284
  if (schema.pattern) {
281
285
  try {
@@ -775,7 +779,7 @@ function compileToJSCodegen(schema, schemaMap) {
775
779
  }
776
780
  }
777
781
 
778
- const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: [], closureVals: [], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
782
+ const ctx = { varCounter: 0, helpers: [], helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen], rootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors, rootSchema: schema }
779
783
  const lines = []
780
784
  genCode(schema, 'd', lines, ctx)
781
785
 
@@ -914,8 +918,8 @@ function tryGenCombined(schema, access, ctx) {
914
918
 
915
919
  if (t === 'string') {
916
920
  const conds = [`typeof _v!=='string'`]
917
- if (schema.minLength !== undefined) conds.push(`_v.length<${schema.minLength}`)
918
- if (schema.maxLength !== undefined) conds.push(`_v.length>${schema.maxLength}`)
921
+ if (schema.minLength !== undefined) conds.push(`_cpLen(_v)<${schema.minLength}`)
922
+ if (schema.maxLength !== undefined) conds.push(`_cpLen(_v)>${schema.maxLength}`)
919
923
  if (conds.length < 2 && !schema.pattern && !schema.format) return null // not worth combining
920
924
  // pattern and format need separate statements, fall back if present
921
925
  if (schema.pattern || schema.format) return null
@@ -956,6 +960,9 @@ function tryGenCombined(schema, access, ctx) {
956
960
  // 'string' / 'number' / 'integer' = we know the primitive type
957
961
  function genCode(schema, v, lines, ctx, knownType) {
958
962
  if (typeof schema !== 'object' || schema === null) return
963
+ if (!ctx.regExpMap) {
964
+ ctx.regExpMap = new Map();
965
+ }
959
966
 
960
967
  // $ref — guard against circular references
961
968
  // In 2020-12 with unevaluated*, $ref can coexist with siblings — don't early return
@@ -1159,8 +1166,8 @@ function genCode(schema, v, lines, ctx, knownType) {
1159
1166
  if (schema.multipleOf !== undefined) lines.push(isNum ? `if(${v}%${schema.multipleOf}!==0)return false` : `if(typeof ${v}==='number'&&${v}%${schema.multipleOf}!==0)return false`)
1160
1167
 
1161
1168
  // string — skip type guard if known string
1162
- if (schema.minLength !== undefined) lines.push(isStr ? `if(${v}.length<${schema.minLength})return false` : `if(typeof ${v}==='string'&&${v}.length<${schema.minLength})return false`)
1163
- if (schema.maxLength !== undefined) lines.push(isStr ? `if(${v}.length>${schema.maxLength})return false` : `if(typeof ${v}==='string'&&${v}.length>${schema.maxLength})return false`)
1169
+ if (schema.minLength !== undefined) lines.push(isStr ? `if(_cpLen(${v})<${schema.minLength})return false` : `if(typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength})return false`)
1170
+ if (schema.maxLength !== undefined) lines.push(isStr ? `if(_cpLen(${v})>${schema.maxLength})return false` : `if(typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength})return false`)
1164
1171
 
1165
1172
  // array size — skip guard if known array
1166
1173
  if (schema.minItems !== undefined) lines.push(isArr ? `if(${v}.length<${schema.minItems})return false` : `if(Array.isArray(${v})&&${v}.length<${schema.minItems})return false`)
@@ -1176,8 +1183,13 @@ function genCode(schema, v, lines, ctx, knownType) {
1176
1183
  if (inlineCheck) {
1177
1184
  lines.push(isStr ? `if(!(${inlineCheck}))return false` : `if(typeof ${v}==='string'&&!(${inlineCheck}))return false`)
1178
1185
  } else {
1179
- const ri = ctx.varCounter++
1180
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(schema.pattern)})`)
1186
+ const pattern = JSON.stringify(schema.pattern);
1187
+ if (!ctx.regExpMap.has(pattern)) {
1188
+ const ri = ctx.varCounter++
1189
+ ctx.regExpMap.set(pattern, ri)
1190
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
1191
+ }
1192
+ const ri = ctx.regExpMap.get(pattern);
1181
1193
  lines.push(isStr ? `if(!_re${ri}.test(${v}))return false` : `if(typeof ${v}==='string'&&!_re${ri}.test(${v}))return false`)
1182
1194
  }
1183
1195
  }
@@ -2264,6 +2276,7 @@ function compileToJSCodegenWithErrors(schema, schemaMap) {
2264
2276
  }
2265
2277
 
2266
2278
  const ctx = { varCounter: 0, helperCode: [], rootDefs: eRootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors: eAnchors, rootSchema: schema }
2279
+ ctx.helperCode.push('const _cpLen=s=>{let n=0;for(const _ of s)n++;return n}')
2267
2280
  const lines = []
2268
2281
  genCodeE(schema, 'd', '', lines, ctx, '#')
2269
2282
  if (lines.length === 0) return (d) => ({ valid: true, errors: [] })
@@ -2296,7 +2309,9 @@ function compileToJSCodegenWithErrors(schema, schemaMap) {
2296
2309
  function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2297
2310
  if (!schemaPrefix) schemaPrefix = '#'
2298
2311
  if (typeof schema !== 'object' || schema === null) return
2299
-
2312
+ if (!ctx.regExpMap) {
2313
+ ctx.regExpMap = new Map();
2314
+ }
2300
2315
  // $ref — resolve local and cross-schema refs
2301
2316
  if (schema.$ref) {
2302
2317
  // Self-reference "#" — no-op (permissive) to avoid infinite recursion
@@ -2440,11 +2455,11 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2440
2455
 
2441
2456
  // string
2442
2457
  if (schema.minLength !== undefined) {
2443
- const c = isStr ? `${v}.length<${schema.minLength}` : `typeof ${v}==='string'&&${v}.length<${schema.minLength}`
2458
+ const c = isStr ? `_cpLen(${v})<${schema.minLength}` : `typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength}`
2444
2459
  lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`)
2445
2460
  }
2446
2461
  if (schema.maxLength !== undefined) {
2447
- const c = isStr ? `${v}.length>${schema.maxLength}` : `typeof ${v}==='string'&&${v}.length>${schema.maxLength}`
2462
+ const c = isStr ? `_cpLen(${v})>${schema.maxLength}` : `typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength}`
2448
2463
  lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`)
2449
2464
  }
2450
2465
  if (schema.pattern) {
@@ -2453,8 +2468,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2453
2468
  const c = isStr ? `!(${inlineCheck})` : `typeof ${v}==='string'&&!(${inlineCheck})`
2454
2469
  lines.push(`if(${c}){${fail('pattern', 'pattern', `{pattern:${JSON.stringify(schema.pattern)}}`, `'must match pattern "${schema.pattern}"'`)}}`)
2455
2470
  } else {
2456
- const ri = ctx.varCounter++
2457
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(schema.pattern)})`)
2471
+ const pattern = JSON.stringify(schema.pattern);
2472
+ if (!ctx.regExpMap.has(pattern)) {
2473
+ const ri = ctx.varCounter++
2474
+ ctx.regExpMap.set(pattern, ri)
2475
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`)
2476
+ }
2477
+ const ri = ctx.regExpMap.get(pattern);
2458
2478
  const c = isStr ? `!_re${ri}.test(${v})` : `typeof ${v}==='string'&&!_re${ri}.test(${v})`
2459
2479
  lines.push(`if(${c}){${fail('pattern', 'pattern', `{pattern:${JSON.stringify(schema.pattern)}}`, `'must match pattern "${schema.pattern}"'`)}}`)
2460
2480
  }
@@ -2539,8 +2559,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2539
2559
  // patternProperties
2540
2560
  if (schema.patternProperties) {
2541
2561
  for (const [pat, sub] of Object.entries(schema.patternProperties)) {
2542
- const ri = ctx.varCounter++
2543
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(pat)})`)
2562
+ const pattern = JSON.stringify(pat);
2563
+ if (!ctx.regExpMap.has(pattern)) {
2564
+ const ri = ctx.varCounter++
2565
+ ctx.regExpMap.set(pattern, ri)
2566
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
2567
+ }
2568
+ const ri = ctx.regExpMap.get(pattern);
2544
2569
  const ki = ctx.varCounter++
2545
2570
  lines.push(`if(typeof ${v}==='object'&&${v}!==null&&!Array.isArray(${v})){for(const _k${ki} in ${v}){if(_re${ri}.test(_k${ki})){`)
2546
2571
  const p = pathExpr ? `${pathExpr}+'/'+_k${ki}` : `'/'+_k${ki}`
@@ -2570,8 +2595,13 @@ function genCodeE(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2570
2595
  lines.push(`if(_k${ki}.length>${pn.maxLength}){${fail('maxLength', 'propertyNames/maxLength', `{limit:${pn.maxLength}}`, `'must NOT have more than ${pn.maxLength} characters'`)}}`)
2571
2596
  }
2572
2597
  if (pn.pattern) {
2573
- const ri = ctx.varCounter++
2574
- ctx.helperCode.push(`const _re${ri}=new RegExp(${JSON.stringify(pn.pattern)})`)
2598
+ const pattern = JSON.stringify(pn.pattern);
2599
+ if (!ctx.regExpMap.has(pattern)) {
2600
+ const ri = ctx.varCounter++
2601
+ ctx.regExpMap.set(pattern, ri)
2602
+ ctx.helperCode.push(`const _re${ri}=new RegExp(${pattern})`);
2603
+ }
2604
+ const ri = ctx.regExpMap.get(pattern);
2575
2605
  lines.push(`if(!_re${ri}.test(_k${ki})){${fail('pattern', 'propertyNames/pattern', `{pattern:${JSON.stringify(pn.pattern)}}`, `'must match pattern "${pn.pattern}"'`)}}`)
2576
2606
  }
2577
2607
  if (pn.const !== undefined) {
@@ -2746,7 +2776,7 @@ function compileToJSCombined(schema, VALID_RESULT, schemaMap) {
2746
2776
  }
2747
2777
  }
2748
2778
 
2749
- const ctx = { varCounter: 0, helperCode: [], closureVars: [], closureVals: [],
2779
+ const ctx = { varCounter: 0, helperCode: [], closureVars: ['_cpLen'], closureVals: [_cpLen],
2750
2780
  rootDefs: cRootDefs, refStack: new Set(), schemaMap: schemaMap || null, anchors: cAnchors, rootSchema: schema }
2751
2781
  const lines = []
2752
2782
  genCodeC(schema, 'd', '', lines, ctx, '#')
@@ -2963,8 +2993,8 @@ function genCodeC(schema, v, pathExpr, lines, ctx, schemaPrefix) {
2963
2993
  }
2964
2994
 
2965
2995
  // string — skip guard if known
2966
- if (schema.minLength !== undefined) { const c = isStr ? `${v}.length<${schema.minLength}` : `typeof ${v}==='string'&&${v}.length<${schema.minLength}`; lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`) }
2967
- if (schema.maxLength !== undefined) { const c = isStr ? `${v}.length>${schema.maxLength}` : `typeof ${v}==='string'&&${v}.length>${schema.maxLength}`; lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`) }
2996
+ if (schema.minLength !== undefined) { const c = isStr ? `_cpLen(${v})<${schema.minLength}` : `typeof ${v}==='string'&&_cpLen(${v})<${schema.minLength}`; lines.push(`if(${c}){${fail('minLength', 'minLength', `{limit:${schema.minLength}}`, `'must NOT have fewer than ${schema.minLength} characters'`)}}`) }
2997
+ if (schema.maxLength !== undefined) { const c = isStr ? `_cpLen(${v})>${schema.maxLength}` : `typeof ${v}==='string'&&_cpLen(${v})>${schema.maxLength}`; lines.push(`if(${c}){${fail('maxLength', 'maxLength', `{limit:${schema.maxLength}}`, `'must NOT have more than ${schema.maxLength} characters'`)}}`) }
2968
2998
  if (schema.pattern) {
2969
2999
  const inlineCheck = compilePatternInline(schema.pattern, v)
2970
3000
  if (inlineCheck) {
package/lib/tier0.js CHANGED
@@ -17,6 +17,14 @@ const T_NUMBER = TYPE_MASK.number;
17
17
  const T_INTEGER = TYPE_MASK.integer;
18
18
  const T_BOOLEAN = TYPE_MASK.boolean;
19
19
 
20
+ // Count Unicode code points, not UTF-16 code units.
21
+ // JSON Schema spec: minLength/maxLength count characters as defined by RFC 8259, i.e. Unicode code points.
22
+ function codePointLength(s) {
23
+ let n = 0;
24
+ for (const _ of s) n++;
25
+ return n;
26
+ }
27
+
20
28
  // Numeric constraint flags, packed into constraint.numFlags.
21
29
  // Using bit flags means the validator does a cheap bitwise-and instead of
22
30
  // five Number.isNaN() calls per numeric property when only one bound is set.
@@ -94,8 +102,8 @@ function checkPrimitive(c, v) {
94
102
  if (typeof v !== 'string') return false;
95
103
  const minLen = c.minLen;
96
104
  const maxLen = c.maxLen;
97
- if (minLen >= 0 && v.length < minLen) return false;
98
- if (maxLen >= 0 && v.length > maxLen) return false;
105
+ if (minLen >= 0 && codePointLength(v) < minLen) return false;
106
+ if (maxLen >= 0 && codePointLength(v) > maxLen) return false;
99
107
  } else if (m === T_INTEGER) {
100
108
  if (typeof v !== 'number' || !Number.isInteger(v)) return false;
101
109
  const f = c.numFlags;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ata-validator",
3
- "version": "0.10.0",
3
+ "version": "0.10.1",
4
4
  "description": "Ultra-fast JSON Schema validator. 4.7x faster validation, 1,800x faster compilation. Works without native addon. Cross-schema $ref, Draft 2020-12 + Draft 7, V8-optimized JS codegen, simdjson, RE2, multi-core. Standard Schema V1 compatible.",
5
5
  "main": "index.js",
6
6
  "module": "index.mjs",