avrotize 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/_version.py +2 -2
- avrotize/avrotize.py +4 -0
- avrotize/avrotots/class_core.ts.jinja +2 -2
- avrotize/avrotots.py +62 -7
- avrotize/avrovalidator.py +518 -0
- avrotize/commands.json +465 -0
- avrotize/dependencies/cs/net90/dependencies.csproj +4 -4
- avrotize/dependencies/java/jdk21/pom.xml +6 -6
- avrotize/dependencies/typescript/node22/package.json +1 -1
- avrotize/jsontoschema.py +151 -0
- avrotize/schema_inference.py +825 -0
- avrotize/sqltoavro.py +1159 -0
- avrotize/validate.py +242 -0
- avrotize/xmltoschema.py +122 -0
- {avrotize-3.0.2.dist-info → avrotize-3.1.0.dist-info}/METADATA +220 -2
- {avrotize-3.0.2.dist-info → avrotize-3.1.0.dist-info}/RECORD +19 -13
- {avrotize-3.0.2.dist-info → avrotize-3.1.0.dist-info}/WHEEL +0 -0
- {avrotize-3.0.2.dist-info → avrotize-3.1.0.dist-info}/entry_points.txt +0 -0
- {avrotize-3.0.2.dist-info → avrotize-3.1.0.dist-info}/licenses/LICENSE +0 -0
avrotize/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '3.0.2'
|
|
32
|
-
__version_tuple__ = version_tuple = (3, 0, 2)
|
|
31
|
+
__version__ = version = '3.1.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (3, 1, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
avrotize/avrotize.py
CHANGED
|
@@ -38,6 +38,10 @@ def create_subparsers(subparsers, commands):
|
|
|
38
38
|
kwargs['choices'] = arg['choices']
|
|
39
39
|
if 'default' in arg:
|
|
40
40
|
kwargs['default'] = arg['default']
|
|
41
|
+
# Handle dest for optional arguments only (positional args can't have dest)
|
|
42
|
+
arg_is_positional = not arg['name'].startswith('-')
|
|
43
|
+
if 'dest' in arg and not arg_is_positional:
|
|
44
|
+
kwargs['dest'] = arg['dest']
|
|
41
45
|
if arg['type'] == 'bool':
|
|
42
46
|
kwargs['action'] = 'store_true'
|
|
43
47
|
del kwargs['type']
|
|
@@ -8,7 +8,7 @@ import { jsonObject, jsonMember, TypedJSON } from 'typedjson';
|
|
|
8
8
|
{%- endif %}
|
|
9
9
|
{%- endif %}
|
|
10
10
|
{%- if avro_annotation %}
|
|
11
|
-
import avro from 'avro-js';
|
|
11
|
+
import avro, { type Type } from 'avro-js';
|
|
12
12
|
{%- endif %}
|
|
13
13
|
{%- for import_type, import_path in imports.items() %}
|
|
14
14
|
import { {{ import_type }} } from '{{ import_path }}';
|
|
@@ -22,7 +22,7 @@ import pako from 'pako';
|
|
|
22
22
|
{%- endif %}
|
|
23
23
|
export class {{ class_name }} {
|
|
24
24
|
{%- if avro_annotation %}
|
|
25
|
-
public static AvroType:
|
|
25
|
+
public static AvroType: Type = avro.parse({{ avro_schema_json }});
|
|
26
26
|
{%- endif %}
|
|
27
27
|
|
|
28
28
|
{%- for field in fields %}
|
avrotize/avrotots.py
CHANGED
|
@@ -500,10 +500,11 @@ class AvroToTypeScript:
|
|
|
500
500
|
"""Generate TypeScript type declaration file for avro-js module."""
|
|
501
501
|
avro_js_types = '''declare module 'avro-js' {
|
|
502
502
|
/**
|
|
503
|
-
* Avro Type
|
|
503
|
+
* Avro Type interface.
|
|
504
|
+
* Represents the structure of Type instances returned by avro.parse().
|
|
504
505
|
* Provides methods for encoding, decoding, and validating Avro data.
|
|
505
506
|
*/
|
|
506
|
-
export
|
|
507
|
+
export interface Type {
|
|
507
508
|
/**
|
|
508
509
|
* Encode a value to a Buffer.
|
|
509
510
|
* @param obj - Value to encode
|
|
@@ -575,12 +576,66 @@ class AvroToTypeScript:
|
|
|
575
576
|
}
|
|
576
577
|
|
|
577
578
|
/**
|
|
578
|
-
*
|
|
579
|
-
*
|
|
580
|
-
* @param options - Parse options
|
|
581
|
-
* @returns Type instance
|
|
579
|
+
* avro-js default export interface.
|
|
580
|
+
* This module is CommonJS, so in ESM context it only has a default export.
|
|
582
581
|
*/
|
|
583
|
-
export
|
|
582
|
+
export interface Avro {
|
|
583
|
+
/**
|
|
584
|
+
* Type class constructor.
|
|
585
|
+
*/
|
|
586
|
+
Type: any;
|
|
587
|
+
|
|
588
|
+
/**
|
|
589
|
+
* Parse an Avro schema and return a Type instance.
|
|
590
|
+
* @param schema - Schema as string or object
|
|
591
|
+
* @param options - Parse options
|
|
592
|
+
* @returns Type instance
|
|
593
|
+
*/
|
|
594
|
+
parse(schema: string | any, options?: any): Type;
|
|
595
|
+
|
|
596
|
+
/**
|
|
597
|
+
* Protocol class constructor.
|
|
598
|
+
*/
|
|
599
|
+
Protocol: any;
|
|
600
|
+
|
|
601
|
+
/**
|
|
602
|
+
* Create a file decoder.
|
|
603
|
+
*/
|
|
604
|
+
createFileDecoder(path: string, options?: any): any;
|
|
605
|
+
|
|
606
|
+
/**
|
|
607
|
+
* Create a file encoder.
|
|
608
|
+
*/
|
|
609
|
+
createFileEncoder(path: string, schema: any, options?: any): any;
|
|
610
|
+
|
|
611
|
+
/**
|
|
612
|
+
* Extract file header.
|
|
613
|
+
*/
|
|
614
|
+
extractFileHeader(buffer: Buffer): any;
|
|
615
|
+
|
|
616
|
+
/**
|
|
617
|
+
* Streams utilities.
|
|
618
|
+
*/
|
|
619
|
+
streams: any;
|
|
620
|
+
|
|
621
|
+
/**
|
|
622
|
+
* Built-in types.
|
|
623
|
+
*/
|
|
624
|
+
types: any;
|
|
625
|
+
|
|
626
|
+
/**
|
|
627
|
+
* Validator (deprecated).
|
|
628
|
+
*/
|
|
629
|
+
Validator: any;
|
|
630
|
+
|
|
631
|
+
/**
|
|
632
|
+
* ProtocolValidator (deprecated).
|
|
633
|
+
*/
|
|
634
|
+
ProtocolValidator: any;
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
const avro: Avro;
|
|
638
|
+
export default avro;
|
|
584
639
|
}
|
|
585
640
|
'''
|
|
586
641
|
|
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
"""Validates JSON instances against Avro schemas.
|
|
2
|
+
|
|
3
|
+
This module implements JSON validation against Avro schemas according to
|
|
4
|
+
the Avrotize Schema Specification (avrotize-schema.md). It validates:
|
|
5
|
+
- Primitive types: null, boolean, int, long, float, double, bytes, string
|
|
6
|
+
- Logical types: decimal, uuid, date, time, timestamp, duration
|
|
7
|
+
- Complex types: record, enum, array, map, fixed
|
|
8
|
+
- Type unions
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import base64
|
|
12
|
+
import re
|
|
13
|
+
from typing import Any, Dict, List, Tuple, Union
|
|
14
|
+
|
|
15
|
+
# Type alias for Avro schema
|
|
16
|
+
AvroSchema = Union[str, Dict[str, Any], List[Any]]


class AvroValidationError(Exception):
    """Exception raised when a JSON instance doesn't match an Avro schema.

    The rendered exception text is ``"<message> at <path>"``.
    """

    def __init__(self, message: str, path: str = "#"):
        """Store the raw message and the location it refers to.

        Args:
            message: Description of the mismatch, without location.
            path: Slash-separated location of the offending value ("#" is the root).
        """
        # message: the bare description; path: where in the instance it occurred.
        self.message = message
        self.path = path
        super().__init__(f"{message} at {path}")

    def __reduce__(self):
        """Rebuild from ``(message, path)`` when copied or pickled.

        ``args`` holds the already formatted text, so the default exception
        reconstruction would feed that text back in as ``message`` and append
        the location a second time.
        """
        return (type(self), (self.message, self.path), dict(vars(self)))


class AvroValidator:
    """Validates JSON instances against Avro schemas."""

    # Lexical patterns for string-encoded logical types. re.ASCII keeps ``\d``
    # from matching non-ASCII digits; the validator applies them with
    # fullmatch() so a trailing newline is not silently accepted.
    UUID_PATTERN = re.compile(
        r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
    )
    DATE_PATTERN = re.compile(r'^\d{4}-\d{2}-\d{2}$', re.ASCII)
    TIME_PATTERN = re.compile(
        r'^\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?$', re.ASCII
    )
    DATETIME_PATTERN = re.compile(
        r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?$',
        re.ASCII,
    )
    # The lookaheads reject the empty durations "P", "PT" and "P1DT": at least
    # one component must follow "P", and a digit must follow "T".
    DURATION_PATTERN = re.compile(
        r'^P(?=.)(\d+Y)?(\d+M)?(\d+D)?(T(?=\d)(\d+H)?(\d+M)?(\d+(\.\d+)?S)?)?$',
        re.ASCII,
    )
    DECIMAL_PATTERN = re.compile(r'^[+-]?\d+(\.\d+)?$', re.ASCII)

    # Int32 and Int64 bounds
    INT32_MIN = -(2**31)
    INT32_MAX = 2**31 - 1
    INT64_MIN = -(2**63)
    INT64_MAX = 2**63 - 1

    # Primitive type names that may appear as the "type" of an object schema.
    _PRIMITIVE_TYPES = (
        'null', 'boolean', 'int', 'long', 'float', 'double', 'bytes', 'string'
    )

    def __init__(self, schema: AvroSchema):
        """Initialize the validator with an Avro schema.

        Args:
            schema: The Avro schema to validate against
        """
        self.schema = schema
        # Maps both the full name and the short name of every record, enum
        # and fixed found in the schema to its definition.
        self.named_types: Dict[str, Dict[str, Any]] = {}
        self._collect_named_types(schema, '')

    def _collect_named_types(self, schema: AvroSchema, current_namespace: str) -> None:
        """Collects all named types from the schema for reference resolution.

        Args:
            schema: The schema to scan
            current_namespace: The current namespace context
        """
        if isinstance(schema, list):
            # Type union: every branch may declare named types.
            for branch in schema:
                self._collect_named_types(branch, current_namespace)
            return
        if not isinstance(schema, dict):
            return

        schema_type = schema.get('type')
        if schema_type in ('record', 'enum', 'fixed'):
            namespace = schema.get('namespace', current_namespace)
            name = schema.get('name', '')
            fullname = f"{namespace}.{name}" if namespace else name
            self.named_types[fullname] = schema
            self.named_types[name] = schema  # Also store short name

            if schema_type == 'record':
                # Nested definitions inherit the record's namespace.
                for field in schema.get('fields', []):
                    self._collect_named_types(field.get('type', 'null'), namespace)
        elif schema_type == 'array':
            self._collect_named_types(schema.get('items', 'null'), current_namespace)
        elif schema_type == 'map':
            self._collect_named_types(schema.get('values', 'null'), current_namespace)

    def validate(self, instance: Any) -> None:
        """Validates a JSON instance against the schema.

        Args:
            instance: The JSON value to validate

        Raises:
            AvroValidationError: If the instance doesn't match the schema
        """
        self._validate(instance, self.schema, "#")

    def _validate(self, instance: Any, schema: AvroSchema, path: str) -> None:
        """Dispatches on the JSON shape of ``schema``.

        Args:
            instance: The JSON value to validate
            schema: The schema to validate against
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If validation fails
        """
        if isinstance(schema, str):
            self._validate_primitive_or_reference(instance, schema, path)
        elif isinstance(schema, dict):
            self._validate_complex(instance, schema, path)
        elif isinstance(schema, list):
            self._validate_union(instance, schema, path)
        else:
            raise AvroValidationError(f"Invalid schema type: {type(schema)}", path)

    @staticmethod
    def _is_integer(value: Any) -> bool:
        """Returns True for ints, excluding bool (which subclasses int)."""
        return isinstance(value, int) and not isinstance(value, bool)

    @staticmethod
    def _is_number(value: Any) -> bool:
        """Returns True for ints and floats, excluding bool."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def _validate_primitive_or_reference(
        self, instance: Any, schema: str, path: str
    ) -> None:
        """Validates against a primitive type or named type reference.

        Args:
            instance: The JSON value to validate
            schema: The primitive type name or named type reference
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If the value does not match, or ``schema`` is
                neither a known named type nor a primitive type name
        """
        # Named type references take precedence over primitive names.
        if schema in self.named_types:
            self._validate_complex(instance, self.named_types[schema], path)
            return

        actual = type(instance).__name__

        if schema == 'null':
            if instance is not None:
                raise AvroValidationError(f"Expected null, got {actual}", path)

        elif schema == 'boolean':
            if not isinstance(instance, bool):
                raise AvroValidationError(f"Expected boolean, got {actual}", path)

        elif schema == 'int':
            if not self._is_integer(instance):
                raise AvroValidationError(f"Expected int, got {actual}", path)
            if not self.INT32_MIN <= instance <= self.INT32_MAX:
                raise AvroValidationError(
                    f"Integer {instance} out of int32 range [{self.INT32_MIN}, {self.INT32_MAX}]",
                    path
                )

        elif schema == 'long':
            if not self._is_integer(instance):
                raise AvroValidationError(f"Expected long, got {actual}", path)
            if not self.INT64_MIN <= instance <= self.INT64_MAX:
                raise AvroValidationError(
                    f"Integer {instance} out of int64 range [{self.INT64_MIN}, {self.INT64_MAX}]",
                    path
                )

        elif schema == 'float':
            if not self._is_number(instance):
                raise AvroValidationError(f"Expected float, got {actual}", path)

        elif schema == 'double':
            if not self._is_number(instance):
                raise AvroValidationError(f"Expected double, got {actual}", path)

        elif schema == 'bytes':
            # In JSON, bytes are represented as strings with unicode escapes
            if not isinstance(instance, str):
                raise AvroValidationError(f"Expected bytes (string), got {actual}", path)

        elif schema == 'string':
            if not isinstance(instance, str):
                raise AvroValidationError(f"Expected string, got {actual}", path)

        else:
            raise AvroValidationError(f"Unknown primitive type: {schema}", path)

    def _validate_complex(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
        """Validates against an object-form schema.

        Args:
            instance: The JSON value to validate
            schema: The complex type schema
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If the value does not match or the schema's
                "type" is not recognised
        """
        schema_type = schema.get('type')

        if schema_type == 'record':
            self._validate_record(instance, schema, path)
        elif schema_type == 'enum':
            self._validate_enum(instance, schema, path)
        elif schema_type == 'array':
            self._validate_array(instance, schema, path)
        elif schema_type == 'map':
            self._validate_map(instance, schema, path)
        elif schema_type == 'fixed':
            self._validate_fixed(instance, schema, path)
        elif schema_type in self._PRIMITIVE_TYPES:
            # Complex form of primitive type, possibly with logical type
            logical_type = schema.get('logicalType')
            if logical_type:
                self._validate_logical_type(instance, schema, logical_type, path)
            else:
                self._validate_primitive_or_reference(instance, schema_type, path)
        elif isinstance(schema_type, str) and schema_type in self.named_types:
            # {"type": "SomeNamedType"}: resolve it the same way a bare string
            # reference is resolved.
            self._validate_complex(instance, self.named_types[schema_type], path)
        else:
            raise AvroValidationError(f"Unknown complex type: {schema_type}", path)

    def _validate_record(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
        """Validates a record type.

        A field is looked up under its JSON alternate name first, then under
        its declared name. Absent fields are accepted when they declare a
        default or their type allows null. Keys of ``instance`` that match no
        field are ignored.

        Args:
            instance: The JSON value to validate
            schema: The record schema
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If ``instance`` is not an object, a field
                value is invalid, or a required field is missing
        """
        if not isinstance(instance, dict):
            raise AvroValidationError(
                f"Expected object for record '{schema.get('name')}', got {type(instance).__name__}",
                path
            )

        for field in schema.get('fields', []):
            field_name = field.get('name')
            field_type = field.get('type', 'null')

            # Check for altnames (JSON encoding)
            json_name = field.get('altnames', {}).get('json', field_name)

            if json_name in instance:
                self._validate(instance[json_name], field_type, f"{path}/{json_name}")
            elif field_name in instance:
                self._validate(instance[field_name], field_type, f"{path}/{field_name}")
            elif 'default' not in field and not self._type_allows_null(field_type):
                raise AvroValidationError(
                    f"Missing required field '{field_name}'",
                    path
                )

    def _type_allows_null(self, schema: AvroSchema) -> bool:
        """Check if a type allows null values.

        Recognises "null", {"type": "null"}, and unions containing either.
        """
        if schema == 'null':
            return True
        if isinstance(schema, dict):
            return schema.get('type') == 'null'
        if isinstance(schema, list):
            return 'null' in schema or any(
                isinstance(branch, dict) and branch.get('type') == 'null'
                for branch in schema
            )
        return False

    def _validate_enum(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
        """Validates an enum type.

        Args:
            instance: The JSON value to validate
            schema: The enum schema
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If ``instance`` is not a string or is neither
                a declared symbol nor a JSON alternate symbol
        """
        if not isinstance(instance, str):
            raise AvroValidationError(
                f"Expected string for enum '{schema.get('name')}', got {type(instance).__name__}",
                path
            )

        symbols = schema.get('symbols', [])
        if instance in symbols:
            return

        # Check altsymbols for JSON encoding (symbol -> alternate spelling).
        altsymbols = schema.get('altsymbols', {}).get('json', {})
        if instance in altsymbols.values():
            return

        raise AvroValidationError(
            f"'{instance}' is not a valid symbol for enum '{schema.get('name')}'. Valid symbols: {symbols}",
            path
        )

    def _validate_array(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
        """Validates an array type.

        Args:
            instance: The JSON value to validate
            schema: The array schema
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If ``instance`` is not a list or an item is invalid
        """
        if not isinstance(instance, list):
            raise AvroValidationError(
                f"Expected array, got {type(instance).__name__}",
                path
            )

        items_schema = schema.get('items', 'null')
        for index, item in enumerate(instance):
            self._validate(item, items_schema, f"{path}/{index}")

    def _validate_map(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
        """Validates a map type.

        Args:
            instance: The JSON value to validate
            schema: The map schema
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If ``instance`` is not an object, a key is
                not a string, or a value is invalid
        """
        if not isinstance(instance, dict):
            raise AvroValidationError(
                f"Expected object for map, got {type(instance).__name__}",
                path
            )

        values_schema = schema.get('values', 'null')
        for key, value in instance.items():
            if not isinstance(key, str):
                raise AvroValidationError(
                    f"Map keys must be strings, got {type(key).__name__}",
                    path
                )
            self._validate(value, values_schema, f"{path}/{key}")

    def _validate_fixed(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
        """Validates a fixed type.

        Args:
            instance: The JSON value to validate
            schema: The fixed schema
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If ``instance`` is not a string or its length
                differs from the schema's "size"
        """
        if not isinstance(instance, str):
            raise AvroValidationError(
                f"Expected string for fixed '{schema.get('name')}', got {type(instance).__name__}",
                path
            )

        size = schema.get('size', 0)
        # In JSON, fixed values are represented as unicode escape sequences
        # Each byte is represented as a unicode character
        if len(instance) != size:
            raise AvroValidationError(
                f"Fixed '{schema.get('name')}' requires exactly {size} bytes, got {len(instance)}",
                path
            )

    def _validate_union(self, instance: Any, schema: List[Any], path: str) -> None:
        """Validates against a type union.

        The value is accepted as soon as one branch accepts it.

        Args:
            instance: The JSON value to validate
            schema: The union schema (list of types)
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If no branch accepts the value
        """
        for union_type in schema:
            try:
                self._validate(instance, union_type, path)
            except AvroValidationError:
                continue
            return  # Validation succeeded for this type

        type_names = [self._get_type_name(branch) for branch in schema]
        raise AvroValidationError(
            f"Value doesn't match any type in union {type_names}",
            path
        )

    def _get_type_name(self, schema: AvroSchema) -> str:
        """Gets a human-readable name for a schema type."""
        if isinstance(schema, str):
            return schema
        if isinstance(schema, dict):
            schema_type = schema.get('type', 'unknown')
            name = schema.get('name')
            return f"{schema_type}:{name}" if name else schema_type
        if isinstance(schema, list):
            return f"union[{', '.join(self._get_type_name(t) for t in schema)}]"
        return 'unknown'

    def _validate_integral_logical(
        self, instance: Any, base_type: str, expected: str, logical_type: str, path: str
    ) -> None:
        """Validates an integer-encoded logical type, including its range."""
        if not self._is_integer(instance):
            raise AvroValidationError(
                f"Expected {expected} for {logical_type}, got {type(instance).__name__}",
                path
            )
        # The logical type does not widen the base type: enforce int32/int64 bounds.
        self._validate_primitive_or_reference(instance, base_type, path)

    def _validate_string_logical(
        self, instance: Any, logical_type: str, pattern: Any, failure: str, path: str
    ) -> None:
        """Validates a string-encoded logical type against ``pattern``."""
        if not isinstance(instance, str):
            raise AvroValidationError(
                f"Expected string for {logical_type}, got {type(instance).__name__}",
                path
            )
        # fullmatch: "$" alone would tolerate a trailing newline.
        if not pattern.fullmatch(instance):
            raise AvroValidationError(f"{failure}: {instance}", path)

    def _validate_logical_type(
        self, instance: Any, schema: Dict[str, Any], logical_type: str, path: str
    ) -> None:
        """Validates a logical type.

        Unknown logical types fall back to validation of the base type.

        Args:
            instance: The JSON value to validate
            schema: The schema with logical type
            logical_type: The logical type name
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If the base type is not allowed for the
                logical type or the value does not match
        """
        base_type = schema.get('type')

        if logical_type == 'decimal':
            self._validate_decimal(instance, schema, path)

        elif logical_type == 'uuid':
            if base_type != 'string':
                raise AvroValidationError("uuid logical type requires string base type", path)
            self._validate_string_logical(
                instance, 'uuid', self.UUID_PATTERN, "Invalid UUID format", path
            )

        elif logical_type == 'date':
            if base_type == 'int':
                self._validate_integral_logical(instance, base_type, 'int', 'date', path)
            elif base_type == 'string':
                self._validate_string_logical(
                    instance, 'date', self.DATE_PATTERN,
                    "Invalid date format (expected YYYY-MM-DD)", path
                )
            else:
                raise AvroValidationError("date logical type requires int or string base type", path)

        elif logical_type in ('time-millis', 'time-micros'):
            if base_type in ('int', 'long'):
                self._validate_integral_logical(instance, base_type, 'int', logical_type, path)
            elif base_type == 'string':
                self._validate_string_logical(
                    instance, logical_type, self.TIME_PATTERN, "Invalid time format", path
                )
            else:
                raise AvroValidationError(
                    f"{logical_type} logical type requires int, long, or string base type", path
                )

        elif logical_type in (
            'timestamp-millis', 'timestamp-micros',
            'local-timestamp-millis', 'local-timestamp-micros',
        ):
            if base_type == 'long':
                self._validate_integral_logical(instance, base_type, 'long', logical_type, path)
            elif base_type == 'string':
                self._validate_string_logical(
                    instance, logical_type, self.DATETIME_PATTERN,
                    "Invalid datetime format", path
                )
            else:
                raise AvroValidationError(
                    f"{logical_type} logical type requires long or string base type", path
                )

        elif logical_type == 'duration':
            if base_type == 'fixed':
                self._validate_fixed(instance, schema, path)
            elif base_type == 'string':
                self._validate_string_logical(
                    instance, 'duration', self.DURATION_PATTERN,
                    "Invalid duration format", path
                )
            else:
                raise AvroValidationError("duration logical type requires fixed or string base type", path)

        else:
            # Unknown logical type - fall back to base type validation
            self._validate_primitive_or_reference(instance, base_type, path)

    def _validate_decimal(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
        """Validates a decimal logical type.

        Args:
            instance: The JSON value to validate
            schema: The decimal schema
            path: Location of ``instance`` used in error messages

        Raises:
            AvroValidationError: If the base type is not bytes, fixed or
                string, or the value does not match
        """
        base_type = schema.get('type')

        if base_type == 'string':
            self._validate_string_logical(
                instance, 'decimal', self.DECIMAL_PATTERN, "Invalid decimal format", path
            )
        elif base_type == 'bytes':
            if not isinstance(instance, str):
                raise AvroValidationError(
                    f"Expected bytes (string) for decimal, got {type(instance).__name__}", path
                )
        elif base_type == 'fixed':
            self._validate_fixed(instance, schema, path)
        else:
            raise AvroValidationError("decimal logical type requires bytes, fixed, or string base type", path)


def validate_json_against_avro(instance: Any, schema: AvroSchema) -> List[str]:
    """Validates a JSON instance against an Avro schema.

    Validation stops at the first mismatch, so the result holds at most one
    message.

    Args:
        instance: The JSON value to validate
        schema: The Avro schema

    Returns:
        List of validation error messages (empty if valid)
    """
    validator = AvroValidator(schema)
    try:
        validator.validate(instance)
    except AvroValidationError as error:
        return [str(error)]
    return []
|