avrotize 3.0.1__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize-3.0.1/README.md → avrotize-3.1.0/PKG-INFO +271 -2
- avrotize-3.0.1/PKG-INFO → avrotize-3.1.0/README.md +209 -51
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/_version.py +3 -3
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotize.py +4 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotoiceberg.py +111 -13
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots/class_core.ts.jinja +2 -2
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots.py +62 -7
- avrotize-3.1.0/avrotize/avrovalidator.py +518 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/commands.json +485 -2
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependencies/cs/net90/dependencies.csproj +4 -4
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependencies/java/jdk21/pom.xml +6 -6
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependencies/typescript/node22/package.json +1 -1
- avrotize-3.1.0/avrotize/jsontoschema.py +151 -0
- avrotize-3.1.0/avrotize/schema_inference.py +825 -0
- avrotize-3.1.0/avrotize/sqltoavro.py +1159 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretodb.py +1 -1
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretoiceberg.py +113 -13
- avrotize-3.1.0/avrotize/validate.py +242 -0
- avrotize-3.1.0/avrotize/xmltoschema.py +122 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/pyproject.toml +19 -1
- {avrotize-3.0.1 → avrotize-3.1.0}/LICENSE +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/__init__.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/__main__.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/asn1toavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocpp/CMakeLists.txt.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocpp/build.bat.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocpp/build.sh.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocpp/dataclass_body.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocpp/vcpkg.json.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocpp.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/README.md.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/class_test.cs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/dataclass_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/enum_test.cs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/project.csproj.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/project.sln.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/run_coverage.ps1.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/run_coverage.sh.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp/testproject.csproj.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsharp.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotocsv.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotodatapackage.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotodb.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotogo/go_enum.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotogo/go_helpers.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotogo/go_struct.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotogo/go_test.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotogo/go_union.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotogo.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotographql.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotojava/class_test.java.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotojava/enum_test.java.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotojava/testproject.pom.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotojava.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotojs.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotojsons.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotojstruct.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotokusto.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotomd/README.md.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotomd.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotools.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotoparquet.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotoproto.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotopython/dataclass_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotopython/enum_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotopython/pyproject_toml.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotopython/test_class.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotopython/test_enum.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotopython.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotorust/dataclass_enum.rs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotorust/dataclass_struct.rs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotorust/dataclass_union.rs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotorust.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots/class_test.ts.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots/enum_core.ts.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots/gitignore.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots/index.ts.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots/package.json.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotots/tsconfig.json.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/avrotoxsd.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/cddltostructure.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/common.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/constants.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/csvtoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/datapackagetoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependencies/go/go121/go.mod +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependencies/python/py312/requirements.txt +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependencies/rust/stable/Cargo.toml +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependency_resolver.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/dependency_version.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/generic/generic.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/jsonstoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/jsonstostructure.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/jstructtoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/kstructtoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/kustotoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/openapitostructure.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/parquettoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/proto2parser.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/proto3parser.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototoavro.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/any.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/api.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/duration.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/field_mask.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/struct.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/timestamp.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/type.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/prototypes/wrappers.avsc +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocddl.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocpp/CMakeLists.txt.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocpp/build.bat.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocpp/build.sh.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocpp/dataclass_body.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocpp/vcpkg.json.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocpp.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/class_test.cs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/dataclass_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/enum_test.cs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/json_structure_converters.cs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/program.cs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/project.csproj.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/project.sln.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/testproject.csproj.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp/tuple_converter.cs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsharp.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretocsv.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretodatapackage.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretogo/go_enum.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretogo/go_helpers.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretogo/go_interface.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretogo/go_struct.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretogo/go_test.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretogo.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretographql.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojava/choice_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojava/class_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojava/enum_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojava/equals_hashcode.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojava/pom.xml.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojava/tuple_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojava.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojs/class_core.js.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojs/enum_core.js.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojs/package.json.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojs/test_class.js.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojs/test_enum.js.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojs/test_runner.js.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojs.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretojsons.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretokusto.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretomd/README.md.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretomd.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretoproto.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretopython/dataclass_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretopython/enum_core.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretopython/map_alias.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretopython/pyproject_toml.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretopython/test_class.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretopython/test_enum.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretopython.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretorust/dataclass_enum.rs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretorust/dataclass_struct.rs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretorust/dataclass_union.rs.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretorust.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots/class_core.ts.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots/enum_core.ts.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots/gitignore.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots/index.ts.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots/package.json.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots/test_class.ts.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots/tsconfig.json.jinja +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretots.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/structuretoxsd.py +0 -0
- {avrotize-3.0.1 → avrotize-3.1.0}/avrotize/xsdtoavro.py +0 -0
|
@@ -1,3 +1,64 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: avrotize
|
|
3
|
+
Version: 3.1.0
|
|
4
|
+
Summary: Tools to convert from and to Avro Schema from various other schema languages.
|
|
5
|
+
Author-email: Clemens Vasters <clemensv@microsoft.com>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: jsonschema>=4.23.0
|
|
13
|
+
Requires-Dist: lark>=1.1.9
|
|
14
|
+
Requires-Dist: pyarrow>=22.0.0
|
|
15
|
+
Requires-Dist: asn1tools>=0.167.0
|
|
16
|
+
Requires-Dist: jsonpointer>=3.0.0
|
|
17
|
+
Requires-Dist: jsonpath-ng>=1.6.1
|
|
18
|
+
Requires-Dist: jsoncomparison>=1.1.0
|
|
19
|
+
Requires-Dist: requests>=2.32.3
|
|
20
|
+
Requires-Dist: azure-kusto-data>=5.0.5
|
|
21
|
+
Requires-Dist: azure-identity>=1.17.1
|
|
22
|
+
Requires-Dist: datapackage>=1.15.4
|
|
23
|
+
Requires-Dist: jinja2>=3.1.4
|
|
24
|
+
Requires-Dist: pyiceberg>=0.10.0
|
|
25
|
+
Requires-Dist: pandas>=2.2.2
|
|
26
|
+
Requires-Dist: docker>=7.1.0
|
|
27
|
+
Requires-Dist: cddlparser>=0.5.0
|
|
28
|
+
Requires-Dist: json-structure>=0.1.8
|
|
29
|
+
Requires-Dist: psycopg2-binary>=2.9.9 ; extra == "all-sql"
|
|
30
|
+
Requires-Dist: pymysql>=1.1.1 ; extra == "all-sql"
|
|
31
|
+
Requires-Dist: pyodbc>=5.1.0 ; extra == "all-sql"
|
|
32
|
+
Requires-Dist: oracledb>=2.3.0 ; extra == "all-sql"
|
|
33
|
+
Requires-Dist: pytest>=8.3.2 ; extra == "dev"
|
|
34
|
+
Requires-Dist: fastavro>=1.9.5 ; extra == "dev"
|
|
35
|
+
Requires-Dist: xmlschema>=3.3.2 ; extra == "dev"
|
|
36
|
+
Requires-Dist: xmlunittest>=1.0.1 ; extra == "dev"
|
|
37
|
+
Requires-Dist: pylint>=3.2.6 ; extra == "dev"
|
|
38
|
+
Requires-Dist: dataclasses_json>=0.6.7 ; extra == "dev"
|
|
39
|
+
Requires-Dist: dataclasses>=0.8 ; extra == "dev"
|
|
40
|
+
Requires-Dist: pydantic>=2.8.2 ; extra == "dev"
|
|
41
|
+
Requires-Dist: avro>=1.12.0 ; extra == "dev"
|
|
42
|
+
Requires-Dist: testcontainers>=4.7.2 ; extra == "dev"
|
|
43
|
+
Requires-Dist: pymysql>=1.1.1 ; extra == "dev"
|
|
44
|
+
Requires-Dist: psycopg2-binary>=2.9.9 ; extra == "dev"
|
|
45
|
+
Requires-Dist: pyodbc>=5.1.0 ; extra == "dev"
|
|
46
|
+
Requires-Dist: pymongo>=4.8.0 ; extra == "dev"
|
|
47
|
+
Requires-Dist: oracledb>=2.3.0 ; extra == "dev"
|
|
48
|
+
Requires-Dist: cassandra-driver>=3.29.1 ; extra == "dev"
|
|
49
|
+
Requires-Dist: sqlalchemy>=2.0.32 ; extra == "dev"
|
|
50
|
+
Requires-Dist: graphql-core>=3.2.0 ; extra == "dev"
|
|
51
|
+
Requires-Dist: pymysql>=1.1.1 ; extra == "mysql"
|
|
52
|
+
Requires-Dist: oracledb>=2.3.0 ; extra == "oracle"
|
|
53
|
+
Requires-Dist: psycopg2-binary>=2.9.9 ; extra == "postgres"
|
|
54
|
+
Requires-Dist: pyodbc>=5.1.0 ; extra == "sqlserver"
|
|
55
|
+
Provides-Extra: all-sql
|
|
56
|
+
Provides-Extra: dev
|
|
57
|
+
Provides-Extra: mysql
|
|
58
|
+
Provides-Extra: oracle
|
|
59
|
+
Provides-Extra: postgres
|
|
60
|
+
Provides-Extra: sqlserver
|
|
61
|
+
|
|
1
62
|
# Avrotize & Structurize
|
|
2
63
|
|
|
3
64
|
[avrotize on PyPI](https://pypi.org/project/avrotize/)
|
|
@@ -27,6 +88,22 @@ You can install Avrotize from PyPI, [having installed Python 3.10 or later](http
|
|
|
27
88
|
pip install avrotize
|
|
28
89
|
```
|
|
29
90
|
|
|
91
|
+
For SQL database support (`sql2a` command), install the optional database drivers:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
# PostgreSQL
|
|
95
|
+
pip install avrotize[postgres]
|
|
96
|
+
|
|
97
|
+
# MySQL
|
|
98
|
+
pip install avrotize[mysql]
|
|
99
|
+
|
|
100
|
+
# SQL Server
|
|
101
|
+
pip install avrotize[sqlserver]
|
|
102
|
+
|
|
103
|
+
# All SQL databases
|
|
104
|
+
pip install avrotize[all-sql]
|
|
105
|
+
```
|
|
106
|
+
|
|
30
107
|
## Usage
|
|
31
108
|
|
|
32
109
|
Avrotize provides several commands for converting schema formats via Avrotize Schema.
|
|
@@ -38,6 +115,11 @@ Converting to Avrotize Schema:
|
|
|
38
115
|
- [`avrotize x2a`](#convert-xml-schema-xsd-to-avrotize-schema) - Convert XML schema to Avrotize Schema.
|
|
39
116
|
- [`avrotize asn2a`](#convert-asn1-schema-to-avrotize-schema) - Convert ASN.1 to Avrotize Schema.
|
|
40
117
|
- [`avrotize k2a`](#convert-kusto-table-definition-to-avrotize-schema) - Convert Kusto table definitions to Avrotize Schema.
|
|
118
|
+
- [`avrotize sql2a`](#convert-sql-database-schema-to-avrotize-schema) - Convert SQL database schema to Avrotize Schema.
|
|
119
|
+
- [`avrotize json2a`](#infer-avro-schema-from-json-files) - Infer Avro schema from JSON files.
|
|
120
|
+
- [`avrotize json2s`](#infer-json-structure-schema-from-json-files) - Infer JSON Structure schema from JSON files.
|
|
121
|
+
- [`avrotize xml2a`](#infer-avro-schema-from-xml-files) - Infer Avro schema from XML files.
|
|
122
|
+
- [`avrotize xml2s`](#infer-json-structure-schema-from-xml-files) - Infer JSON Structure schema from XML files.
|
|
41
123
|
- [`avrotize pq2a`](#convert-parquet-schema-to-avrotize-schema) - Convert Parquet schema to Avrotize Schema.
|
|
42
124
|
- [`avrotize csv2a`](#convert-csv-file-to-avrotize-schema) - Convert CSV file to Avrotize Schema.
|
|
43
125
|
- [`avrotize kstruct2a`](#convert-kafka-connect-schema-to-avrotize-schema) - Convert Kafka Connect Schema to Avrotize Schema.
|
|
@@ -105,6 +187,7 @@ Direct JSON Structure conversions:
|
|
|
105
187
|
Other commands:
|
|
106
188
|
|
|
107
189
|
- [`avrotize pcf`](#create-the-parsing-canonical-form-pcf-of-an-avrotize-schema) - Create the Parsing Canonical Form (PCF) of an Avrotize Schema.
|
|
190
|
+
- [`avrotize validate`](#validate-json-instances-against-schemas) - Validate JSON instances against Avro or JSON Structure schemas.
|
|
108
191
|
|
|
109
192
|
JSON Structure conversions:
|
|
110
193
|
|
|
@@ -378,6 +461,150 @@ Conversion notes:
|
|
|
378
461
|
- For `dynamic` columns, the tool will sample the data in the table to determine the structure of the dynamic column. The tool will map the dynamic column to an Avro record type with fields that correspond to the fields found in the dynamic column. If the dynamic column contains nested dynamic columns, the tool will recursively map those to Avro record types. If records with conflicting structures are found in the dynamic column, the tool will emit a union of record types for the dynamic column.
|
|
379
462
|
- If the `--emit-cloudevents-xregistry` option is set, the tool will emit an [xRegistry](http://xregistry.io) registry manifest file with a CloudEvent message definition for each table in the Kusto database and a separate Avro Schema for each table in the embedded schema registry. If one or more tables are found to contain CloudEvent data (as indicated by the presence of the CloudEvents attribute columns), the tool will inspect the content of the `type` (or `__type`) column to determine which CloudEvent types have been stored in the table and will emit a CloudEvent definition and schema for each unique type.
|
|
380
463
|
|
|
464
|
+
### Convert SQL database schema to Avrotize Schema
|
|
465
|
+
|
|
466
|
+
```bash
|
|
467
|
+
avrotize sql2a --connection-string <connection_string> [--username <user>] [--password <pass>] [--dialect <dialect>] [--database <database>] [--table-name <table>] [--out <path_to_avro_schema_file>] [--namespace <namespace>] [--infer-json] [--infer-xml] [--sample-size <n>] [--emit-cloudevents] [--emit-xregistry]
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
Parameters:
|
|
471
|
+
|
|
472
|
+
- `--connection-string`: The database connection string. Supports SSL/TLS and integrated authentication options (see examples below).
|
|
473
|
+
- `--username`: (optional) Database username. Overrides any username in the connection string. Use this to avoid credentials in command history.
|
|
474
|
+
- `--password`: (optional) Database password. Overrides any password in the connection string. Use this to avoid credentials in command history.
|
|
475
|
+
- `--dialect`: (optional) The SQL dialect: `postgres` (default), `mysql`, `sqlserver`, `oracle`, or `sqlite`.
|
|
476
|
+
- `--database`: (optional) The database name if not specified in the connection string.
|
|
477
|
+
- `--table-name`: (optional) A specific table to convert. If omitted, all tables are converted.
|
|
478
|
+
- `--out`: The path to the Avrotize Schema file. If omitted, output goes to stdout.
|
|
479
|
+
- `--namespace`: (optional) The Avro namespace for the generated schema.
|
|
480
|
+
- `--infer-json`: (optional, default: true) Infer schema for JSON/JSONB columns by sampling data.
|
|
481
|
+
- `--infer-xml`: (optional, default: true) Infer schema for XML columns by sampling data.
|
|
482
|
+
- `--sample-size`: (optional, default: 100) Number of rows to sample for JSON/XML schema inference.
|
|
483
|
+
- `--emit-cloudevents`: (optional) Detect CloudEvents tables and emit CloudEvents declarations.
|
|
484
|
+
- `--emit-xregistry`: (optional) Emit an xRegistry manifest instead of a single schema file.
|
|
485
|
+
|
|
486
|
+
Connection string examples:
|
|
487
|
+
|
|
488
|
+
```bash
|
|
489
|
+
# PostgreSQL with separate credentials (preferred for security)
|
|
490
|
+
avrotize sql2a --connection-string "postgresql://host:5432/mydb?sslmode=require" --username myuser --password mypass --out schema.avsc
|
|
491
|
+
|
|
492
|
+
# PostgreSQL with SSL (credentials in URL)
|
|
493
|
+
avrotize sql2a --connection-string "postgresql://user:pass@host:5432/mydb?sslmode=require" --out schema.avsc
|
|
494
|
+
|
|
495
|
+
# MySQL with SSL
|
|
496
|
+
avrotize sql2a --connection-string "mysql://user:pass@host:3306/mydb?ssl=true" --dialect mysql --out schema.avsc
|
|
497
|
+
|
|
498
|
+
# SQL Server with Windows Authentication (omit user/password)
|
|
499
|
+
avrotize sql2a --connection-string "mssql://@host:1433/mydb" --dialect sqlserver --out schema.avsc
|
|
500
|
+
|
|
501
|
+
# SQL Server with TLS encryption
|
|
502
|
+
avrotize sql2a --connection-string "mssql://user:pass@host:1433/mydb?encrypt=true" --dialect sqlserver --out schema.avsc
|
|
503
|
+
|
|
504
|
+
# SQLite file
|
|
505
|
+
avrotize sql2a --connection-string "/path/to/database.db" --dialect sqlite --out schema.avsc
|
|
506
|
+
```
|
|
507
|
+
|
|
508
|
+
Conversion notes:
|
|
509
|
+
|
|
510
|
+
- The tool connects to a live database and reads the schema from the information schema or system catalogs.
|
|
511
|
+
- Type mappings for each dialect:
|
|
512
|
+
- **PostgreSQL**: All standard types including `uuid`, `jsonb`, `xml`, arrays, and custom types.
|
|
513
|
+
- **MySQL**: Standard types including `json`, `enum`, `set`, and spatial types.
|
|
514
|
+
- **SQL Server**: Standard types including `uniqueidentifier`, `xml`, `money`, and `hierarchyid`.
|
|
515
|
+
- **Oracle**: Standard types including `number`, `clob`, `blob`, and Oracle-specific types.
|
|
516
|
+
- **SQLite**: Dynamic typing mapped based on declared type affinity.
|
|
517
|
+
- For JSON/JSONB columns (PostgreSQL, MySQL) and XML columns, the tool samples data to infer the structure. Fields that appear in some but not all records are folded together. If field types conflict across records, the tool emits a union of record types.
|
|
518
|
+
- For columns with keys that cannot be valid Avro identifiers (UUIDs, URLs, special characters), the tool generates `map<string, T>` types instead of record types.
|
|
519
|
+
- Table and column comments are preserved as Avro `doc` attributes where available.
|
|
520
|
+
- Primary key columns are noted in the schema's `unique` attribute.
|
|
521
|
+
|
|
522
|
+
### Infer Avro schema from JSON files
|
|
523
|
+
|
|
524
|
+
```bash
|
|
525
|
+
avrotize json2a <json_files...> [--out <path_to_avro_schema_file>] [--type-name <name>] [--namespace <namespace>] [--sample-size <n>]
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
Parameters:
|
|
529
|
+
|
|
530
|
+
- `<json_files...>`: One or more JSON files to analyze. Supports JSON arrays, single objects, and JSONL (JSON Lines) format.
|
|
531
|
+
- `--out`: The path to the Avro schema file. If omitted, output goes to stdout.
|
|
532
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
533
|
+
- `--namespace`: (optional) Avro namespace for generated types.
|
|
534
|
+
- `--sample-size`: (optional) Maximum number of records to sample (0 = all, default: 0).
|
|
535
|
+
|
|
536
|
+
Example:
|
|
537
|
+
|
|
538
|
+
```bash
|
|
539
|
+
# Infer schema from multiple JSON files
|
|
540
|
+
avrotize json2a data1.json data2.json --out schema.avsc --type-name Event --namespace com.example
|
|
541
|
+
|
|
542
|
+
# Infer schema from JSONL file
|
|
543
|
+
avrotize json2a events.jsonl --out events.avsc --type-name LogEntry
|
|
544
|
+
```
|
|
545
|
+
|
|
546
|
+
### Infer JSON Structure schema from JSON files
|
|
547
|
+
|
|
548
|
+
```bash
|
|
549
|
+
avrotize json2s <json_files...> [--out <path_to_jstruct_schema_file>] [--type-name <name>] [--base-id <uri>] [--sample-size <n>]
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
Parameters:
|
|
553
|
+
|
|
554
|
+
- `<json_files...>`: One or more JSON files to analyze.
|
|
555
|
+
- `--out`: The path to the JSON Structure schema file. If omitted, output goes to stdout.
|
|
556
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
557
|
+
- `--base-id`: (optional) Base URI for $id generation (default: "https://example.com/").
|
|
558
|
+
- `--sample-size`: (optional) Maximum number of records to sample (0 = all, default: 0).
|
|
559
|
+
|
|
560
|
+
Example:
|
|
561
|
+
|
|
562
|
+
```bash
|
|
563
|
+
avrotize json2s data.json --out schema.jstruct.json --type-name Person --base-id https://myapi.example.com/schemas/
|
|
564
|
+
```
|
|
565
|
+
|
|
566
|
+
### Infer Avro schema from XML files
|
|
567
|
+
|
|
568
|
+
```bash
|
|
569
|
+
avrotize xml2a <xml_files...> [--out <path_to_avro_schema_file>] [--type-name <name>] [--namespace <namespace>] [--sample-size <n>]
|
|
570
|
+
```
|
|
571
|
+
|
|
572
|
+
Parameters:
|
|
573
|
+
|
|
574
|
+
- `<xml_files...>`: One or more XML files to analyze.
|
|
575
|
+
- `--out`: The path to the Avro schema file. If omitted, output goes to stdout.
|
|
576
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
577
|
+
- `--namespace`: (optional) Avro namespace for generated types.
|
|
578
|
+
- `--sample-size`: (optional) Maximum number of documents to sample (0 = all, default: 0).
|
|
579
|
+
|
|
580
|
+
Example:
|
|
581
|
+
|
|
582
|
+
```bash
|
|
583
|
+
avrotize xml2a config.xml --out config.avsc --type-name Configuration --namespace com.example.config
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
### Infer JSON Structure schema from XML files
|
|
587
|
+
|
|
588
|
+
```bash
|
|
589
|
+
avrotize xml2s <xml_files...> [--out <path_to_jstruct_schema_file>] [--type-name <name>] [--base-id <uri>] [--sample-size <n>]
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
Parameters:
|
|
593
|
+
|
|
594
|
+
- `<xml_files...>`: One or more XML files to analyze.
|
|
595
|
+
- `--out`: The path to the JSON Structure schema file. If omitted, output goes to stdout.
|
|
596
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
597
|
+
- `--base-id`: (optional) Base URI for $id generation (default: "https://example.com/").
|
|
598
|
+
- `--sample-size`: (optional) Maximum number of documents to sample (0 = all, default: 0).
|
|
599
|
+
|
|
600
|
+
Conversion notes (apply to all inference commands):
|
|
601
|
+
|
|
602
|
+
- XML attributes are converted to fields prefixed with `@` (normalized to valid identifiers).
|
|
603
|
+
- Text content in mixed-content elements becomes a `#text` field.
|
|
604
|
+
- Repeated elements are inferred as arrays.
|
|
605
|
+
- Multiple files with different structures are merged into a unified schema.
|
|
606
|
+
- Sparse data (fields that appear in some but not all records) is folded into a single type.
|
|
607
|
+
|
|
381
608
|
### Convert Avrotize Schema to Kusto table declaration
|
|
382
609
|
|
|
383
610
|
```bash
|
|
@@ -628,7 +855,7 @@ Conversion notes:
|
|
|
628
855
|
### Convert Avrotize Schema to Iceberg schema
|
|
629
856
|
|
|
630
857
|
```bash
|
|
631
|
-
avrotize a2ib <path_to_avro_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-avro>] [--emit-cloudevents-columns]
|
|
858
|
+
avrotize a2ib <path_to_avro_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-avro>] [--emit-cloudevents-columns] [--format schema|arrow]
|
|
632
859
|
```
|
|
633
860
|
|
|
634
861
|
Parameters:
|
|
@@ -637,6 +864,7 @@ Parameters:
|
|
|
637
864
|
- `--out`: The path to the Iceberg schema file to write the conversion result to. If omitted, the output is directed to stdout.
|
|
638
865
|
- `--record-type`: (optional) The name of the Avro record type to convert to an Iceberg schema.
|
|
639
866
|
- `--emit-cloudevents-columns`: (optional) If set, the tool will add [CloudEvents](https://cloudevents.io) attribute columns to the Iceberg schema: `__id`, `__source`, `__subject`, `__type`, and `__time`.
|
|
867
|
+
- `--format`: (optional) Output format. `schema` (default) outputs JSON per the [Iceberg Table Spec Appendix C](https://iceberg.apache.org/spec/#appendix-c-json-serialization). `arrow` outputs a binary Arrow IPC serialized schema.
|
|
640
868
|
|
|
641
869
|
Conversion notes:
|
|
642
870
|
|
|
@@ -647,7 +875,7 @@ Conversion notes:
|
|
|
647
875
|
### Convert JSON Structure to Iceberg schema
|
|
648
876
|
|
|
649
877
|
```bash
|
|
650
|
-
avrotize s2ib <path_to_structure_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-structure>] [--emit-cloudevents-columns]
|
|
878
|
+
avrotize s2ib <path_to_structure_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-structure>] [--emit-cloudevents-columns] [--format schema|arrow]
|
|
651
879
|
```
|
|
652
880
|
|
|
653
881
|
Parameters:
|
|
@@ -656,6 +884,7 @@ Parameters:
|
|
|
656
884
|
- `--out`: The path to the Iceberg schema file to write the conversion result to. If omitted, the output is directed to stdout.
|
|
657
885
|
- `--record-type`: (optional) The name of the record type in definitions to convert to an Iceberg schema.
|
|
658
886
|
- `--emit-cloudevents-columns`: (optional) If set, the tool will add [CloudEvents](https://cloudevents.io) attribute columns to the Iceberg schema: `___id`, `___source`, `___subject`, `___type`, and `___time`.
|
|
887
|
+
- `--format`: (optional) Output format. `schema` (default) outputs JSON per the [Iceberg Table Spec Appendix C](https://iceberg.apache.org/spec/#appendix-c-json-serialization). `arrow` outputs a binary Arrow IPC serialized schema.
|
|
659
888
|
|
|
660
889
|
Conversion notes:
|
|
661
890
|
|
|
@@ -1258,6 +1487,45 @@ Conversion notes:
|
|
|
1258
1487
|
- The tool generates the Parsing Canonical Form (PCF) of the Avrotize Schema. The PCF is a normalized form of the schema that is used for schema comparison and compatibility checking.
|
|
1259
1488
|
- The PCF is a JSON object that is written to stdout.
|
|
1260
1489
|
|
|
1490
|
+
### Validate JSON instances against schemas
|
|
1491
|
+
|
|
1492
|
+
```bash
|
|
1493
|
+
avrotize validate <json_files...> --schema <schema_file> [--schema-type <type>] [--quiet]
|
|
1494
|
+
```
|
|
1495
|
+
|
|
1496
|
+
Parameters:
|
|
1497
|
+
|
|
1498
|
+
- `<json_files...>`: One or more JSON files to validate. Supports single JSON objects, JSON arrays, and JSONL (newline-delimited JSON) formats.
|
|
1499
|
+
- `--schema <schema_file>`: Path to the schema file (`.avsc` for Avro, `.jstruct.json` for JSON Structure).
|
|
1500
|
+
- `--schema-type`: (optional) Schema type: `avro` or `jstruct`. Auto-detected from file extension if omitted.
|
|
1501
|
+
- `--quiet`: (optional) Suppress output. Exit code 0 if all instances are valid, 1 if any are invalid.
|
|
1502
|
+
|
|
1503
|
+
Validation notes:
|
|
1504
|
+
|
|
1505
|
+
- Validates JSON instances against Avro schemas per the [Avrotize Schema specification](specs/avrotize-schema.md).
|
|
1506
|
+
- Supports all Avro primitive types: null, boolean, int, long, float, double, bytes, string.
|
|
1507
|
+
- Supports all Avro complex types: record, enum, array, map, fixed.
|
|
1508
|
+
- Supports logical types with both native and string encodings: decimal, uuid, date, time-millis, time-micros, timestamp-millis, timestamp-micros, duration.
|
|
1509
|
+
- Supports field `altnames` for JSON field name mapping.
|
|
1510
|
+
- Supports enum `altsymbols` for JSON symbol mapping.
|
|
1511
|
+
- JSON Structure validation requires the `json-structure` package.
|
|
1512
|
+
|
|
1513
|
+
Example:
|
|
1514
|
+
|
|
1515
|
+
```bash
|
|
1516
|
+
# Validate JSON file against Avro schema
|
|
1517
|
+
avrotize validate data.json --schema schema.avsc
|
|
1518
|
+
|
|
1519
|
+
# Validate multiple files
|
|
1520
|
+
avrotize validate file1.json file2.json --schema schema.avsc
|
|
1521
|
+
|
|
1522
|
+
# Validate JSONL file against JSON Structure schema
|
|
1523
|
+
avrotize validate events.jsonl --schema events.jstruct.json
|
|
1524
|
+
|
|
1525
|
+
# Quiet mode for CI/CD pipelines (exit code only)
|
|
1526
|
+
avrotize validate data.json --schema schema.avsc --quiet
|
|
1527
|
+
```
|
|
1528
|
+
|
|
1261
1529
|
### Convert JSON Structure schema to GraphQL schema
|
|
1262
1530
|
|
|
1263
1531
|
```bash
|
|
@@ -1325,3 +1593,4 @@ cat myschema.avsc | avrotize a2graphql > myschema.graphql
|
|
|
1325
1593
|
```
|
|
1326
1594
|
|
|
1327
1595
|
This document provides an overview of the usage and functionality of Avrotize. For more detailed information, please refer to the [Avrotize Schema documentation](specs/avrotize-schema.md) and the individual command help messages.
|
|
1596
|
+
|
|
@@ -1,51 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: avrotize
|
|
3
|
-
Version: 3.0.1
|
|
4
|
-
Summary: Tools to convert from and to Avro Schema from various other schema languages.
|
|
5
|
-
Author-email: Clemens Vasters <clemensv@microsoft.com>
|
|
6
|
-
Requires-Python: >=3.10
|
|
7
|
-
Description-Content-Type: text/markdown
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
Requires-Dist: jsonschema>=4.23.0
|
|
13
|
-
Requires-Dist: lark>=1.1.9
|
|
14
|
-
Requires-Dist: pyarrow>=22.0.0
|
|
15
|
-
Requires-Dist: asn1tools>=0.167.0
|
|
16
|
-
Requires-Dist: jsonpointer>=3.0.0
|
|
17
|
-
Requires-Dist: jsonpath-ng>=1.6.1
|
|
18
|
-
Requires-Dist: jsoncomparison>=1.1.0
|
|
19
|
-
Requires-Dist: requests>=2.32.3
|
|
20
|
-
Requires-Dist: azure-kusto-data>=5.0.5
|
|
21
|
-
Requires-Dist: azure-identity>=1.17.1
|
|
22
|
-
Requires-Dist: datapackage>=1.15.4
|
|
23
|
-
Requires-Dist: jinja2>=3.1.4
|
|
24
|
-
Requires-Dist: pyiceberg>=0.10.0
|
|
25
|
-
Requires-Dist: pandas>=2.2.2
|
|
26
|
-
Requires-Dist: docker>=7.1.0
|
|
27
|
-
Requires-Dist: cddlparser>=0.5.0
|
|
28
|
-
Requires-Dist: json-structure>=0.1.8
|
|
29
|
-
Requires-Dist: pytest>=8.3.2 ; extra == "dev"
|
|
30
|
-
Requires-Dist: fastavro>=1.9.5 ; extra == "dev"
|
|
31
|
-
Requires-Dist: xmlschema>=3.3.2 ; extra == "dev"
|
|
32
|
-
Requires-Dist: xmlunittest>=1.0.1 ; extra == "dev"
|
|
33
|
-
Requires-Dist: pylint>=3.2.6 ; extra == "dev"
|
|
34
|
-
Requires-Dist: dataclasses_json>=0.6.7 ; extra == "dev"
|
|
35
|
-
Requires-Dist: dataclasses>=0.8 ; extra == "dev"
|
|
36
|
-
Requires-Dist: pydantic>=2.8.2 ; extra == "dev"
|
|
37
|
-
Requires-Dist: avro>=1.12.0 ; extra == "dev"
|
|
38
|
-
Requires-Dist: testcontainers>=4.7.2 ; extra == "dev"
|
|
39
|
-
Requires-Dist: pymysql>=1.1.1 ; extra == "dev"
|
|
40
|
-
Requires-Dist: psycopg2>=2.9.9 ; extra == "dev"
|
|
41
|
-
Requires-Dist: pyodbc>=5.1.0 ; extra == "dev"
|
|
42
|
-
Requires-Dist: pymongo>=4.8.0 ; extra == "dev"
|
|
43
|
-
Requires-Dist: oracledb>=2.3.0 ; extra == "dev"
|
|
44
|
-
Requires-Dist: cassandra-driver>=3.29.1 ; extra == "dev"
|
|
45
|
-
Requires-Dist: sqlalchemy>=2.0.32 ; extra == "dev"
|
|
46
|
-
Requires-Dist: graphql-core>=3.2.0 ; extra == "dev"
|
|
47
|
-
Provides-Extra: dev
|
|
48
|
-
|
|
49
1
|
# Avrotize & Structurize
|
|
50
2
|
|
|
51
3
|
[avrotize on PyPI](https://pypi.org/project/avrotize/)
|
|
@@ -75,6 +27,22 @@ You can install Avrotize from PyPI, [having installed Python 3.10 or later](http
|
|
|
75
27
|
pip install avrotize
|
|
76
28
|
```
|
|
77
29
|
|
|
30
|
+
For SQL database support (`sql2a` command), install the optional database drivers:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# PostgreSQL
|
|
34
|
+
pip install avrotize[postgres]
|
|
35
|
+
|
|
36
|
+
# MySQL
|
|
37
|
+
pip install avrotize[mysql]
|
|
38
|
+
|
|
39
|
+
# SQL Server
|
|
40
|
+
pip install avrotize[sqlserver]
|
|
41
|
+
|
|
42
|
+
# All SQL databases
|
|
43
|
+
pip install avrotize[all-sql]
|
|
44
|
+
```
|
|
45
|
+
|
|
78
46
|
## Usage
|
|
79
47
|
|
|
80
48
|
Avrotize provides several commands for converting schema formats via Avrotize Schema.
|
|
@@ -86,6 +54,11 @@ Converting to Avrotize Schema:
|
|
|
86
54
|
- [`avrotize x2a`](#convert-xml-schema-xsd-to-avrotize-schema) - Convert XML schema to Avrotize Schema.
|
|
87
55
|
- [`avrotize asn2a`](#convert-asn1-schema-to-avrotize-schema) - Convert ASN.1 to Avrotize Schema.
|
|
88
56
|
- [`avrotize k2a`](#convert-kusto-table-definition-to-avrotize-schema) - Convert Kusto table definitions to Avrotize Schema.
|
|
57
|
+
- [`avrotize sql2a`](#convert-sql-database-schema-to-avrotize-schema) - Convert SQL database schema to Avrotize Schema.
|
|
58
|
+
- [`avrotize json2a`](#infer-avro-schema-from-json-files) - Infer Avro schema from JSON files.
|
|
59
|
+
- [`avrotize json2s`](#infer-json-structure-schema-from-json-files) - Infer JSON Structure schema from JSON files.
|
|
60
|
+
- [`avrotize xml2a`](#infer-avro-schema-from-xml-files) - Infer Avro schema from XML files.
|
|
61
|
+
- [`avrotize xml2s`](#infer-json-structure-schema-from-xml-files) - Infer JSON Structure schema from XML files.
|
|
89
62
|
- [`avrotize pq2a`](#convert-parquet-schema-to-avrotize-schema) - Convert Parquet schema to Avrotize Schema.
|
|
90
63
|
- [`avrotize csv2a`](#convert-csv-file-to-avrotize-schema) - Convert CSV file to Avrotize Schema.
|
|
91
64
|
- [`avrotize kstruct2a`](#convert-kafka-connect-schema-to-avrotize-schema) - Convert Kafka Connect Schema to Avrotize Schema.
|
|
@@ -153,6 +126,7 @@ Direct JSON Structure conversions:
|
|
|
153
126
|
Other commands:
|
|
154
127
|
|
|
155
128
|
- [`avrotize pcf`](#create-the-parsing-canonical-form-pcf-of-an-avrotize-schema) - Create the Parsing Canonical Form (PCF) of an Avrotize Schema.
|
|
129
|
+
- [`avrotize validate`](#validate-json-instances-against-schemas) - Validate JSON instances against Avro or JSON Structure schemas.
|
|
156
130
|
|
|
157
131
|
JSON Structure conversions:
|
|
158
132
|
|
|
@@ -426,6 +400,150 @@ Conversion notes:
|
|
|
426
400
|
- For `dynamic` columns, the tool will sample the data in the table to determine the structure of the dynamic column. The tool will map the dynamic column to an Avro record type with fields that correspond to the fields found in the dynamic column. If the dynamic column contains nested dynamic columns, the tool will recursively map those to Avro record types. If records with conflicting structures are found in the dynamic column, the tool will emit a union of record types for the dynamic column.
|
|
427
401
|
- If the `--emit-cloudevents-xregistry` option is set, the tool will emit an [xRegistry](http://xregistry.io) registry manifest file with a CloudEvent message definition for each table in the Kusto database and a separate Avro Schema for each table in the embedded schema registry. If one or more tables are found to contain CloudEvent data (as indicated by the presence of the CloudEvents attribute columns), the tool will inspect the content of the `type` (or `__type`) column to determine which CloudEvent types have been stored in the table and will emit a CloudEvent definition and schema for each unique type.
|
|
428
402
|
|
|
403
|
+
### Convert SQL database schema to Avrotize Schema
|
|
404
|
+
|
|
405
|
+
```bash
|
|
406
|
+
avrotize sql2a --connection-string <connection_string> [--username <user>] [--password <pass>] [--dialect <dialect>] [--database <database>] [--table-name <table>] [--out <path_to_avro_schema_file>] [--namespace <namespace>] [--infer-json] [--infer-xml] [--sample-size <n>] [--emit-cloudevents] [--emit-xregistry]
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
Parameters:
|
|
410
|
+
|
|
411
|
+
- `--connection-string`: The database connection string. Supports SSL/TLS and integrated authentication options (see examples below).
|
|
412
|
+
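- `--username`: (optional) Database username. Overrides any username in the connection string. Use this to keep credentials out of the connection string (for example, pass the value from an environment variable so that it does not appear in command history).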
|
|
413
|
+
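- `--password`: (optional) Database password. Overrides any password in the connection string. Use this to keep credentials out of the connection string (for example, pass the value from an environment variable so that it does not appear in command history).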
|
|
414
|
+
- `--dialect`: (optional) The SQL dialect: `postgres` (default), `mysql`, `sqlserver`, `oracle`, or `sqlite`.
|
|
415
|
+
- `--database`: (optional) The database name if not specified in the connection string.
|
|
416
|
+
- `--table-name`: (optional) A specific table to convert. If omitted, all tables are converted.
|
|
417
|
+
- `--out`: The path to the Avrotize Schema file. If omitted, output goes to stdout.
|
|
418
|
+
- `--namespace`: (optional) The Avro namespace for the generated schema.
|
|
419
|
+
- `--infer-json`: (optional, default: true) Infer schema for JSON/JSONB columns by sampling data.
|
|
420
|
+
- `--infer-xml`: (optional, default: true) Infer schema for XML columns by sampling data.
|
|
421
|
+
- `--sample-size`: (optional, default: 100) Number of rows to sample for JSON/XML schema inference.
|
|
422
|
+
- `--emit-cloudevents`: (optional) Detect CloudEvents tables and emit CloudEvents declarations.
|
|
423
|
+
- `--emit-xregistry`: (optional) Emit an xRegistry manifest instead of a single schema file.
|
|
424
|
+
|
|
425
|
+
Connection string examples:
|
|
426
|
+
|
|
427
|
+
```bash
|
|
428
|
+
# PostgreSQL with separate credentials (preferred for security)
|
|
429
|
+
avrotize sql2a --connection-string "postgresql://host:5432/mydb?sslmode=require" --username myuser --password mypass --out schema.avsc
|
|
430
|
+
|
|
431
|
+
# PostgreSQL with SSL (credentials in URL)
|
|
432
|
+
avrotize sql2a --connection-string "postgresql://user:pass@host:5432/mydb?sslmode=require" --out schema.avsc
|
|
433
|
+
|
|
434
|
+
# MySQL with SSL
|
|
435
|
+
avrotize sql2a --connection-string "mysql://user:pass@host:3306/mydb?ssl=true" --dialect mysql --out schema.avsc
|
|
436
|
+
|
|
437
|
+
# SQL Server with Windows Authentication (omit user/password)
|
|
438
|
+
avrotize sql2a --connection-string "mssql://@host:1433/mydb" --dialect sqlserver --out schema.avsc
|
|
439
|
+
|
|
440
|
+
# SQL Server with TLS encryption
|
|
441
|
+
avrotize sql2a --connection-string "mssql://user:pass@host:1433/mydb?encrypt=true" --dialect sqlserver --out schema.avsc
|
|
442
|
+
|
|
443
|
+
# SQLite file
|
|
444
|
+
avrotize sql2a --connection-string "/path/to/database.db" --dialect sqlite --out schema.avsc
|
|
445
|
+
```
|
|
446
|
+
|
|
447
|
+
Conversion notes:
|
|
448
|
+
|
|
449
|
+
- The tool connects to a live database and reads the schema from the information schema or system catalogs.
|
|
450
|
+
- Type mappings for each dialect:
|
|
451
|
+
- **PostgreSQL**: All standard types including `uuid`, `jsonb`, `xml`, arrays, and custom types.
|
|
452
|
+
- **MySQL**: Standard types including `json`, `enum`, `set`, and spatial types.
|
|
453
|
+
- **SQL Server**: Standard types including `uniqueidentifier`, `xml`, `money`, and `hierarchyid`.
|
|
454
|
+
- **Oracle**: Standard types including `number`, `clob`, `blob`, and Oracle-specific types.
|
|
455
|
+
- **SQLite**: Dynamic typing mapped based on declared type affinity.
|
|
456
|
+
- For JSON/JSONB columns (PostgreSQL, MySQL) and XML columns, the tool samples data to infer the structure. Fields that appear in some but not all records are folded together. If field types conflict across records, the tool emits a union of record types.
|
|
457
|
+
- For columns with keys that cannot be valid Avro identifiers (UUIDs, URLs, special characters), the tool generates `map<string, T>` types instead of record types.
|
|
458
|
+
- Table and column comments are preserved as Avro `doc` attributes where available.
|
|
459
|
+
- Primary key columns are noted in the schema's `unique` attribute.
|
|
460
|
+
|
|
461
|
+
### Infer Avro schema from JSON files
|
|
462
|
+
|
|
463
|
+
```bash
|
|
464
|
+
avrotize json2a <json_files...> [--out <path_to_avro_schema_file>] [--type-name <name>] [--namespace <namespace>] [--sample-size <n>]
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
Parameters:
|
|
468
|
+
|
|
469
|
+
- `<json_files...>`: One or more JSON files to analyze. Supports JSON arrays, single objects, and JSONL (JSON Lines) format.
|
|
470
|
+
- `--out`: The path to the Avro schema file. If omitted, output goes to stdout.
|
|
471
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
472
|
+
- `--namespace`: (optional) Avro namespace for generated types.
|
|
473
|
+
- `--sample-size`: (optional) Maximum number of records to sample (0 = all, default: 0).
|
|
474
|
+
|
|
475
|
+
Example:
|
|
476
|
+
|
|
477
|
+
```bash
|
|
478
|
+
# Infer schema from multiple JSON files
|
|
479
|
+
avrotize json2a data1.json data2.json --out schema.avsc --type-name Event --namespace com.example
|
|
480
|
+
|
|
481
|
+
# Infer schema from JSONL file
|
|
482
|
+
avrotize json2a events.jsonl --out events.avsc --type-name LogEntry
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
### Infer JSON Structure schema from JSON files
|
|
486
|
+
|
|
487
|
+
```bash
|
|
488
|
+
avrotize json2s <json_files...> [--out <path_to_jstruct_schema_file>] [--type-name <name>] [--base-id <uri>] [--sample-size <n>]
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
Parameters:
|
|
492
|
+
|
|
493
|
+
- `<json_files...>`: One or more JSON files to analyze.
|
|
494
|
+
- `--out`: The path to the JSON Structure schema file. If omitted, output goes to stdout.
|
|
495
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
496
|
+
- `--base-id`: (optional) Base URI for $id generation (default: "https://example.com/").
|
|
497
|
+
- `--sample-size`: (optional) Maximum number of records to sample (0 = all, default: 0).
|
|
498
|
+
|
|
499
|
+
Example:
|
|
500
|
+
|
|
501
|
+
```bash
|
|
502
|
+
avrotize json2s data.json --out schema.jstruct.json --type-name Person --base-id https://myapi.example.com/schemas/
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
### Infer Avro schema from XML files
|
|
506
|
+
|
|
507
|
+
```bash
|
|
508
|
+
avrotize xml2a <xml_files...> [--out <path_to_avro_schema_file>] [--type-name <name>] [--namespace <namespace>] [--sample-size <n>]
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
Parameters:
|
|
512
|
+
|
|
513
|
+
- `<xml_files...>`: One or more XML files to analyze.
|
|
514
|
+
- `--out`: The path to the Avro schema file. If omitted, output goes to stdout.
|
|
515
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
516
|
+
- `--namespace`: (optional) Avro namespace for generated types.
|
|
517
|
+
- `--sample-size`: (optional) Maximum number of documents to sample (0 = all, default: 0).
|
|
518
|
+
|
|
519
|
+
Example:
|
|
520
|
+
|
|
521
|
+
```bash
|
|
522
|
+
avrotize xml2a config.xml --out config.avsc --type-name Configuration --namespace com.example.config
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
### Infer JSON Structure schema from XML files
|
|
526
|
+
|
|
527
|
+
```bash
|
|
528
|
+
avrotize xml2s <xml_files...> [--out <path_to_jstruct_schema_file>] [--type-name <name>] [--base-id <uri>] [--sample-size <n>]
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
Parameters:
|
|
532
|
+
|
|
533
|
+
- `<xml_files...>`: One or more XML files to analyze.
|
|
534
|
+
- `--out`: The path to the JSON Structure schema file. If omitted, output goes to stdout.
|
|
535
|
+
- `--type-name`: (optional) Name for the root type (default: "Document").
|
|
536
|
+
- `--base-id`: (optional) Base URI for $id generation (default: "https://example.com/").
|
|
537
|
+
- `--sample-size`: (optional) Maximum number of documents to sample (0 = all, default: 0).
|
|
538
|
+
|
|
539
|
+
Conversion notes (apply to all inference commands):
|
|
540
|
+
|
|
541
|
+
- XML attributes are converted to fields prefixed with `@` (normalized to valid identifiers).
|
|
542
|
+
- Text content in mixed-content elements becomes a `#text` field.
|
|
543
|
+
- Repeated elements are inferred as arrays.
|
|
544
|
+
- Multiple files with different structures are merged into a unified schema.
|
|
545
|
+
- Sparse data (fields that appear in some but not all records) is folded into a single type.
|
|
546
|
+
|
|
429
547
|
### Convert Avrotize Schema to Kusto table declaration
|
|
430
548
|
|
|
431
549
|
```bash
|
|
@@ -676,7 +794,7 @@ Conversion notes:
|
|
|
676
794
|
### Convert Avrotize Schema to Iceberg schema
|
|
677
795
|
|
|
678
796
|
```bash
|
|
679
|
-
avrotize a2ib <path_to_avro_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-avro>] [--emit-cloudevents-columns]
|
|
797
|
+
avrotize a2ib <path_to_avro_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-avro>] [--emit-cloudevents-columns] [--format schema|arrow]
|
|
680
798
|
```
|
|
681
799
|
|
|
682
800
|
Parameters:
|
|
@@ -685,6 +803,7 @@ Parameters:
|
|
|
685
803
|
- `--out`: The path to the Iceberg schema file to write the conversion result to. If omitted, the output is directed to stdout.
|
|
686
804
|
- `--record-type`: (optional) The name of the Avro record type to convert to an Iceberg schema.
|
|
687
805
|
- `--emit-cloudevents-columns`: (optional) If set, the tool will add [CloudEvents](https://cloudevents.io) attribute columns to the Iceberg schema: `__id`, `__source`, `__subject`, `__type`, and `__time`.
|
|
806
|
+
- `--format`: (optional) Output format. `schema` (default) outputs JSON per the [Iceberg Table Spec Appendix C](https://iceberg.apache.org/spec/#appendix-c-json-serialization). `arrow` outputs a binary Arrow IPC serialized schema.
|
|
688
807
|
|
|
689
808
|
Conversion notes:
|
|
690
809
|
|
|
@@ -695,7 +814,7 @@ Conversion notes:
|
|
|
695
814
|
### Convert JSON Structure to Iceberg schema
|
|
696
815
|
|
|
697
816
|
```bash
|
|
698
|
-
avrotize s2ib <path_to_structure_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-structure>] [--emit-cloudevents-columns]
|
|
817
|
+
avrotize s2ib <path_to_structure_schema_file> [--out <path_to_iceberg_schema_file>] [--record-type <record-type-from-structure>] [--emit-cloudevents-columns] [--format schema|arrow]
|
|
699
818
|
```
|
|
700
819
|
|
|
701
820
|
Parameters:
|
|
@@ -704,6 +823,7 @@ Parameters:
|
|
|
704
823
|
- `--out`: The path to the Iceberg schema file to write the conversion result to. If omitted, the output is directed to stdout.
|
|
705
824
|
- `--record-type`: (optional) The name of the record type in definitions to convert to an Iceberg schema.
|
|
706
825
|
- `--emit-cloudevents-columns`: (optional) If set, the tool will add [CloudEvents](https://cloudevents.io) attribute columns to the Iceberg schema: `___id`, `___source`, `___subject`, `___type`, and `___time`.
|
|
826
|
+
- `--format`: (optional) Output format. `schema` (default) outputs JSON per the [Iceberg Table Spec Appendix C](https://iceberg.apache.org/spec/#appendix-c-json-serialization). `arrow` outputs a binary Arrow IPC serialized schema.
|
|
707
827
|
|
|
708
828
|
Conversion notes:
|
|
709
829
|
|
|
@@ -1306,6 +1426,45 @@ Conversion notes:
|
|
|
1306
1426
|
- The tool generates the Parsing Canonical Form (PCF) of the Avrotize Schema. The PCF is a normalized form of the schema that is used for schema comparison and compatibility checking.
|
|
1307
1427
|
- The PCF is a JSON object that is written to stdout.
|
|
1308
1428
|
|
|
1429
|
+
### Validate JSON instances against schemas
|
|
1430
|
+
|
|
1431
|
+
```bash
|
|
1432
|
+
avrotize validate <json_files...> --schema <schema_file> [--schema-type <type>] [--quiet]
|
|
1433
|
+
```
|
|
1434
|
+
|
|
1435
|
+
Parameters:
|
|
1436
|
+
|
|
1437
|
+
- `<json_files...>`: One or more JSON files to validate. Supports single JSON objects, JSON arrays, and JSONL (newline-delimited JSON) formats.
|
|
1438
|
+
- `--schema <schema_file>`: Path to the schema file (`.avsc` for Avro, `.jstruct.json` for JSON Structure).
|
|
1439
|
+
- `--schema-type`: (optional) Schema type: `avro` or `jstruct`. Auto-detected from file extension if omitted.
|
|
1440
|
+
- `--quiet`: (optional) Suppress output. Exit code 0 if all instances are valid, 1 if any are invalid.
|
|
1441
|
+
|
|
1442
|
+
Validation notes:
|
|
1443
|
+
|
|
1444
|
+
- Validates JSON instances against Avro schemas per the [Avrotize Schema specification](specs/avrotize-schema.md).
|
|
1445
|
+
- Supports all Avro primitive types: null, boolean, int, long, float, double, bytes, string.
|
|
1446
|
+
- Supports all Avro complex types: record, enum, array, map, fixed.
|
|
1447
|
+
- Supports logical types with both native and string encodings: decimal, uuid, date, time-millis, time-micros, timestamp-millis, timestamp-micros, duration.
|
|
1448
|
+
- Supports field `altnames` for JSON field name mapping.
|
|
1449
|
+
- Supports enum `altsymbols` for JSON symbol mapping.
|
|
1450
|
+
- JSON Structure validation requires the `json-structure` package.
|
|
1451
|
+
|
|
1452
|
+
Example:
|
|
1453
|
+
|
|
1454
|
+
```bash
|
|
1455
|
+
# Validate JSON file against Avro schema
|
|
1456
|
+
avrotize validate data.json --schema schema.avsc
|
|
1457
|
+
|
|
1458
|
+
# Validate multiple files
|
|
1459
|
+
avrotize validate file1.json file2.json --schema schema.avsc
|
|
1460
|
+
|
|
1461
|
+
# Validate JSONL file against JSON Structure schema
|
|
1462
|
+
avrotize validate events.jsonl --schema events.jstruct.json
|
|
1463
|
+
|
|
1464
|
+
# Quiet mode for CI/CD pipelines (exit code only)
|
|
1465
|
+
avrotize validate data.json --schema schema.avsc --quiet
|
|
1466
|
+
```
|
|
1467
|
+
|
|
1309
1468
|
### Convert JSON Structure schema to GraphQL schema
|
|
1310
1469
|
|
|
1311
1470
|
```bash
|
|
@@ -1373,4 +1532,3 @@ cat myschema.avsc | avrotize a2graphql > myschema.graphql
|
|
|
1373
1532
|
```
|
|
1374
1533
|
|
|
1375
1534
|
This document provides an overview of the usage and functionality of Avrotize. For more detailed information, please refer to the [Avrotize Schema documentation](specs/avrotize-schema.md) and the individual command help messages.
|
|
1376
|
-
|