pydantic-avro 0.0.3__tar.gz → 0.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.1
2
+ Name: pydantic-avro
3
+ Version: 0.9.2
4
+ Summary: Converting pydantic classes to avro schemas
5
+ Home-page: https://github.com/godatadriven/pydantic-avro
6
+ License: MIT
7
+ Keywords: pydantic,avro
8
+ Author: Peter van 't Hof'
9
+ Author-email: peter.vanthof@godatadriven.com
10
+ Requires-Python: >=3.8.1,<4.0
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Requires-Dist: pydantic (>=1.4,<3.0)
17
+ Project-URL: Repository, https://github.com/godatadriven/pydantic-avro
18
+ Description-Content-Type: text/markdown
19
+
20
+ [![Python package](https://github.com/godatadriven/pydantic-avro/actions/workflows/python-package.yml/badge.svg)](https://github.com/godatadriven/pydantic-avro/actions/workflows/python-package.yml)
21
+ [![codecov](https://codecov.io/gh/godatadriven/pydantic-avro/branch/main/graph/badge.svg?token=5L08GOERAW)](https://codecov.io/gh/godatadriven/pydantic-avro)
22
+ [![PyPI version](https://badge.fury.io/py/pydantic-avro.svg)](https://badge.fury.io/py/pydantic-avro)
23
+ [![CodeQL](https://github.com/godatadriven/pydantic-avro/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/godatadriven/pydantic-avro/actions/workflows/codeql-analysis.yml)
24
+
25
+ # pydantic-avro
26
+
27
+ This library can convert a pydantic class to an Avro schema or generate Python code from an Avro schema.
28
+
29
+ ### Install
30
+
31
+ ```bash
32
+ pip install pydantic-avro
33
+ ```
34
+
35
+ ### Pydantic class to avro schema
36
+
37
+ ```python
38
+ import json
39
+ from typing import Optional
40
+
41
+ from pydantic_avro.base import AvroBase
42
+
43
+
44
+ class TestModel(AvroBase):
45
+ key1: str
46
+ key2: int
47
+ key3: Optional[str]
48
+
49
+
50
+ schema_dict: dict = TestModel.avro_schema()
51
+ print(json.dumps(schema_dict))
52
+
53
+ ```
54
+
55
+ ### Avro schema to pydantic
56
+
57
+ ```shell
58
+ # Print to stdout
59
+ pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc
60
+
61
+ # Save it to a file
62
+ pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc --output /path/to/output.py
63
+ ```
64
+
65
+ ### Specify expected Avro type
66
+
67
+ ```python
68
+ from datetime import datetime
69
+ from pydantic import Field
70
+ from pydantic_avro.base import AvroBase
71
+
72
+ class ExampleModel(AvroBase):
73
+ field1: int = Field(..., avro_type="long") # Explicitly set Avro type to "long"
74
+ field2: datetime = Field(..., avro_type="timestamp-millis") # Explicitly set Avro type to "timestamp-millis"
75
+ ```
76
+
77
+ ### Install for developers
78
+
79
+ ###### Install package
80
+
81
+ - Requirement: Poetry 1.*
82
+
83
+ ```shell
84
+ poetry install
85
+ ```
86
+
87
+ ###### Run unit tests
88
+ ```shell
89
+ pytest
90
+ coverage run -m pytest # with coverage
91
+ # or (depends on your local env)
92
+ poetry run pytest
93
+ poetry run coverage run -m pytest # with coverage
94
+ ```
95
+
96
+ ##### Run linting
97
+
98
+ The linting is checked in the GitHub workflow. To fix and review issues run this:
99
+ ```shell
100
+ black . # Auto fix all issues
101
+ isort . # Auto fix all issues
102
+ pflake8 . # Only display issues, fixing is manual
103
+ ```
104
+
@@ -0,0 +1,84 @@
1
+ [![Python package](https://github.com/godatadriven/pydantic-avro/actions/workflows/python-package.yml/badge.svg)](https://github.com/godatadriven/pydantic-avro/actions/workflows/python-package.yml)
2
+ [![codecov](https://codecov.io/gh/godatadriven/pydantic-avro/branch/main/graph/badge.svg?token=5L08GOERAW)](https://codecov.io/gh/godatadriven/pydantic-avro)
3
+ [![PyPI version](https://badge.fury.io/py/pydantic-avro.svg)](https://badge.fury.io/py/pydantic-avro)
4
+ [![CodeQL](https://github.com/godatadriven/pydantic-avro/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/godatadriven/pydantic-avro/actions/workflows/codeql-analysis.yml)
5
+
6
+ # pydantic-avro
7
+
8
+ This library can convert a pydantic class to an Avro schema or generate Python code from an Avro schema.
9
+
10
+ ### Install
11
+
12
+ ```bash
13
+ pip install pydantic-avro
14
+ ```
15
+
16
+ ### Pydantic class to avro schema
17
+
18
+ ```python
19
+ import json
20
+ from typing import Optional
21
+
22
+ from pydantic_avro.base import AvroBase
23
+
24
+
25
+ class TestModel(AvroBase):
26
+ key1: str
27
+ key2: int
28
+ key3: Optional[str]
29
+
30
+
31
+ schema_dict: dict = TestModel.avro_schema()
32
+ print(json.dumps(schema_dict))
33
+
34
+ ```
35
+
36
+ ### Avro schema to pydantic
37
+
38
+ ```shell
39
+ # Print to stdout
40
+ pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc
41
+
42
+ # Save it to a file
43
+ pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc --output /path/to/output.py
44
+ ```
45
+
46
+ ### Specify expected Avro type
47
+
48
+ ```python
49
+ from datetime import datetime
50
+ from pydantic import Field
51
+ from pydantic_avro.base import AvroBase
52
+
53
+ class ExampleModel(AvroBase):
54
+ field1: int = Field(..., avro_type="long") # Explicitly set Avro type to "long"
55
+ field2: datetime = Field(..., avro_type="timestamp-millis") # Explicitly set Avro type to "timestamp-millis"
56
+ ```
57
+
58
+ ### Install for developers
59
+
60
+ ###### Install package
61
+
62
+ - Requirement: Poetry 1.*
63
+
64
+ ```shell
65
+ poetry install
66
+ ```
67
+
68
+ ###### Run unit tests
69
+ ```shell
70
+ pytest
71
+ coverage run -m pytest # with coverage
72
+ # or (depends on your local env)
73
+ poetry run pytest
74
+ poetry run coverage run -m pytest # with coverage
75
+ ```
76
+
77
+ ##### Run linting
78
+
79
+ The linting is checked in the GitHub workflow. To fix and review issues run this:
80
+ ```shell
81
+ black . # Auto fix all issues
82
+ isort . # Auto fix all issues
83
+ pflake8 . # Only display issues, fixing is manual
84
+ ```
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pydantic-avro"
3
- version = "0.0.3"
3
+ version = "0.9.2"
4
4
  description = "Converting pydantic classes to avro schemas"
5
5
  authors = ["Peter van 't Hof' <peter.vanthof@godatadriven.com>"]
6
6
 
@@ -9,28 +9,26 @@ readme = "README.md"
9
9
  license = "MIT"
10
10
  homepage = "https://github.com/godatadriven/pydantic-avro"
11
11
  repository = "https://github.com/godatadriven/pydantic-avro"
12
- include = [
13
- "LICENSE",
14
- ]
15
12
 
16
13
 
17
- packages = [{ include = "pydantic_avro", from = "src"}]
14
+ packages = [{ include = "pydantic_avro", from = "src" }]
18
15
 
19
16
  [tool.poetry.dependencies]
20
- python = ">=3.6.1,<4.0"
21
- pydantic = ">=1.0.0"
17
+ python = ">=3.8.1,<4.0"
18
+ pydantic = ">=1.4,<3.0"
22
19
 
23
20
  [tool.poetry.dev-dependencies]
24
- coverage= {version= "^6.1.1", extras=["toml"]}
25
- pytest= "6.1.0"
26
- pytest-mock="3.3.1"
27
- pyproject-flake8 ="^"
28
- isort ="^5.10.0"
29
- black="20.8b1"
30
- pytest-cov= "^2.10.1"
31
- mypy = "^0.910"
32
- avro = ""
33
- fastavro = ""
21
+ coverage = { version = "^7.6.1", extras = ["toml"] }
22
+ pytest = "^8.3.5"
23
+ pytest-mock = "^3.10.0"
24
+ pyproject-flake8 = "^7.0.0"
25
+ isort = "^5.10.0"
26
+ black = "24.8.0"
27
+ pytest-cov = "^5.0.0"
28
+ mypy = "^1.1.1"
29
+ avro = "^1.12.0"
30
+ fastavro = "^1.8.1"
31
+ typing-extensions = "^4.13.2"
34
32
 
35
33
  [tool.poetry.scripts]
36
34
  pydantic-avro = "pydantic_avro.__main__:root_main"
@@ -51,7 +49,7 @@ show_missing = true
51
49
 
52
50
  [tool.coverage.run]
53
51
  command_line = "-m pytest -v tests/"
54
- omit = ["tests/*",".venv/*"]
52
+ omit = ["tests/*", ".venv/*"]
55
53
 
56
54
  [tool.flake8]
57
55
  max-line-length = 180
@@ -65,6 +63,7 @@ line_length = 120
65
63
  [tool.mypy]
66
64
  files = "src/"
67
65
  python_version = "3.9"
66
+ ignore_missing_imports = "true"
68
67
 
69
68
  [build-system]
70
69
  requires = ["poetry-core>=1.0.0"]
@@ -0,0 +1,38 @@
1
+ # -*- coding: utf-8 -*-
2
+ from setuptools import setup
3
+
4
+ package_dir = \
5
+ {'': 'src'}
6
+
7
+ packages = \
8
+ ['pydantic_avro', 'pydantic_avro.from_avro', 'pydantic_avro.to_avro']
9
+
10
+ package_data = \
11
+ {'': ['*']}
12
+
13
+ install_requires = \
14
+ ['pydantic>=1.4,<3.0']
15
+
16
+ entry_points = \
17
+ {'console_scripts': ['pydantic-avro = pydantic_avro.__main__:root_main']}
18
+
19
+ setup_kwargs = {
20
+ 'name': 'pydantic-avro',
21
+ 'version': '0.9.2',
22
+ 'description': 'Converting pydantic classes to avro schemas',
23
+ 'long_description': '[![Python package](https://github.com/godatadriven/pydantic-avro/actions/workflows/python-package.yml/badge.svg)](https://github.com/godatadriven/pydantic-avro/actions/workflows/python-package.yml)\n[![codecov](https://codecov.io/gh/godatadriven/pydantic-avro/branch/main/graph/badge.svg?token=5L08GOERAW)](https://codecov.io/gh/godatadriven/pydantic-avro)\n[![PyPI version](https://badge.fury.io/py/pydantic-avro.svg)](https://badge.fury.io/py/pydantic-avro)\n[![CodeQL](https://github.com/godatadriven/pydantic-avro/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/godatadriven/pydantic-avro/actions/workflows/codeql-analysis.yml)\n\n# pydantic-avro\n\nThis library can convert a pydantic class to a avro schema or generate python code from a avro schema.\n\n### Install\n\n```bash\npip install pydantic-avro\n```\n\n### Pydantic class to avro schema\n\n```python\nimport json\nfrom typing import Optional\n\nfrom pydantic_avro.base import AvroBase\n\n\nclass TestModel(AvroBase):\n key1: str\n key2: int\n key2: Optional[str]\n\n\nschema_dict: dict = TestModel.avro_schema()\nprint(json.dumps(schema_dict))\n\n```\n\n### Avro schema to pydantic\n\n```shell\n# Print to stdout\npydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc\n\n# Save it to a file\npydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc --output /path/to/output.py\n```\n\n### Specify expected Avro type\n\n```python\nfrom datetime import datetime\nfrom pydantic import Field\nfrom pydantic_avro.base import AvroBase \n\nclass ExampleModel(AvroBase):\n field1: int = Field(..., avro_type="long") # Explicitly set Avro type to "long"\n field2: datetime = Field(..., avro_type="timestamp-millis") # Explicitly set Avro type to "timestamp-millis"\n```\n\n### Install for developers\n\n###### Install package\n\n- Requirement: Poetry 1.*\n\n```shell\npoetry install\n```\n\n###### Run unit tests\n```shell\npytest\ncoverage run -m pytest # with coverage\n# or (depends on 
your local env) \npoetry run pytest\npoetry run coverage run -m pytest # with coverage\n```\n\n##### Run linting\n\nThe linting is checked in the github workflow. To fix and review issues run this:\n```shell\nblack . # Auto fix all issues\nisort . # Auto fix all issues\npflake . # Only display issues, fixing is manual\n```\n',
24
+ 'author': "Peter van 't Hof'",
25
+ 'author_email': 'peter.vanthof@godatadriven.com',
26
+ 'maintainer': 'None',
27
+ 'maintainer_email': 'None',
28
+ 'url': 'https://github.com/godatadriven/pydantic-avro',
29
+ 'package_dir': package_dir,
30
+ 'packages': packages,
31
+ 'package_data': package_data,
32
+ 'install_requires': install_requires,
33
+ 'entry_points': entry_points,
34
+ 'python_requires': '>=3.8.1,<4.0',
35
+ }
36
+
37
+
38
+ setup(**setup_kwargs)
@@ -0,0 +1 @@
1
+ from pydantic_avro.to_avro.base import AvroBase
@@ -2,7 +2,7 @@ import argparse
2
2
  import sys
3
3
  from typing import List
4
4
 
5
- from pydantic_avro.avro_to_pydantic import convert_file
5
+ from pydantic_avro.from_avro.avro_to_pydantic import convert_file
6
6
 
7
7
 
8
8
  def main(input_args: List[str]):
@@ -0,0 +1,2 @@
1
+ # For backwards compatibility
2
+ from pydantic_avro.to_avro.base import AvroBase
@@ -0,0 +1,48 @@
1
+ import json
2
+ from typing import Optional
3
+
4
+ from pydantic_avro.from_avro.class_registery import ClassRegistry
5
+ from pydantic_avro.from_avro.types import get_pydantic_type
6
+
7
+
8
+ def validate_schema(schema: dict) -> None:
9
+ if "type" not in schema:
10
+ raise AttributeError("Type not supported")
11
+ if "name" not in schema:
12
+ raise AttributeError("Name is required")
13
+ if "fields" not in schema:
14
+ raise AttributeError("Fields are required")
15
+
16
+
17
+ def avsc_to_pydantic(schema: dict) -> str:
18
+ """Generate python code of pydantic of given Avro Schema"""
19
+ # Ensures that state is clean from previous calls
20
+ ClassRegistry().clear()
21
+ validate_schema(schema)
22
+ get_pydantic_type(schema)
23
+
24
+ file_content = """
25
+ from datetime import date, datetime, time
26
+ from decimal import Decimal
27
+ from enum import Enum
28
+ from typing import List, Optional, Dict, Union
29
+ from uuid import UUID
30
+
31
+ from pydantic import BaseModel, Field
32
+
33
+
34
+ """
35
+ file_content += "\n\n".join(ClassRegistry().classes.values())
36
+
37
+ return file_content
38
+
39
+
40
+ def convert_file(avsc_path: str, output_path: Optional[str] = None):
41
+ with open(avsc_path, "r") as fh:
42
+ avsc_dict = json.load(fh)
43
+ file_content = avsc_to_pydantic(avsc_dict)
44
+ if output_path is None:
45
+ print(file_content)
46
+ else:
47
+ with open(output_path, "w") as fh:
48
+ fh.write(file_content)
@@ -0,0 +1,29 @@
1
+ class ClassRegistry:
2
+ """Singleton class to store generated Pydantic classes."""
3
+
4
+ _instance = None
5
+ _classes: dict = {}
6
+
7
+ def __new__(cls):
8
+ """Singleton implementation."""
9
+ if cls._instance is None:
10
+ cls._instance = super(ClassRegistry, cls).__new__(cls)
11
+ cls._instance._classes = {}
12
+ return cls._instance
13
+
14
+ def add_class(self, name: str, class_def: str):
15
+ """Add a class to the registry."""
16
+ self._classes[name] = class_def
17
+
18
+ @property
19
+ def classes(self) -> dict:
20
+ """Get all classes in the registry."""
21
+ return self._classes
22
+
23
+ def has_class(self, name: str) -> bool:
24
+ """Check if a class is in the registry."""
25
+ return name in self._classes.keys()
26
+
27
+ def clear(self):
28
+ """Clear all classes from the registry."""
29
+ self._classes.clear()
@@ -0,0 +1,164 @@
1
+ import json
2
+ from typing import Callable, Union
3
+
4
+ from pydantic_avro.from_avro.class_registery import ClassRegistry
5
+
6
+ LOGICAL_TYPES = {
7
+ "uuid": "UUID",
8
+ "decimal": "Decimal",
9
+ "timestamp-millis": "datetime",
10
+ "timestamp-micros": "datetime",
11
+ "time-millis": "time",
12
+ "time-micros": "time",
13
+ "date": "date",
14
+ }
15
+
16
+
17
+ AVRO_TO_PY_MAPPING = {
18
+ "string": "str",
19
+ "int": "int",
20
+ "long": "int",
21
+ "boolean": "bool",
22
+ "double": "float",
23
+ "float": "float",
24
+ "bytes": "bytes",
25
+ }
26
+
27
+
28
+ def list_type_handler(t: dict) -> str:
29
+ """Get the Python type of a given Avro list type"""
30
+ l = t["type"]
31
+ if "null" in l and len(l) == 2:
32
+ c = l.copy()
33
+ c.remove("null")
34
+ return f"Optional[{get_pydantic_type(c[0])}]"
35
+ if "null" in l:
36
+ return f"Optional[Union[{','.join([get_pydantic_type(e) for e in l if e != 'null'])}]]"
37
+ return f"Union[{','.join([get_pydantic_type(e) for e in l])}]"
38
+
39
+
40
+ def map_type_handler(t: dict) -> str:
41
+ """Get the Python type of a given Avro map type"""
42
+ type_field = t["type"]
43
+ value_type = None
44
+ if isinstance(type_field, dict):
45
+ avro_value_type = type_field.get("values")
46
+ if avro_value_type is None:
47
+ raise AttributeError(f"Values are required for map type. Received: {t}")
48
+ value_type = get_pydantic_type(avro_value_type)
49
+ if isinstance(type_field, str):
50
+ value_type = t.get("values")
51
+
52
+ if value_type is None:
53
+ raise AttributeError(f"Values are required for map type. Received: {t}")
54
+
55
+ return f"Dict[str, {value_type}]"
56
+
57
+
58
+ def logical_type_handler(t: dict) -> str:
59
+ """Get the Python type of a given Avro logical type"""
60
+ return LOGICAL_TYPES[t["type"]["logicalType"]]
61
+
62
+
63
+ def enum_type_handler(t: dict) -> str:
64
+ """Gets the enum type of a given Avro enum type and adds it to the class registry"""
65
+ if t["type"] == "enum":
66
+ # comes from a unioned enum (e.g. ["null", "enum"])
67
+ type_info = t
68
+ else:
69
+ # comes from a direct enum
70
+ type_info = t["type"]
71
+
72
+ name = type_info["name"]
73
+ symbols = type_info["symbols"]
74
+
75
+ if not ClassRegistry().has_class(name):
76
+ enum_class = f"class {name}(str, Enum):\n"
77
+ for s in symbols:
78
+ enum_class += f' {s} = "{s}"\n'
79
+ ClassRegistry().add_class(name, enum_class)
80
+ return name
81
+
82
+
83
+ def array_type_handler(t: dict) -> str:
84
+ """Get the Python type of a given Avro array type"""
85
+ if isinstance(t["type"], dict):
86
+ sub_type = get_pydantic_type(t["type"]["items"])
87
+ else:
88
+ sub_type = get_pydantic_type(t["items"])
89
+ return f"List[{sub_type}]"
90
+
91
+
92
+ def record_type_handler(t: dict) -> str:
93
+ """Gets the record type of a given Avro record type and adds it to the class registry"""
94
+ t = t["type"] if isinstance(t["type"], dict) else t
95
+ name = t["name"]
96
+ fields = t["fields"] if "fields" in t else t["type"]["fields"]
97
+ field_strings = [generate_field_string(field) for field in fields]
98
+ class_body = "\n".join(field_strings) if field_strings else " pass"
99
+ current = f"class {name}(BaseModel):\n{class_body}\n"
100
+ ClassRegistry().add_class(name, current)
101
+ return name
102
+
103
+
104
+ TYPE_HANDLERS = {
105
+ "list": list_type_handler,
106
+ "map": map_type_handler,
107
+ "logical": logical_type_handler,
108
+ "enum": enum_type_handler,
109
+ "array": array_type_handler,
110
+ "record": record_type_handler,
111
+ }
112
+
113
+
114
+ def generate_field_string(field: dict) -> str:
115
+ """Generate a string representing a field in the Pydantic model."""
116
+ n = field["name"]
117
+ t = get_pydantic_type(field)
118
+ default = field.get("default")
119
+ if field["type"] == "int" and "default" in field and isinstance(default, (bool, type(None))):
120
+ return f" {n}: {t} = Field({default}, ge=-2**31, le=(2**31 - 1))"
121
+ elif field["type"] == "int" and "default" in field:
122
+ return f" {n}: {t} = Field({json.dumps(default)}, ge=-2**31, le=(2**31 - 1))"
123
+ elif field["type"] == "int":
124
+ return f" {n}: {t} = Field(..., ge=-2**31, le=(2**31 - 1))"
125
+ elif "default" not in field:
126
+ return f" {n}: {t}"
127
+ elif isinstance(default, (bool, type(None))):
128
+ return f" {n}: {t} = {default}"
129
+ else:
130
+ return f" {n}: {t} = {json.dumps(default)}"
131
+
132
+
133
+ def get_pydantic_type(t: Union[str, dict]) -> str:
134
+ """Get the Pydantic type for a given Avro type"""
135
+ if isinstance(t, str) or isinstance(t, list):
136
+ t = {"type": t}
137
+
138
+ if isinstance(t.get("type"), str):
139
+ if ClassRegistry().has_class(t["type"]):
140
+ return t["type"]
141
+
142
+ if t["type"] in AVRO_TO_PY_MAPPING:
143
+ return AVRO_TO_PY_MAPPING[t["type"]]
144
+
145
+ return get_type_handler(t)(t)
146
+
147
+
148
+ def get_type_handler(t: dict) -> Callable:
149
+ """Get the handler for a given Avro type"""
150
+ h = None
151
+ t = t["type"]
152
+ if isinstance(t, str):
153
+ h = TYPE_HANDLERS.get(t)
154
+ elif isinstance(t, dict) and "logicalType" in t:
155
+ h = TYPE_HANDLERS.get("logical")
156
+ elif isinstance(t, dict) and "type" in t:
157
+ h = TYPE_HANDLERS.get(t["type"])
158
+ elif isinstance(t, list):
159
+ h = TYPE_HANDLERS.get("list")
160
+
161
+ if h:
162
+ return h
163
+
164
+ raise NotImplementedError(f"Type {t} not supported yet")
File without changes
@@ -0,0 +1,35 @@
1
+ from typing import Optional
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from pydantic_avro.to_avro.config import PYDANTIC_V2
6
+ from pydantic_avro.to_avro.types import AvroTypeConverter
7
+
8
+
9
+ class AvroBase(BaseModel):
10
+ """This class provides functionality to convert a pydantic model to an Avro schema."""
11
+
12
+ @classmethod
13
+ def avro_schema(cls, by_alias: bool = True, namespace: Optional[str] = None) -> dict:
14
+ """Returns the avro schema for the pydantic class
15
+
16
+ :param by_alias: generate the schemas using the aliases defined, if any
17
+ :param namespace: Provide an optional namespace string to use in schema generation
18
+ :return: dict with the Avro Schema for the model
19
+ """
20
+ schema = cls.model_json_schema(by_alias=by_alias) if PYDANTIC_V2 else cls.schema(by_alias=by_alias)
21
+
22
+ if namespace is None:
23
+ # Default namespace will be based on title
24
+ namespace = schema["title"]
25
+
26
+ avro_type_handler = AvroTypeConverter(schema)
27
+
28
+ return cls._avro_schema(schema, namespace, avro_type_handler)
29
+
30
+ @staticmethod
31
+ def _avro_schema(schema: dict, namespace: str, avro_type_handler: AvroTypeConverter) -> dict:
32
+ """Return the avro schema for the given pydantic schema"""
33
+
34
+ fields = avro_type_handler.fields_to_avro_dicts(schema)
35
+ return {"type": "record", "namespace": namespace, "name": schema["title"], "fields": fields}
@@ -0,0 +1,4 @@
1
+ from pydantic import VERSION as PYDANTIC_VERSION
2
+
3
+ PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
4
+ DEFS_NAME = "$defs" if PYDANTIC_V2 else "definitions"
@@ -0,0 +1,323 @@
1
+ from typing import Any, Dict, List, Optional, Set
2
+
3
+ from pydantic_avro.to_avro.config import DEFS_NAME
4
+
5
+ STRING_TYPE_MAPPING = {
6
+ "date-time": {
7
+ "type": "long",
8
+ "logicalType": "timestamp-micros",
9
+ },
10
+ "date": {
11
+ "type": "int",
12
+ "logicalType": "date",
13
+ },
14
+ "time": {
15
+ "type": "long",
16
+ "logicalType": "time-micros",
17
+ },
18
+ "uuid": {
19
+ "type": "string",
20
+ "logicalType": "uuid",
21
+ },
22
+ "binary": "bytes",
23
+ }
24
+
25
+ AVRO_TYPE_MAPPING = {
26
+ "timestamp-millis": {
27
+ "type": "long",
28
+ "logicalType": "timestamp-millis",
29
+ },
30
+ "timestamp-micros": {
31
+ "type": "long",
32
+ "logicalType": "timestamp-micros",
33
+ },
34
+ "time-millis": {
35
+ "type": "int",
36
+ "logicalType": "time-millis",
37
+ },
38
+ "time-micros": {
39
+ "type": "long",
40
+ "logicalType": "time-micros",
41
+ },
42
+ "decimal": {
43
+ "type": "bytes",
44
+ "logicalType": "decimal",
45
+ },
46
+ "uuid": {
47
+ "type": "string",
48
+ "logicalType": "uuid",
49
+ },
50
+ "int": "int",
51
+ "long": "long",
52
+ "float": "float",
53
+ "double": "double",
54
+ "boolean": "boolean",
55
+ "bytes": "bytes",
56
+ "string": "string",
57
+ "null": "null",
58
+ "date": {
59
+ "type": "int",
60
+ "logicalType": "date",
61
+ },
62
+ }
63
+
64
+ PRIMITVE_TYPES = ["int", "long", "float", "double", "boolean", "null"]
65
+
66
+
67
+ def get_definition(ref: str, schema: dict):
68
+ """Reading definition of base schema for nested structs"""
69
+ id = ref.replace(f"#/{DEFS_NAME}/", "")
70
+ d = schema.get(DEFS_NAME, {}).get(id)
71
+ if d is None:
72
+ raise RuntimeError(f"Definition {id} does not exist")
73
+ return d
74
+
75
+
76
+ def set_nullability(avro_type_dict: dict) -> dict:
77
+ """Set the nullability of the field"""
78
+ if type(avro_type_dict["type"]) is list:
79
+ if "null" not in avro_type_dict["type"]:
80
+ avro_type_dict["type"].insert(0, "null")
81
+ elif avro_type_dict.get("default") is None:
82
+ avro_type_dict["type"] = ["null", avro_type_dict["type"]]
83
+ avro_type_dict.setdefault("default", None)
84
+ return avro_type_dict
85
+
86
+
87
+ def null_to_first_element(avro_type_dict: dict) -> dict:
88
+ """Set the null as the first element in the list as per avro schema requirements"""
89
+ if type(avro_type_dict["type"]) is list and "null" in avro_type_dict["type"]:
90
+ avro_type_dict["type"].remove("null")
91
+ avro_type_dict["type"].insert(0, "null")
92
+ return avro_type_dict
93
+
94
+
95
+ class AvroTypeConverter:
96
+ """Converts Pydantic schema to AVRO schema."""
97
+
98
+ def __init__(self, schema: dict):
99
+ self.root_schema = schema
100
+ self.classes_seen: Set[str] = set()
101
+
102
+ def fields_to_avro_dicts(self, parent_schema: dict) -> List[dict]:
103
+ """Converts fields from the schema to AVRO and returns them as a list of dictionaries.
104
+
105
+ :param parent_schema: The parent schema of the field (not the root schema for nested models)
106
+ """
107
+ fields = []
108
+
109
+ required = parent_schema.get("required", [])
110
+ for name, field_props in parent_schema.get("properties", {}).items():
111
+ avro_type_dict = self._get_avro_type_dict(field_props=field_props)
112
+ avro_type_dict["name"] = name
113
+ if name not in required:
114
+ set_nullability(avro_type_dict)
115
+ avro_type_dict = null_to_first_element(avro_type_dict)
116
+
117
+ fields.append(avro_type_dict)
118
+ return fields
119
+
120
+ def _get_avro_type_dict(self, field_props: dict) -> dict:
121
+ """Returns a type of a single field"""
122
+ avro_type_dict: Dict[str, Any] = {}
123
+ if "default" in field_props:
124
+ avro_type_dict["default"] = field_props.get("default")
125
+ if "description" in field_props:
126
+ avro_type_dict["doc"] = field_props.get("description")
127
+ avro_type_dict = self._get_avro_type(field_props, avro_type_dict)
128
+ return avro_type_dict
129
+
130
+ def _get_avro_type(self, field_props: dict, avro_type_dict: dict) -> dict:
131
+ """Returns AVRO type of a single field"""
132
+ t = field_props.get("type")
133
+ f = field_props.get("format")
134
+ r = field_props.get("$ref")
135
+ at = field_props.get("avro_type")
136
+ if "allOf" in field_props and len(field_props["allOf"]) == 1:
137
+ r = field_props["allOf"][0]["$ref"]
138
+ if ("prefixItems" in field_props or ("minItems" in field_props and "maxItems" in field_props)) and t == "array":
139
+ t = "tuple"
140
+ u = field_props.get("anyOf")
141
+ o = field_props.get("oneOf")
142
+ discriminator = field_props.get("discriminator")
143
+
144
+ if u is not None:
145
+ return self._union_to_avro(u, avro_type_dict, discriminator)
146
+ elif o is not None:
147
+ return self._union_to_avro(o, avro_type_dict, discriminator)
148
+ elif r is not None:
149
+ return self._handle_references(r, avro_type_dict)
150
+ elif t is None:
151
+ raise ValueError(f"Field '{field_props}' does not have a defined type.")
152
+ elif at is not None:
153
+ if not isinstance(at, str) or at not in AVRO_TYPE_MAPPING:
154
+ raise ValueError(
155
+ f"Field '{field_props}' does not have a supported avro_type. Type should be one of "
156
+ f" {AVRO_TYPE_MAPPING.keys()}"
157
+ )
158
+ avro_type_dict["type"] = AVRO_TYPE_MAPPING[at]
159
+ elif t == "array":
160
+ return self._array_to_avro(field_props, avro_type_dict)
161
+ elif t == "string":
162
+ avro_type_dict["type"] = self._string_to_avro(f)
163
+ elif t == "integer":
164
+ avro_type_dict["type"] = self._integer_to_avro(field_props)
165
+ elif t == "object":
166
+ avro_type_dict["type"] = self._object_to_avro(field_props)
167
+ elif t == "number":
168
+ avro_type_dict["type"] = "double"
169
+ elif t == "boolean":
170
+ avro_type_dict["type"] = "boolean"
171
+ elif t == "null":
172
+ avro_type_dict["type"] = "null"
173
+ elif t == "tuple":
174
+ return self._tuple_to_avro(field_props, avro_type_dict)
175
+ else:
176
+ raise NotImplementedError(
177
+ f"Type '{t}' not support yet, "
178
+ f"please report this at https://github.com/godatadriven/pydantic-avro/issues"
179
+ )
180
+
181
+ return avro_type_dict
182
+
183
+ def _handle_references(self, r: str, avro_type_dict: dict) -> dict:
184
+ """Finds the type of a reference field"""
185
+ class_name = r.replace(f"#/{DEFS_NAME}/", "")
186
+ if class_name in self.classes_seen:
187
+ avro_type_dict["type"] = class_name
188
+ return avro_type_dict
189
+
190
+ d = get_definition(r, self.root_schema)
191
+ if "enum" in d:
192
+ avro_type_dict["type"] = {
193
+ "type": "enum",
194
+ "symbols": [str(v) for v in d["enum"]],
195
+ "name": d["title"],
196
+ }
197
+ else:
198
+ avro_type_dict["type"] = {
199
+ "type": "record",
200
+ "fields": self.fields_to_avro_dicts(d),
201
+ # Name of the struct should be unique to the complete schema
202
+ # Because of this the path in the schema is tracked and used as name for a nested struct/array
203
+ "name": class_name,
204
+ }
205
+
206
+ self.classes_seen.add(class_name)
207
+ return avro_type_dict
208
+
209
+ @staticmethod
210
+ def _string_to_avro(f: Optional[str]):
211
+ """Returns a type of a string field"""
212
+ if not f:
213
+ return "string"
214
+ return STRING_TYPE_MAPPING[f]
215
+
216
+ @staticmethod
217
+ def _integer_to_avro(field_props: dict) -> str:
218
+ """Returns a type of an integer field"""
219
+ minimum = field_props.get("minimum")
220
+ maximum = field_props.get("maximum")
221
+ # integer in python can be a long, only if minimum and maximum value is set an int can be used
222
+ if minimum is not None and minimum >= -(2**31) and maximum is not None and maximum <= (2**31 - 1):
223
+ return "int"
224
+ return "long"
225
+
226
+ def _object_to_avro(self, field_props: dict) -> dict:
227
+ """Returns a type of an object field"""
228
+ a = field_props.get("additionalProperties")
229
+ if not a or isinstance(a, bool):
230
+ value_type = "string"
231
+ else:
232
+ value_type = self._get_avro_type_dict(a)["type"]
233
+ return {"type": "map", "values": value_type}
234
+
235
+ def _union_to_avro(self, field_props: list, avro_type_dict: dict, discriminator: Optional[dict] = None) -> dict:
236
+ """Returns a type of a union field, including discriminated unions"""
237
+ # Handle discriminated unions
238
+ if discriminator is not None:
239
+ return self._discriminated_union_to_avro(field_props, avro_type_dict, discriminator)
240
+
241
+ # Standard union handling (unchanged)
242
+ avro_type_dict["type"] = []
243
+ for union_element in field_props:
244
+ t = self._get_avro_type_dict(union_element)
245
+ avro_type_dict["type"].append(t["type"])
246
+ return avro_type_dict
247
+
248
+ def _tuple_to_avro(self, field_props: dict, avro_type_dict: dict) -> dict:
249
+ """Returns a type of a tuple field"""
250
+ prefix_items = field_props.get("prefixItems")
251
+ if not prefix_items:
252
+ # Pydantic v1 there is no prefixItems, but minItems and maxItems and items is a list.
253
+ prefix_items = field_props.get("items")
254
+ if not prefix_items:
255
+ raise ValueError(f"Tuple Field '{field_props}' does not have any items .")
256
+ possible_types = []
257
+ for prefix_item in prefix_items:
258
+ item_type = self._get_avro_type(prefix_item, avro_type_dict).get("type")
259
+ if not item_type:
260
+ raise ValueError(f"Field '{avro_type_dict}' does not have a defined type.")
261
+ if isinstance(item_type, list):
262
+ possible_types.extend([x for x in item_type if x not in possible_types])
263
+ elif item_type not in possible_types:
264
+ possible_types.append(item_type)
265
+ avro_type_dict["type"] = {"type": "array", "items": possible_types}
266
+ return avro_type_dict
267
+
268
+ def _array_to_avro(self, field_props: dict, avro_type_dict: dict) -> dict:
269
+ """Returns a type of an array field"""
270
+ items = field_props.get("items")
271
+ # In pydantic v1. items is a list, we need to handle this case first
272
+ # E.g. [{'type': 'number'}, {'type': 'number'}]}
273
+ if isinstance(items, list):
274
+ if not len(items):
275
+ raise ValueError(f"Field '{field_props}' does not have valid items.")
276
+ items = items[0]
277
+ if not isinstance(items, dict):
278
+ raise ValueError(f"Field '{field_props}' does not have valid items.")
279
+ tn = self._get_avro_type_dict(items)
280
+ # If items in array are an object:
281
+ if "$ref" in items:
282
+ tn = tn["type"]
283
+ # If items in array are a logicalType
284
+ if isinstance(tn, dict) and isinstance(tn.get("type", None), (dict, list)):
285
+ tn = tn["type"]
286
+ avro_type_dict["type"] = {"type": "array", "items": tn}
287
+ return avro_type_dict
288
+
289
+ def _discriminated_union_to_avro(self, variants: list, avro_type_dict: dict, discriminator: dict) -> dict:
290
+ """Handles discriminated unions with a discriminator field"""
291
+ discriminator_property = discriminator.get("propertyName", "type")
292
+ variant_types = []
293
+
294
+ # Process each variant in the union
295
+ for variant in variants:
296
+ # Get the discriminator value for this variant
297
+ disc_value = None
298
+ if "properties" in variant and discriminator_property in variant.get("properties", {}):
299
+ prop = variant["properties"][discriminator_property]
300
+ if "const" in prop:
301
+ disc_value = prop["const"]
302
+ elif "enum" in prop and len(prop["enum"]) == 1:
303
+ disc_value = prop["enum"][0]
304
+
305
+ # If we can't determine the discriminator value, generate a regular record
306
+ if disc_value is None:
307
+ variant_type = self._get_avro_type_dict(variant)
308
+ else:
309
+ # Create a named record for this variant
310
+ variant_name = f"{disc_value}Variant"
311
+ if "$ref" in variant:
312
+ ref_schema = get_definition(variant["$ref"], self.root_schema)
313
+ variant_fields = self.fields_to_avro_dicts(ref_schema)
314
+ else:
315
+ variant_fields = self.fields_to_avro_dicts(variant)
316
+
317
+ variant_type = {"type": {"type": "record", "name": variant_name, "fields": variant_fields}}
318
+
319
+ variant_types.append(variant_type["type"])
320
+
321
+ # Set the union of all variant types
322
+ avro_type_dict["type"] = variant_types
323
+ return avro_type_dict
@@ -1,59 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: pydantic-avro
3
- Version: 0.0.3
4
- Summary: Converting pydantic classes to avro schemas
5
- Home-page: https://github.com/godatadriven/pydantic-avro
6
- License: MIT
7
- Keywords: pydantic,avro
8
- Author: Peter van 't Hof'
9
- Author-email: peter.vanthof@godatadriven.com
10
- Requires-Python: >=3.6.1,<4.0
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.7
15
- Classifier: Programming Language :: Python :: 3.8
16
- Classifier: Programming Language :: Python :: 3.9
17
- Requires-Dist: pydantic (>=1.0.0)
18
- Project-URL: Repository, https://github.com/godatadriven/pydantic-avro
19
- Description-Content-Type: text/markdown
20
-
21
- # pydantic-avro
22
-
23
- This library can convert a pydantic class to an avro schema or generate python code from an avro schema.
24
-
25
- ### Install
26
-
27
- ```bash
28
- pip install pydantic-avro
29
- ```
30
-
31
- ### Pydantic class to avro schema
32
-
33
- ```python
34
- import json
35
- from typing import Optional
36
-
37
- from pydantic_avro.base import AvroBase
38
-
39
- class TestModel(AvroBase):
40
- key1: str
41
- key2: int
42
- key3: Optional[str]
43
-
44
- schema_dict: dict = TestModel.avro_schema()
45
- print(json.dumps(schema_dict))
46
-
47
- ```
48
-
49
- ### Avro schema to pydantic
50
-
51
- ```bash
52
- #!/usr/bin/env bash
53
- # Print to stdout
54
- pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc
55
-
56
- # Save it to a file
57
- pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc --output /path/to/output.py
58
- ```
59
-
@@ -1,38 +0,0 @@
1
- # pydantic-avro
2
-
3
- This library can convert a pydantic class to an avro schema or generate python code from an avro schema.
4
-
5
- ### Install
6
-
7
- ```bash
8
- pip install pydantic-avro
9
- ```
10
-
11
- ### Pydantic class to avro schema
12
-
13
- ```python
14
- import json
15
- from typing import Optional
16
-
17
- from pydantic_avro.base import AvroBase
18
-
19
- class TestModel(AvroBase):
20
- key1: str
21
- key2: int
22
- key3: Optional[str]
23
-
24
- schema_dict: dict = TestModel.avro_schema()
25
- print(json.dumps(schema_dict))
26
-
27
- ```
28
-
29
- ### Avro schema to pydantic
30
-
31
- ```bash
32
- #!/usr/bin/env bash
33
- # Print to stdout
34
- pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc
35
-
36
- # Save it to a file
37
- pydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc --output /path/to/output.py
38
- ```
@@ -1,38 +0,0 @@
1
# -*- coding: utf-8 -*-
# Auto-generated build script (poetry build artifact) for pydantic-avro 0.0.3.
from setuptools import setup

# Package sources live under src/ (src-layout).
package_dir = \
{'': 'src'}

packages = \
['pydantic_avro']

# Ship every non-python data file found alongside the packages.
package_data = \
{'': ['*']}

# Runtime dependency: only pydantic.
install_requires = \
['pydantic>=1.0.0']

# Expose the CLI entry point as the `pydantic-avro` command.
entry_points = \
{'console_scripts': ['pydantic-avro = pydantic_avro.__main__:root_main']}

# All metadata collected into one dict and splatted into setup() below.
setup_kwargs = {
    'name': 'pydantic-avro',
    'version': '0.0.3',
    'description': 'Converting pydantic classes to avro schemas',
    'long_description': '# pydantic-avro\n\nThis library can convert a pydantic class to a avro schema or generate python code from a avro schema.\n\n### Install\n\n```bash\npip install pydantic-avro\n```\n\n### Pydantic class to avro schema\n\n```python\nimport json\nfrom typing import Optional\n\nfrom pydantic_avro.base import AvroBase\n\nclass TestModel(AvroBase):\n key1: str\n key2: int\n key2: Optional[str]\n\nschema_dict: dict = TestModel.avro_schema()\nprint(json.dumps(schema_dict))\n\n```\n\n### Avro schema to pydantic\n\n```bash\n#!/usr/bin/env bash\n# Print to stdout\npydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc\n\n# Save it to a file\npydantic-avro avro_to_pydantic --asvc /path/to/schema.asvc --output /path/to/output.py\n```\n',
    'author': "Peter van 't Hof'",
    'author_email': 'peter.vanthof@godatadriven.com',
    'maintainer': None,
    'maintainer_email': None,
    'url': 'https://github.com/godatadriven/pydantic-avro',
    'package_dir': package_dir,
    'packages': packages,
    'package_data': package_data,
    'install_requires': install_requires,
    'entry_points': entry_points,
    'python_requires': '>=3.6.1,<4.0',
}


setup(**setup_kwargs)
@@ -1,107 +0,0 @@
1
- import json
2
- from typing import Optional, Union
3
-
4
-
5
def avsc_to_pydatic(schema: dict) -> str:
    """Generate python code of pydantic classes for the given Avro schema.

    (The function name keeps the historical "pydatic" typo because callers
    import it under this name.)

    :param schema: an Avro schema as a dict; must be a "record" with a
        "name" and a "fields" list
    :return: python source code defining one pydantic class per record
    :raises AttributeError: if the top-level schema is not a valid record
    :raises NotImplementedError: for avro types that are not supported yet
    """
    if "type" not in schema or schema["type"] != "record":
        raise AttributeError("Type not supported")
    if "name" not in schema:
        raise AttributeError("Name is required")
    if "fields" not in schema:
        raise AttributeError("fields are required")

    # Generated class source keyed by record name (also used to detect
    # references to already-generated records).
    classes = {}

    def parse_type(t: Union[str, dict, list]):
        """Return ``(python_type, default)`` for the given avro type.

        ``default`` is the literal default-value text (e.g. "[]" or "None"),
        or None when the field has no default.  Keeping the default separate
        from the annotation fixes the old behaviour that inlined "= []"
        inside the type, producing invalid python such as
        ``Optional[List[str] = []] = None`` for optional arrays.
        """
        if isinstance(t, str):
            if t == "string":
                return "str", None
            if t in ("long", "int"):
                return "int", None
            if t == "boolean":
                return "bool", None
            if t in ("double", "float"):
                return "float", None
            if t in classes:
                # Reference to a record that was already generated.
                return t, None
            raise NotImplementedError(f"Type {t} not supported yet")
        if isinstance(t, list):
            # A union; only "null" + one other type is supported.
            optional = "null" in t
            if len(t) > 2 or (not optional and len(t) > 1):
                raise NotImplementedError("Only a single type is supported yet")
            c = t.copy()
            if optional:
                # Only remove "null" when present (the old unconditional
                # remove() raised ValueError for e.g. ["int"]).
                c.remove("null")
            py_type, default = parse_type(c[0])
            if optional:
                return f"Optional[{py_type}]", "None"
            return py_type, default
        # t is a dict: logical types, enums, arrays and nested records.
        logical = t.get("logicalType")
        if logical == "uuid":
            return "UUID", None
        if logical == "decimal":
            return "Decimal", None
        if logical in ("timestamp-millis", "timestamp-micros"):
            return "datetime", None
        if logical in ("time-millis", "time-micros"):
            return "time", None
        if logical == "date":
            return "date", None
        if t.get("type") == "enum":
            # TODO: implement a real python Enum instead of plain str
            return "str", None
        if t.get("type") == "string":
            return "str", None
        if t.get("type") == "array":
            # Discard the inner default so nested arrays stay valid
            # annotations; arrays themselves default to an empty list.
            sub_type, _ = parse_type(t.get("items"))
            return f"List[{sub_type}]", "[]"
        if t.get("type") == "record":
            # Generate the nested record's class first, then refer to it.
            record_type_to_pydantic(t)
            return t.get("name"), None
        raise NotImplementedError(f"Type {t} not supported yet")

    def get_python_type(t: Union[str, dict, list]) -> str:
        """Return the full annotation text (plus default, if any) for a type."""
        py_type, default = parse_type(t)
        if default is not None:
            return f"{py_type} = {default}"
        return py_type

    def record_type_to_pydantic(schema: dict):
        """Convert a single avro record type to a pydantic class."""
        name = schema["name"]
        current = f"class {name}(BaseModel):\n"

        for field in schema["fields"]:
            n = field["name"]
            t = get_python_type(field["type"])
            current += f"    {n}: {t}\n"
        if len(schema["fields"]) == 0:
            current += "    pass\n"

        classes[name] = current

    record_type_to_pydantic(schema)

    # Fixed import header for every generated module.
    file_content = """
from datetime import date, datetime, time
from decimal import Decimal
from typing import List, Optional
from uuid import UUID

from pydantic import BaseModel


"""
    file_content += "\n\n".join(classes.values())

    return file_content
97
-
98
-
99
def convert_file(avsc_path: str, output_path: Optional[str] = None):
    """Read an .avsc file, generate pydantic code, and print or write it.

    :param avsc_path: path of the avro schema (json) file to read
    :param output_path: destination .py file; when None the generated code
        is printed to stdout instead
    """
    with open(avsc_path, "r") as fh:
        schema = json.load(fh)

    code = avsc_to_pydatic(schema)

    if output_path is None:
        print(code)
        return
    with open(output_path, "w") as fh:
        fh.write(code)
@@ -1,105 +0,0 @@
1
- from typing import List
2
-
3
- from pydantic import BaseModel
4
-
5
-
6
class AvroBase(BaseModel):
    """Base pydantic class that can export its own schema as an Avro schema."""

    @classmethod
    def avro_schema(cls) -> dict:
        """Return the avro schema for the pydantic class."""
        schema = cls.schema()
        return cls._avro_schema(schema)

    @staticmethod
    def _avro_schema(schema: dict) -> dict:
        """Return the avro schema for the given pydantic (json) schema.

        :param schema: the dict produced by ``BaseModel.schema()``
        :return: a dict representing an Avro "record" schema
        :raises RuntimeError: for unresolvable $refs or unsupported types
        """

        def get_definition(ref: str, schema: dict, prefix: str):
            """Read the definition of the base schema for nested structs."""
            # avoid shadowing the builtin `id`
            def_id = ref.replace("#/definitions/", "")
            d = schema.get("definitions", {}).get(def_id)
            if d is None:
                raise RuntimeError(f"Definition {def_id} does not exist")
            return get_fields(d, prefix)

        def get_type(key: str, value: dict, prefix: str) -> dict:
            """Return the avro type dict of a single field."""
            if len(prefix) > 0:
                prefix += "_"
            t = value.get("type")
            f = value.get("format")
            r = value.get("$ref")
            avro_type_dict = {}
            if r is not None:
                prefix += key
                avro_type_dict["type"] = {
                    "type": "record",
                    "fields": get_definition(r, schema, prefix),
                    # A record name must be unique throughout the complete
                    # schema, so the path through the schema is tracked and
                    # used as the name of a nested struct/array record.
                    "name": prefix + "_" + r.replace("#/definitions/", ""),
                }
            elif t == "array":
                items = value.get("items")
                tn = get_type(key, items, prefix)
                # If items in the array are an object ($ref), unwrap the
                # nested record so it is used directly as the items type.
                if "$ref" in items:
                    tn = tn["type"]
                # If items in the array are a logicalType, unwrap likewise.
                if isinstance(tn.get("type", {}), dict) and tn.get("type", {}).get("logicalType") is not None:
                    tn = tn["type"]
                avro_type_dict["type"] = {"type": "array", "items": tn}
            elif t == "string" and f == "date-time":
                avro_type_dict["type"] = {
                    "type": "long",
                    "logicalType": "timestamp-micros",
                }
            elif t == "string" and f == "date":
                avro_type_dict["type"] = {
                    "type": "int",
                    "logicalType": "date",
                }
            elif t == "string" and f == "time":
                avro_type_dict["type"] = {
                    "type": "long",
                    "logicalType": "time-micros",
                }
            elif t == "string" and f == "uuid":
                avro_type_dict["type"] = {
                    "type": "string",
                    "logicalType": "uuid",
                }
            elif t == "string":
                avro_type_dict["type"] = "string"
            elif t == "number":
                avro_type_dict["type"] = "double"
            elif t == "integer":
                # integer in python can be a long
                avro_type_dict["type"] = "long"
            elif t == "boolean":
                avro_type_dict["type"] = "boolean"
            else:
                raise RuntimeError(f"Unknown type found: '{t}'")
            return avro_type_dict

        def get_fields(s: dict, prefix: str = "") -> List[dict]:
            """Return the list of avro field dicts of a struct."""
            fields = []

            required = s.get("required", [])
            for key, value in s.get("properties", {}).items():
                avro_type_dict = get_type(key, value, prefix)
                avro_type_dict["name"] = key

                if key not in required:
                    # The Avro spec requires a union's default value to match
                    # the FIRST branch of the union, so "null" must come first
                    # for a null default (the previous ["<type>", "null"]
                    # order made the schema invalid for strict Avro parsers).
                    avro_type_dict["type"] = ["null", avro_type_dict["type"]]
                    avro_type_dict["default"] = None

                fields.append(avro_type_dict)
            return fields

        fields = get_fields(schema)

        return {"type": "record", "namespace": schema["title"], "name": schema["title"], "fields": fields}
File without changes