pydantic-avro 0.6.5__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydantic_avro/__init__.py CHANGED
@@ -0,0 +1 @@
1
+ from pydantic_avro.to_avro.base import AvroBase
pydantic_avro/__main__.py CHANGED
@@ -2,7 +2,7 @@ import argparse
2
2
  import sys
3
3
  from typing import List
4
4
 
5
- from pydantic_avro.avro_to_pydantic import convert_file
5
+ from pydantic_avro.from_avro.avro_to_pydantic import convert_file
6
6
 
7
7
 
8
8
  def main(input_args: List[str]):
pydantic_avro/base.py CHANGED
@@ -1,187 +1,2 @@
1
- from typing import Any, Dict, List, Optional
2
-
3
- from pydantic import BaseModel
4
- from pydantic.version import VERSION as PYDANTIC_VERSION
5
-
6
- PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
7
-
8
- DEFS_NAME = "$defs" if PYDANTIC_V2 else "definitions"
9
-
10
-
11
- class AvroBase(BaseModel):
12
- """This is base pydantic class that will add some methods"""
13
-
14
- @classmethod
15
- def avro_schema(cls, by_alias: bool = True, namespace: Optional[str] = None) -> dict:
16
- """
17
- Return the avro schema for the pydantic class
18
-
19
- :param by_alias: generate the schemas using the aliases defined, if any
20
- :param namespace: Provide an optional namespace string to use in schema generation
21
- :return: dict with the Avro Schema for the model
22
- """
23
- schema = cls.model_json_schema(by_alias=by_alias) if PYDANTIC_V2 else cls.schema(by_alias=by_alias)
24
-
25
- if namespace is None:
26
- # default namespace will be based on title
27
- namespace = schema["title"]
28
-
29
- return cls._avro_schema(schema, namespace)
30
-
31
- @staticmethod
32
- def _avro_schema(schema: dict, namespace: str) -> dict:
33
- """Return the avro schema for the given pydantic schema"""
34
-
35
- classes_seen = set()
36
-
37
- def get_definition(ref: str, schema: dict):
38
- """Reading definition of base schema for nested structs"""
39
- id = ref.replace(f"#/{DEFS_NAME}/", "")
40
- d = schema.get(DEFS_NAME, {}).get(id)
41
- if d is None:
42
- raise RuntimeError(f"Definition {id} does not exist")
43
- return d
44
-
45
- def get_type(value: dict) -> dict:
46
- """Returns a type of a single field"""
47
- t = value.get("type")
48
- f = value.get("format")
49
- r = value.get("$ref")
50
- a = value.get("additionalProperties")
51
- u = value.get("anyOf")
52
- minimum = value.get("minimum")
53
- maximum = value.get("maximum")
54
- avro_type_dict: Dict[str, Any] = {}
55
- if "default" in value:
56
- avro_type_dict["default"] = value.get("default")
57
- if "description" in value:
58
- avro_type_dict["doc"] = value.get("description")
59
- if "allOf" in value and len(value["allOf"]) == 1:
60
- r = value["allOf"][0]["$ref"]
61
- if u is not None:
62
- avro_type_dict["type"] = []
63
- for union_element in u:
64
- avro_type_dict["type"].append(get_type(union_element)["type"])
65
- elif r is not None:
66
- class_name = r.replace(f"#/{DEFS_NAME}/", "")
67
- if class_name in classes_seen:
68
- avro_type_dict["type"] = class_name
69
- else:
70
- d = get_definition(r, schema)
71
- if "enum" in d:
72
- avro_type_dict["type"] = {
73
- "type": "enum",
74
- "symbols": [str(v) for v in d["enum"]],
75
- "name": d["title"],
76
- }
77
- else:
78
- avro_type_dict["type"] = {
79
- "type": "record",
80
- "fields": get_fields(d),
81
- # Name of the struct should be unique true the complete schema
82
- # Because of this the path in the schema is tracked and used as name for a nested struct/array
83
- "name": class_name,
84
- }
85
-
86
- classes_seen.add(class_name)
87
- elif t == "array":
88
- items = value.get("items")
89
- tn = get_type(items)
90
- # If items in array are a object:
91
- if "$ref" in items:
92
- tn = tn["type"]
93
- # If items in array are a logicalType
94
- if (
95
- isinstance(tn, dict)
96
- and isinstance(tn.get("type", {}), dict)
97
- and tn.get("type", {}).get("logicalType") is not None
98
- ):
99
- tn = tn["type"]
100
- # If items in array are an array, the structure must be corrected
101
- if (
102
- isinstance(tn, dict)
103
- and isinstance(tn.get("type", {}), dict)
104
- and tn.get("type", {}).get("type") == "array"
105
- ):
106
- items = tn["type"]["items"]
107
- tn = {"type": "array", "items": items}
108
- avro_type_dict["type"] = {"type": "array", "items": tn}
109
- elif t == "string" and f == "date-time":
110
- avro_type_dict["type"] = {
111
- "type": "long",
112
- "logicalType": "timestamp-micros",
113
- }
114
- elif t == "string" and f == "date":
115
- avro_type_dict["type"] = {
116
- "type": "int",
117
- "logicalType": "date",
118
- }
119
- elif t == "string" and f == "time":
120
- avro_type_dict["type"] = {
121
- "type": "long",
122
- "logicalType": "time-micros",
123
- }
124
- elif t == "string" and f == "uuid":
125
- avro_type_dict["type"] = {
126
- "type": "string",
127
- "logicalType": "uuid",
128
- }
129
- elif t == "string" and f == "binary":
130
- avro_type_dict["type"] = "bytes"
131
- elif t == "string":
132
- avro_type_dict["type"] = "string"
133
- elif t == "number":
134
- avro_type_dict["type"] = "double"
135
- elif t == "integer":
136
- # integer in python can be a long, only if minimum and maximum value is set a int can be used
137
- if minimum is not None and minimum >= -(2**31) and maximum is not None and maximum <= (2**31 - 1):
138
- avro_type_dict["type"] = "int"
139
- else:
140
- avro_type_dict["type"] = "long"
141
- elif t == "boolean":
142
- avro_type_dict["type"] = "boolean"
143
- elif t == "null":
144
- avro_type_dict["type"] = "null"
145
- elif t == "object":
146
- if a is None:
147
- value_type = "string"
148
- else:
149
- value_type = get_type(a)
150
- if isinstance(value_type, dict) and len(value_type) == 1:
151
- value_type = value_type.get("type")
152
- avro_type_dict["type"] = {"type": "map", "values": value_type}
153
- else:
154
- raise NotImplementedError(
155
- f"Type '{t}' not support yet, "
156
- f"please report this at https://github.com/godatadriven/pydantic-avro/issues"
157
- )
158
- return avro_type_dict
159
-
160
- def get_fields(s: dict) -> List[dict]:
161
- """Return a list of fields of a struct"""
162
- fields = []
163
-
164
- required = s.get("required", [])
165
- for key, value in s.get("properties", {}).items():
166
- avro_type_dict = get_type(value)
167
- avro_type_dict["name"] = key
168
-
169
- if key not in required:
170
- if type(avro_type_dict["type"]) is list:
171
- if "null" not in avro_type_dict["type"]:
172
- avro_type_dict["type"].insert(0, "null")
173
- elif avro_type_dict.get("default") is None:
174
- avro_type_dict["type"] = ["null", avro_type_dict["type"]]
175
- avro_type_dict.setdefault("default", None)
176
-
177
- # null must be the first element in the list
178
- if type(avro_type_dict["type"]) is list and "null" in avro_type_dict["type"]:
179
- avro_type_dict["type"].remove("null")
180
- avro_type_dict["type"].insert(0, "null")
181
-
182
- fields.append(avro_type_dict)
183
- return fields
184
-
185
- fields = get_fields(schema)
186
-
187
- return {"type": "record", "namespace": namespace, "name": schema["title"], "fields": fields}
1
+ # For backwards compatibility
2
+ from pydantic_avro.to_avro.base import AvroBase
File without changes
@@ -0,0 +1,48 @@
1
+ import json
2
+ from typing import Optional
3
+
4
+ from pydantic_avro.from_avro.class_registery import ClassRegistry
5
+ from pydantic_avro.from_avro.types import get_pydantic_type
6
+
7
+
8
def validate_schema(schema: dict) -> None:
    """Check that an Avro schema has the top-level keys the converter reads.

    The keys are checked in the order ``type``, ``name``, ``fields`` and only
    the first missing one is reported.

    :param schema: the parsed Avro schema
    :raises AttributeError: when ``type``, ``name`` or ``fields`` is missing
    """
    # (key, message) pairs, listed in the order the checks are performed
    required_keys = (
        ("type", "Type not supported"),
        ("name", "Name is required"),
        ("fields", "Fields are required"),
    )
    for key, message in required_keys:
        if key not in schema:
            raise AttributeError(message)
15
+
16
+
17
def avsc_to_pydantic(schema: dict) -> str:
    """Render Python source code with Pydantic models for an Avro schema.

    :param schema: the parsed Avro schema
    :return: an import header followed by every class collected in the
        class registry, separated by blank lines
    :raises AttributeError: when ``validate_schema`` rejects the schema
    """
    registry = ClassRegistry()
    # Drop classes left over from a previous call before collecting new ones
    registry.clear()
    validate_schema(schema)
    # Called for its side effect; the returned type name is not needed here
    get_pydantic_type(schema)

    header = """
from datetime import date, datetime, time
from decimal import Decimal
from enum import Enum
from typing import List, Optional, Dict, Union
from uuid import UUID

from pydantic import BaseModel, Field


"""
    return header + "\n\n".join(registry.classes.values())
38
+
39
+
40
def convert_file(avsc_path: str, output_path: Optional[str] = None):
    """Convert an Avro schema file into Pydantic source code.

    Both files are handled as UTF-8 so the result does not depend on the
    platform's default encoding.

    :param avsc_path: path of the JSON Avro schema file to read
    :param output_path: path to write the generated code to; when ``None``
        the code is printed to stdout instead
    """
    with open(avsc_path, "r", encoding="utf-8") as fh:
        avsc_dict = json.load(fh)
    file_content = avsc_to_pydantic(avsc_dict)
    if output_path is None:
        print(file_content)
        return
    with open(output_path, "w", encoding="utf-8") as fh:
        fh.write(file_content)
@@ -0,0 +1,29 @@
1
class ClassRegistry:
    """Singleton class to store generated Pydantic classes.

    Every ``ClassRegistry()`` call returns the same instance, so all callers
    share one mapping of class name to generated class source.
    """

    _instance = None
    # Created on the single instance in __new__; no shared class-level default
    _classes: dict

    def __new__(cls):
        """Create the instance on first use and return it on every call."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._classes = {}
        return cls._instance

    def add_class(self, name: str, class_def: str):
        """Store ``class_def`` under ``name``, replacing any earlier entry."""
        self._classes[name] = class_def

    @property
    def classes(self) -> dict:
        """Return the registry mapping itself (not a copy)."""
        return self._classes

    def has_class(self, name: str) -> bool:
        """Return whether a class called ``name`` has been registered."""
        return name in self._classes

    def clear(self):
        """Remove every registered class."""
        self._classes.clear()
@@ -0,0 +1,149 @@
1
+ import json
2
+ from typing import Callable, Union
3
+
4
+ from pydantic_avro.from_avro.class_registery import ClassRegistry
5
+
6
# Avro logicalType name -> name of the Python type written into generated code.
# Both the millis and micros variants map to the same Python type.
LOGICAL_TYPES = {
    "uuid": "UUID",
    "decimal": "Decimal",
    "timestamp-millis": "datetime",
    "timestamp-micros": "datetime",
    "time-millis": "time",
    "time-micros": "time",
    "date": "date",
}


# Avro primitive type name -> name of the Python builtin written into generated code.
AVRO_TO_PY_MAPPING = {
    "string": "str",
    "int": "int",
    "long": "int",
    "boolean": "bool",
    "double": "float",
    "float": "float",
    "bytes": "bytes",
}
26
+
27
+
28
def list_type_handler(t: dict) -> str:
    """Get the Python type of a given Avro list (union) type.

    :param t: Avro type dict whose ``type`` entry is a list of member types
    :return: ``Optional[X]`` for a two-member union containing ``null``,
        ``Optional[Union[...]]`` for a larger union containing ``null``,
        otherwise ``Union[...]``
    """
    members = t["type"]
    if "null" not in members:
        plain = ",".join([get_pydantic_type(member) for member in members])
        return f"Union[{plain}]"
    if len(members) == 2:
        # The member left over once the first "null" is taken out
        other = members[1 - members.index("null")]
        return f"Optional[{get_pydantic_type(other)}]"
    non_null = ",".join([get_pydantic_type(member) for member in members if member != "null"])
    return f"Optional[Union[{non_null}]]"
38
+
39
+
40
def map_type_handler(t: dict) -> str:
    """Get the Python type of a given Avro map type.

    :param t: Avro type dict whose ``type`` entry is the map definition
    :return: ``Dict[str, <value type>]``
    :raises AttributeError: when the map definition has no ``values`` entry
    """
    values_schema = t["type"].get("values")
    if values_schema is not None:
        return f"Dict[str, {get_pydantic_type(values_schema)}]"
    raise AttributeError("Values are required for map type")
50
+
51
+
52
def logical_type_handler(t: dict) -> str:
    """Get the Python type of a given Avro logical type.

    :raises KeyError: when the logical type is not listed in ``LOGICAL_TYPES``
    """
    logical_name = t["type"]["logicalType"]
    return LOGICAL_TYPES[logical_name]
55
+
56
+
57
def enum_type_handler(t: dict) -> str:
    """Get the enum class name for an Avro enum type, registering the class if new.

    :param t: Avro type dict whose ``type`` entry is the enum definition
    :return: the enum's name
    """
    enum_schema = t["type"]
    name = enum_schema.get("name")
    registry = ClassRegistry()
    if registry.has_class(name):
        # Already generated earlier; only the name is needed
        return name
    member_lines = [f'    {symbol} = "{symbol}"\n' for symbol in enum_schema.get("symbols")]
    registry.add_class(name, f"class {name}(str, Enum):\n" + "".join(member_lines))
    return name
66
+
67
+
68
def array_type_handler(t: dict) -> str:
    """Get the Python type of a given Avro array type.

    The ``items`` entry is read from the nested ``type`` dict when there is
    one, otherwise from ``t`` itself.
    """
    array_schema = t["type"] if isinstance(t["type"], dict) else t
    item_type = get_pydantic_type(array_schema["items"])
    return f"List[{item_type}]"
75
+
76
+
77
def record_type_handler(t: dict) -> str:
    """Get the class name for an Avro record type and register its generated class.

    :param t: the record definition, or a dict whose ``type`` entry holds it
    :return: the record's name
    """
    record = t["type"] if isinstance(t["type"], dict) else t
    name = record["name"]
    if "fields" in record:
        fields = record["fields"]
    else:
        fields = record["type"]["fields"]

    rendered = []
    for field in fields:
        rendered.append(generate_field_string(field))
    # A record without fields still needs a class body
    body = "\n".join(rendered) if rendered else "    pass"
    ClassRegistry().add_class(name, f"class {name}(BaseModel):\n{body}\n")
    return name
87
+
88
+
89
# Dispatch table used by get_type_handler. "list" and "logical" are internal
# keys chosen by get_type_handler for union lists and logicalType dicts; the
# other keys are looked up directly by the Avro type name.
TYPE_HANDLERS = {
    "list": list_type_handler,
    "map": map_type_handler,
    "logical": logical_type_handler,
    "enum": enum_type_handler,
    "array": array_type_handler,
    "record": record_type_handler,
}
97
+
98
+
99
def generate_field_string(field: dict) -> str:
    """Generate the source line for one field of a Pydantic model.

    Fields whose Avro type is exactly ``"int"`` are emitted as a ``Field``
    with 32-bit bounds; other fields get a plain annotation with an optional
    default.

    :param field: Avro field definition with ``name``, ``type`` and
        optionally ``default``
    :return: one indented line of Python source
    """
    name = field["name"]
    py_type = get_pydantic_type(field)
    has_default = "default" in field
    is_int = field["type"] == "int"

    rendered_default = None
    if has_default:
        default = field["default"]
        if isinstance(default, (bool, type(None))):
            # Use Python's spelling (True/False/None), not JSON's
            rendered_default = f"{default}"
        else:
            rendered_default = json.dumps(default)

    if is_int:
        first_argument = rendered_default if has_default else "..."
        return f"    {name}: {py_type} = Field({first_argument}, ge=-2**31, le=(2**31 - 1))"
    if has_default:
        return f"    {name}: {py_type} = {rendered_default}"
    return f"    {name}: {py_type}"
116
+
117
+
118
def get_pydantic_type(t: Union[str, dict]) -> str:
    """Get the Pydantic type for a given Avro type.

    :param t: an Avro type name, or a dict with a ``type`` entry
    :return: the Python type expression to use in generated code
    """
    spec = {"type": t} if isinstance(t, str) else t
    kind = spec["type"]

    if isinstance(kind, str):
        # Names of already generated classes take precedence over primitives
        if ClassRegistry().has_class(kind):
            return kind
        if kind in AVRO_TO_PY_MAPPING:
            return AVRO_TO_PY_MAPPING[kind]

    handler = get_type_handler(spec)
    return handler(spec)
131
+
132
+
133
def get_type_handler(t: dict) -> Callable:
    """Get the handler for a given Avro type.

    :param t: dict whose ``type`` entry is a type name, a type dict or a list
    :return: the matching function from ``TYPE_HANDLERS``
    :raises NotImplementedError: when no handler matches
    """
    inner = t["type"]
    handler = None
    if isinstance(inner, list):
        handler = TYPE_HANDLERS.get("list")
    elif isinstance(inner, str):
        handler = TYPE_HANDLERS.get(inner)
    elif isinstance(inner, dict):
        # A logicalType marker wins over the dict's own "type" entry
        if "logicalType" in inner:
            handler = TYPE_HANDLERS.get("logical")
        elif "type" in inner:
            handler = TYPE_HANDLERS.get(inner["type"])

    if not handler:
        raise NotImplementedError(f"Type {inner} not supported yet")
    return handler
pydantic_avro/py.typed ADDED
File without changes
File without changes
@@ -0,0 +1,35 @@
1
+ from typing import Optional
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from pydantic_avro.to_avro.config import PYDANTIC_V2
6
+ from pydantic_avro.to_avro.types import AvroTypeConverter
7
+
8
+
9
class AvroBase(BaseModel):
    """Pydantic base model that can describe itself as an Avro schema."""

    @classmethod
    def avro_schema(cls, by_alias: bool = True, namespace: Optional[str] = None) -> dict:
        """Build the Avro schema for this pydantic class.

        :param by_alias: generate the schemas using the aliases defined, if any
        :param namespace: optional namespace string to use in schema generation;
            the schema title is used when omitted
        :return: dict with the Avro Schema for the model
        """
        if PYDANTIC_V2:
            json_schema = cls.model_json_schema(by_alias=by_alias)
        else:
            json_schema = cls.schema(by_alias=by_alias)

        # Fall back to the model title when the caller gave no namespace
        effective_namespace = json_schema["title"] if namespace is None else namespace
        converter = AvroTypeConverter(json_schema)
        return cls._avro_schema(json_schema, effective_namespace, converter)

    @staticmethod
    def _avro_schema(schema: dict, namespace: str, avro_type_handler: AvroTypeConverter) -> dict:
        """Assemble the top-level Avro record for the given pydantic schema."""
        return {
            "type": "record",
            "namespace": namespace,
            "name": schema["title"],
            "fields": avro_type_handler.fields_to_avro_dicts(schema),
        }
@@ -0,0 +1,4 @@
1
+ from pydantic import VERSION as PYDANTIC_VERSION
2
+
3
# True when the installed pydantic version string starts with "2."
PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")
# JSON schema key that holds nested definitions: "$defs" when PYDANTIC_V2 is
# set, "definitions" otherwise
DEFS_NAME = "$defs" if PYDANTIC_V2 else "definitions"
@@ -0,0 +1,202 @@
1
+ from typing import Any, Dict, List, Optional, Set
2
+
3
+ from pydantic_avro.to_avro.config import DEFS_NAME
4
+
5
# JSON schema "format" of a string field -> Avro type used for it.
# Values are either an Avro logical type dict or a plain Avro type name.
STRING_TYPE_MAPPING = {
    "date-time": {
        "type": "long",
        "logicalType": "timestamp-micros",
    },
    "date": {
        "type": "int",
        "logicalType": "date",
    },
    "time": {
        "type": "long",
        "logicalType": "time-micros",
    },
    "uuid": {
        "type": "string",
        "logicalType": "uuid",
    },
    "binary": "bytes",
}
24
+
25
+
26
def get_definition(ref: str, schema: dict):
    """Look up the definition a ``$ref`` points at in the base schema.

    :param ref: reference string such as ``#/<DEFS_NAME>/<name>``
    :param schema: schema whose ``DEFS_NAME`` section holds the definitions
    :return: the definition dict
    :raises RuntimeError: when the definition is not present
    """
    definition_name = ref.replace(f"#/{DEFS_NAME}/", "")
    definitions = schema.get(DEFS_NAME, {})
    definition = definitions.get(definition_name)
    if definition is not None:
        return definition
    raise RuntimeError(f"Definition {definition_name} does not exist")
33
+
34
+
35
def set_nullability(avro_type_dict: dict) -> dict:
    """Make a field nullable, modifying ``avro_type_dict`` in place.

    A union type gets ``"null"`` prepended when it lacks it; a non-union type
    is wrapped as ``["null", <type>]`` only when its default is missing or
    ``None``. The ``default`` entry is set to ``None`` when absent.

    :param avro_type_dict: field dict with at least a ``type`` entry
    :return: the same dict that was passed in
    """
    current = avro_type_dict["type"]
    if type(current) is not list:
        if avro_type_dict.get("default") is None:
            avro_type_dict["type"] = ["null", current]
    elif "null" not in current:
        current.insert(0, "null")
    avro_type_dict.setdefault("default", None)
    return avro_type_dict
44
+
45
+
46
def null_to_first_element(avro_type_dict: dict) -> dict:
    """Move ``"null"`` to the front of a union type list, in place.

    Dicts whose ``type`` is not a list, or whose list has no ``"null"``, are
    returned untouched.

    :param avro_type_dict: field dict with at least a ``type`` entry
    :return: the same dict that was passed in
    """
    branches = avro_type_dict["type"]
    if type(branches) is not list or "null" not in branches:
        return avro_type_dict
    branches.remove("null")
    branches.insert(0, "null")
    return avro_type_dict
52
+
53
+
54
class AvroTypeConverter:
    """Converts Pydantic schema to AVRO schema.

    One converter is tied to one root schema: nested definitions are looked up
    in that schema, and the names of definitions already emitted are tracked so
    that later occurrences are referenced by name instead of being repeated.
    """

    def __init__(self, schema: dict):
        """Store the root schema and start with no definitions emitted.

        :param schema: the root JSON schema of the pydantic model
        """
        self.root_schema = schema
        self.classes_seen: Set[str] = set()

    def fields_to_avro_dicts(self, parent_schema: dict) -> List[dict]:
        """Converts fields from the schema to AVRO and returns them as a list of dictionaries.

        :param parent_schema: The parent schema of the field (not the root schema for nested models)
        :return: one Avro field dict per entry in ``properties``
        """
        fields = []

        required = parent_schema.get("required", [])
        for name, field_props in parent_schema.get("properties", {}).items():
            avro_type_dict = self._get_avro_type_dict(field_props=field_props)
            avro_type_dict["name"] = name
            if name not in required:
                set_nullability(avro_type_dict)
            # Applied to required fields as well: their unions may contain null too
            avro_type_dict = null_to_first_element(avro_type_dict)

            fields.append(avro_type_dict)
        return fields

    def _get_avro_type_dict(self, field_props: dict) -> dict:
        """Return the Avro dict for one field, carrying over ``default`` and ``description``.

        :param field_props: JSON schema properties of the field
        :return: dict with ``type`` and, when present in the input, ``default``
            and ``doc`` (taken from ``description``)
        """
        avro_type_dict: Dict[str, Any] = {}
        if "default" in field_props:
            avro_type_dict["default"] = field_props.get("default")
        if "description" in field_props:
            avro_type_dict["doc"] = field_props.get("description")
        avro_type_dict = self._get_avro_type(field_props, avro_type_dict)
        return avro_type_dict

    def _get_avro_type(self, field_props: dict, avro_type_dict: dict) -> dict:
        """Fill in the ``type`` entry of ``avro_type_dict`` for one field.

        ``anyOf`` is handled first, then references, then the plain JSON
        schema ``type``.

        :raises ValueError: when the field has no ``anyOf``, reference or ``type``
        :raises NotImplementedError: when the ``type`` is not one this converter handles
        """
        t = field_props.get("type")
        f = field_props.get("format")
        r = field_props.get("$ref")
        # A single-element allOf is treated as a plain reference
        if "allOf" in field_props and len(field_props["allOf"]) == 1:
            r = field_props["allOf"][0]["$ref"]
        u = field_props.get("anyOf")

        if u is not None:
            return self._union_to_avro(u, avro_type_dict)
        elif r is not None:
            return self._handle_references(r, avro_type_dict)
        elif t is None:
            raise ValueError(f"Field '{field_props}' does not have a defined type.")
        elif t == "array":
            return self._array_to_avro(field_props, avro_type_dict)
        elif t == "string":
            avro_type_dict["type"] = self._string_to_avro(f)
        elif t == "integer":
            avro_type_dict["type"] = self._integer_to_avro(field_props)
        elif t == "object":
            avro_type_dict["type"] = self._object_to_avro(field_props)
        elif t == "number":
            avro_type_dict["type"] = "double"
        elif t == "boolean":
            avro_type_dict["type"] = "boolean"
        elif t == "null":
            avro_type_dict["type"] = "null"
        else:
            raise NotImplementedError(
                f"Type '{t}' not support yet, "
                f"please report this at https://github.com/godatadriven/pydantic-avro/issues"
            )

        return avro_type_dict

    def _handle_references(self, r: str, avro_type_dict: dict) -> dict:
        """Resolve a reference into an enum, a record, or a name already emitted.

        :param r: reference string pointing into the root schema's definitions
        :param avro_type_dict: field dict whose ``type`` entry is filled in
        :raises RuntimeError: (from ``get_definition``) when the definition is missing
        """
        class_name = r.replace(f"#/{DEFS_NAME}/", "")
        if class_name in self.classes_seen:
            avro_type_dict["type"] = class_name
            return avro_type_dict

        d = get_definition(r, self.root_schema)
        # Register the name before descending into the fields, so a model that
        # refers back to itself is emitted by name instead of recursing forever
        self.classes_seen.add(class_name)
        if "enum" in d:
            avro_type_dict["type"] = {
                "type": "enum",
                "symbols": [str(v) for v in d["enum"]],
                "name": d["title"],
            }
        else:
            avro_type_dict["type"] = {
                "type": "record",
                "fields": self.fields_to_avro_dicts(d),
                # Name of the struct should be unique to the complete schema
                # Because of this the path in the schema is tracked and used as name for a nested struct/array
                "name": class_name,
            }

        return avro_type_dict

    @staticmethod
    def _string_to_avro(f: Optional[str]):
        """Return the Avro type of a string field with the given ``format``.

        Formats listed in ``STRING_TYPE_MAPPING`` get their mapped type; a
        missing or unlisted format is a plain ``"string"``.
        """
        if not f:
            return "string"
        mapped = STRING_TYPE_MAPPING.get(f, "string")
        # Return a copy so callers changing the result cannot alter the shared table
        return dict(mapped) if isinstance(mapped, dict) else mapped

    @staticmethod
    def _integer_to_avro(field_props: dict) -> str:
        """Return ``"int"`` when both bounds fit in 32 bits, otherwise ``"long"``."""
        minimum = field_props.get("minimum")
        maximum = field_props.get("maximum")
        # integer in python can be a long, only if minimum and maximum value is set an int can be used
        if minimum is not None and minimum >= -(2**31) and maximum is not None and maximum <= (2**31 - 1):
            return "int"
        return "long"

    def _object_to_avro(self, field_props: dict) -> dict:
        """Return an Avro map for an object field.

        The value type comes from ``additionalProperties``; ``"string"`` is
        used when that entry is absent.
        """
        a = field_props.get("additionalProperties")
        value_type = "string" if a is None else self._get_avro_type_dict(a)["type"]
        return {"type": "map", "values": value_type}

    def _union_to_avro(self, field_props: list, avro_type_dict: dict) -> dict:
        """Set ``type`` to the list of Avro types of the ``anyOf`` members.

        :param field_props: the ``anyOf`` list of member schemas
        """
        avro_type_dict["type"] = []
        for union_element in field_props:
            t = self._get_avro_type_dict(union_element)
            avro_type_dict["type"].append(t["type"])
        return avro_type_dict

    def _array_to_avro(self, field_props: dict, avro_type_dict: dict) -> dict:
        """Set ``type`` to an Avro array built from the field's ``items`` schema.

        :raises KeyError: when the field has no ``items`` entry
        """
        items = field_props["items"]
        tn = self._get_avro_type_dict(items)
        # If items in array are an object:
        if "$ref" in items:
            tn = tn["type"]
        # If items in array are a logicalType
        if (
            isinstance(tn, dict)
            and isinstance(tn.get("type", {}), dict)
            and tn.get("type", {}).get("logicalType") is not None
        ):
            tn = tn["type"]
        # If items in array are an array, the structure must be corrected
        if isinstance(tn, dict) and isinstance(tn.get("type", {}), dict) and tn.get("type", {}).get("type") == "array":
            items = tn["type"]["items"]
            tn = {"type": "array", "items": items}
        avro_type_dict["type"] = {"type": "array", "items": tn}
        return avro_type_dict
@@ -1,17 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pydantic-avro
3
- Version: 0.6.5
3
+ Version: 0.7.1
4
4
  Summary: Converting pydantic classes to avro schemas
5
5
  Home-page: https://github.com/godatadriven/pydantic-avro
6
6
  License: MIT
7
7
  Keywords: pydantic,avro
8
8
  Author: Peter van 't Hof'
9
9
  Author-email: peter.vanthof@godatadriven.com
10
- Requires-Python: >=3.7,<4.0
10
+ Requires-Python: >=3.8.1,<4.0
11
11
  Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.7
14
- Classifier: Programming Language :: Python :: 3.8
15
13
  Classifier: Programming Language :: Python :: 3.9
16
14
  Classifier: Programming Language :: Python :: 3.10
17
15
  Classifier: Programming Language :: Python :: 3.11
@@ -42,11 +40,13 @@ from typing import Optional
42
40
 
43
41
  from pydantic_avro.base import AvroBase
44
42
 
43
+
45
44
  class TestModel(AvroBase):
46
45
  key1: str
47
46
  key2: int
48
47
  key3: Optional[str]
49
48
 
49
+
50
50
  schema_dict: dict = TestModel.avro_schema()
51
51
  print(json.dumps(schema_dict))
52
52
 
@@ -0,0 +1,18 @@
1
+ LICENSE,sha256=gBlYCG1yxb0vGlsmek0lMPVOK5YDxQope4F54jzeqoY,1069
2
+ pydantic_avro/__init__.py,sha256=P4vaozEL8Rea7xWdB6ENj3DNF2RPPOWascfwbtQS7gE,48
3
+ pydantic_avro/__main__.py,sha256=-AL9FNYsAdFmilm-96MbbxymTW6QminXPCBZJ1nkqBE,695
4
+ pydantic_avro/base.py,sha256=i1wco_T6FXFdtlxzbzXt3Jv93TmTzTfJ493KCfZh1dc,78
5
+ pydantic_avro/from_avro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ pydantic_avro/from_avro/avro_to_pydantic.py,sha256=FOEd_6TqoHSlLALgyveb456B3f7TEPGw2ByRgRcigqc,1356
7
+ pydantic_avro/from_avro/class_registery.py,sha256=n_8yELp-Eux9bAs4CJOV78bQ061VV_-2FuvowOQpWCg,864
8
+ pydantic_avro/from_avro/types.py,sha256=yMpUXHr-06VQwECr3zwVITy9RzNbtVrs7oNbcXYQF7M,4663
9
+ pydantic_avro/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ pydantic_avro/to_avro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ pydantic_avro/to_avro/base.py,sha256=Y8QOGTpXO3BwJ6arrRK-k2JniK5kLTvlDpQ0PyZojmg,1409
12
+ pydantic_avro/to_avro/config.py,sha256=R9HLkWiXs7YZ02te8CQt5Kf2rbsE8Fx-hjqv3tBSFJU,152
13
+ pydantic_avro/to_avro/types.py,sha256=gY-4ay0IF1dDIxtVsLyzH-AkzcE_dHpnyCrd00cj3TE,7755
14
+ pydantic_avro-0.7.1.dist-info/entry_points.txt,sha256=gwHiQfbGLO8Np2sa1bZ_bpxU7sEufx6IachViBE_Fnw,66
15
+ pydantic_avro-0.7.1.dist-info/LICENSE,sha256=gBlYCG1yxb0vGlsmek0lMPVOK5YDxQope4F54jzeqoY,1069
16
+ pydantic_avro-0.7.1.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
17
+ pydantic_avro-0.7.1.dist-info/METADATA,sha256=61FXueogn8guvvq5Of_UdSv88e9xibKsYSkrbxkL7vw,2571
18
+ pydantic_avro-0.7.1.dist-info/RECORD,,
@@ -1,138 +0,0 @@
1
- import json
2
- from typing import Optional, Union
3
-
4
-
5
- def avsc_to_pydantic(schema: dict) -> str:
6
- """Generate python code of pydantic of given Avro Schema"""
7
- if "type" not in schema or schema["type"] != "record":
8
- raise AttributeError("Type not supported")
9
- if "name" not in schema:
10
- raise AttributeError("Name is required")
11
- if "fields" not in schema:
12
- raise AttributeError("fields are required")
13
-
14
- classes = {}
15
-
16
- def get_python_type(t: Union[str, dict]) -> str:
17
- """Returns python type for given avro type"""
18
- optional = False
19
- if isinstance(t, str):
20
- if t == "string":
21
- py_type = "str"
22
- elif t == "int":
23
- py_type = "int"
24
- elif t == "long":
25
- py_type = "int"
26
- elif t == "boolean":
27
- py_type = "bool"
28
- elif t == "double" or t == "float":
29
- py_type = "float"
30
- elif t == "bytes":
31
- py_type = "bytes"
32
- elif t in classes:
33
- py_type = t
34
- else:
35
- raise NotImplementedError(f"Type {t} not supported yet")
36
- elif isinstance(t, list):
37
- if "null" in t and len(t) == 2:
38
- optional = True
39
- c = t.copy()
40
- c.remove("null")
41
- py_type = get_python_type(c[0])
42
- else:
43
- if "null" in t:
44
- py_type = f"Optional[Union[{','.join([ get_python_type(e) for e in t if e != 'null'])}]]"
45
- else:
46
- py_type = f"Union[{','.join([ get_python_type(e) for e in t])}]"
47
- elif t.get("logicalType") == "uuid":
48
- py_type = "UUID"
49
- elif t.get("logicalType") == "decimal":
50
- py_type = "Decimal"
51
- elif t.get("logicalType") == "timestamp-millis" or t.get("logicalType") == "timestamp-micros":
52
- py_type = "datetime"
53
- elif t.get("logicalType") == "time-millis" or t.get("logicalType") == "time-micros":
54
- py_type = "time"
55
- elif t.get("logicalType") == "date":
56
- py_type = "date"
57
- elif t.get("type") == "enum":
58
- enum_name = t.get("name")
59
- if enum_name not in classes:
60
- enum_class = f"class {enum_name}(str, Enum):\n"
61
- for s in t.get("symbols"):
62
- enum_class += f' {s} = "{s}"\n'
63
- classes[enum_name] = enum_class
64
- py_type = enum_name
65
- elif t.get("type") == "string":
66
- py_type = "str"
67
- elif t.get("type") == "array":
68
- sub_type = get_python_type(t.get("items"))
69
- py_type = f"List[{sub_type}]"
70
- elif t.get("type") == "record":
71
- record_type_to_pydantic(t)
72
- py_type = t.get("name")
73
- elif t.get("type") == "map":
74
- value_type = get_python_type(t.get("values"))
75
- py_type = f"Dict[str, {value_type}]"
76
- else:
77
- raise NotImplementedError(
78
- f"Type {t} not supported yet, "
79
- f"please report this at https://github.com/godatadriven/pydantic-avro/issues"
80
- )
81
- if optional:
82
- return f"Optional[{py_type}]"
83
- else:
84
- return py_type
85
-
86
- def record_type_to_pydantic(schema: dict):
87
- """Convert a single avro record type to a pydantic class"""
88
- name = schema["name"]
89
- current = f"class {name}(BaseModel):\n"
90
-
91
- for field in schema["fields"]:
92
- n = field["name"]
93
- t = get_python_type(field["type"])
94
- default = field.get("default")
95
- if field["type"] == "int" and "default" in field and isinstance(default, (bool, type(None))):
96
- current += f" {n}: {t} = Field({default}, ge=-2**31, le=(2**31 - 1))\n"
97
- elif field["type"] == "int" and "default" in field:
98
- current += f" {n}: {t} = Field({json.dumps(default)}, ge=-2**31, le=(2**31 - 1))\n"
99
- elif field["type"] == "int":
100
- current += f" {n}: {t} = Field(..., ge=-2**31, le=(2**31 - 1))\n"
101
- elif "default" not in field:
102
- current += f" {n}: {t}\n"
103
- elif isinstance(default, (bool, type(None))):
104
- current += f" {n}: {t} = {default}\n"
105
- else:
106
- current += f" {n}: {t} = {json.dumps(default)}\n"
107
- if len(schema["fields"]) == 0:
108
- current += " pass\n"
109
-
110
- classes[name] = current
111
-
112
- record_type_to_pydantic(schema)
113
-
114
- file_content = """
115
- from datetime import date, datetime, time
116
- from decimal import Decimal
117
- from enum import Enum
118
- from typing import List, Optional, Dict, Union
119
- from uuid import UUID
120
-
121
- from pydantic import BaseModel, Field
122
-
123
-
124
- """
125
- file_content += "\n\n".join(classes.values())
126
-
127
- return file_content
128
-
129
-
130
- def convert_file(avsc_path: str, output_path: Optional[str] = None):
131
- with open(avsc_path, "r") as fh:
132
- avsc_dict = json.load(fh)
133
- file_content = avsc_to_pydantic(avsc_dict)
134
- if output_path is None:
135
- print(file_content)
136
- else:
137
- with open(output_path, "w") as fh:
138
- fh.write(file_content)
@@ -1,10 +0,0 @@
1
- LICENSE,sha256=gBlYCG1yxb0vGlsmek0lMPVOK5YDxQope4F54jzeqoY,1069
2
- pydantic_avro/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- pydantic_avro/__main__.py,sha256=O0GAVHLzCYv3rSNpOv6JQ0xXjIe0k_76ZWSw7YpAlvw,685
4
- pydantic_avro/avro_to_pydantic.py,sha256=6l7XcD0AXfnR_8UqE1RfnLs263n_r9DUmWfIV8F5Osw,5180
5
- pydantic_avro/base.py,sha256=RNCOXgrFZEeboUjJctRRy6An_V0AIOuFDzbLJmt74xs,8032
6
- pydantic_avro-0.6.5.dist-info/entry_points.txt,sha256=gwHiQfbGLO8Np2sa1bZ_bpxU7sEufx6IachViBE_Fnw,66
7
- pydantic_avro-0.6.5.dist-info/LICENSE,sha256=gBlYCG1yxb0vGlsmek0lMPVOK5YDxQope4F54jzeqoY,1069
8
- pydantic_avro-0.6.5.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
9
- pydantic_avro-0.6.5.dist-info/METADATA,sha256=bpH0VeOOSWZd9kdMkp9cJakD8TC1zM9vbAdlfKPXIXQ,2667
10
- pydantic_avro-0.6.5.dist-info/RECORD,,