avrotize 2.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. avrotize/__init__.py +66 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp/CMakeLists.txt.jinja +77 -0
  7. avrotize/avrotocpp/build.bat.jinja +7 -0
  8. avrotize/avrotocpp/build.sh.jinja +7 -0
  9. avrotize/avrotocpp/dataclass_body.jinja +108 -0
  10. avrotize/avrotocpp/vcpkg.json.jinja +21 -0
  11. avrotize/avrotocpp.py +483 -0
  12. avrotize/avrotocsharp/README.md.jinja +166 -0
  13. avrotize/avrotocsharp/class_test.cs.jinja +266 -0
  14. avrotize/avrotocsharp/dataclass_core.jinja +293 -0
  15. avrotize/avrotocsharp/enum_test.cs.jinja +20 -0
  16. avrotize/avrotocsharp/project.csproj.jinja +30 -0
  17. avrotize/avrotocsharp/project.sln.jinja +34 -0
  18. avrotize/avrotocsharp/run_coverage.ps1.jinja +98 -0
  19. avrotize/avrotocsharp/run_coverage.sh.jinja +149 -0
  20. avrotize/avrotocsharp/testproject.csproj.jinja +19 -0
  21. avrotize/avrotocsharp.py +1180 -0
  22. avrotize/avrotocsv.py +121 -0
  23. avrotize/avrotodatapackage.py +173 -0
  24. avrotize/avrotodb.py +1383 -0
  25. avrotize/avrotogo/go_enum.jinja +12 -0
  26. avrotize/avrotogo/go_helpers.jinja +31 -0
  27. avrotize/avrotogo/go_struct.jinja +151 -0
  28. avrotize/avrotogo/go_test.jinja +47 -0
  29. avrotize/avrotogo/go_union.jinja +38 -0
  30. avrotize/avrotogo.py +476 -0
  31. avrotize/avrotographql.py +197 -0
  32. avrotize/avrotoiceberg.py +210 -0
  33. avrotize/avrotojava/class_test.java.jinja +212 -0
  34. avrotize/avrotojava/enum_test.java.jinja +21 -0
  35. avrotize/avrotojava/testproject.pom.jinja +54 -0
  36. avrotize/avrotojava.py +2156 -0
  37. avrotize/avrotojs.py +250 -0
  38. avrotize/avrotojsons.py +481 -0
  39. avrotize/avrotojstruct.py +345 -0
  40. avrotize/avrotokusto.py +364 -0
  41. avrotize/avrotomd/README.md.jinja +49 -0
  42. avrotize/avrotomd.py +137 -0
  43. avrotize/avrotools.py +168 -0
  44. avrotize/avrotoparquet.py +208 -0
  45. avrotize/avrotoproto.py +359 -0
  46. avrotize/avrotopython/dataclass_core.jinja +241 -0
  47. avrotize/avrotopython/enum_core.jinja +87 -0
  48. avrotize/avrotopython/pyproject_toml.jinja +18 -0
  49. avrotize/avrotopython/test_class.jinja +97 -0
  50. avrotize/avrotopython/test_enum.jinja +23 -0
  51. avrotize/avrotopython.py +626 -0
  52. avrotize/avrotorust/dataclass_enum.rs.jinja +74 -0
  53. avrotize/avrotorust/dataclass_struct.rs.jinja +204 -0
  54. avrotize/avrotorust/dataclass_union.rs.jinja +105 -0
  55. avrotize/avrotorust.py +435 -0
  56. avrotize/avrotots/class_core.ts.jinja +140 -0
  57. avrotize/avrotots/class_test.ts.jinja +77 -0
  58. avrotize/avrotots/enum_core.ts.jinja +46 -0
  59. avrotize/avrotots/gitignore.jinja +34 -0
  60. avrotize/avrotots/index.ts.jinja +0 -0
  61. avrotize/avrotots/package.json.jinja +23 -0
  62. avrotize/avrotots/tsconfig.json.jinja +21 -0
  63. avrotize/avrotots.py +687 -0
  64. avrotize/avrotoxsd.py +344 -0
  65. avrotize/cddltostructure.py +1841 -0
  66. avrotize/commands.json +3496 -0
  67. avrotize/common.py +834 -0
  68. avrotize/constants.py +87 -0
  69. avrotize/csvtoavro.py +132 -0
  70. avrotize/datapackagetoavro.py +76 -0
  71. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  72. avrotize/dependencies/cs/net90/dependencies.csproj +29 -0
  73. avrotize/dependencies/go/go121/go.mod +6 -0
  74. avrotize/dependencies/java/jdk21/pom.xml +91 -0
  75. avrotize/dependencies/python/py312/requirements.txt +13 -0
  76. avrotize/dependencies/rust/stable/Cargo.toml +17 -0
  77. avrotize/dependencies/typescript/node22/package.json +16 -0
  78. avrotize/dependency_resolver.py +348 -0
  79. avrotize/dependency_version.py +432 -0
  80. avrotize/generic/generic.avsc +57 -0
  81. avrotize/jsonstoavro.py +2167 -0
  82. avrotize/jsonstostructure.py +2864 -0
  83. avrotize/jstructtoavro.py +878 -0
  84. avrotize/kstructtoavro.py +93 -0
  85. avrotize/kustotoavro.py +455 -0
  86. avrotize/openapitostructure.py +717 -0
  87. avrotize/parquettoavro.py +157 -0
  88. avrotize/proto2parser.py +498 -0
  89. avrotize/proto3parser.py +403 -0
  90. avrotize/prototoavro.py +382 -0
  91. avrotize/prototypes/any.avsc +19 -0
  92. avrotize/prototypes/api.avsc +106 -0
  93. avrotize/prototypes/duration.avsc +20 -0
  94. avrotize/prototypes/field_mask.avsc +18 -0
  95. avrotize/prototypes/struct.avsc +60 -0
  96. avrotize/prototypes/timestamp.avsc +20 -0
  97. avrotize/prototypes/type.avsc +253 -0
  98. avrotize/prototypes/wrappers.avsc +117 -0
  99. avrotize/structuretocddl.py +597 -0
  100. avrotize/structuretocpp/CMakeLists.txt.jinja +76 -0
  101. avrotize/structuretocpp/build.bat.jinja +3 -0
  102. avrotize/structuretocpp/build.sh.jinja +3 -0
  103. avrotize/structuretocpp/dataclass_body.jinja +50 -0
  104. avrotize/structuretocpp/vcpkg.json.jinja +11 -0
  105. avrotize/structuretocpp.py +697 -0
  106. avrotize/structuretocsharp/class_test.cs.jinja +180 -0
  107. avrotize/structuretocsharp/dataclass_core.jinja +156 -0
  108. avrotize/structuretocsharp/enum_test.cs.jinja +36 -0
  109. avrotize/structuretocsharp/json_structure_converters.cs.jinja +399 -0
  110. avrotize/structuretocsharp/program.cs.jinja +49 -0
  111. avrotize/structuretocsharp/project.csproj.jinja +17 -0
  112. avrotize/structuretocsharp/project.sln.jinja +34 -0
  113. avrotize/structuretocsharp/testproject.csproj.jinja +18 -0
  114. avrotize/structuretocsharp/tuple_converter.cs.jinja +121 -0
  115. avrotize/structuretocsharp.py +2295 -0
  116. avrotize/structuretocsv.py +365 -0
  117. avrotize/structuretodatapackage.py +659 -0
  118. avrotize/structuretodb.py +1125 -0
  119. avrotize/structuretogo/go_enum.jinja +12 -0
  120. avrotize/structuretogo/go_helpers.jinja +26 -0
  121. avrotize/structuretogo/go_interface.jinja +18 -0
  122. avrotize/structuretogo/go_struct.jinja +187 -0
  123. avrotize/structuretogo/go_test.jinja +70 -0
  124. avrotize/structuretogo.py +729 -0
  125. avrotize/structuretographql.py +502 -0
  126. avrotize/structuretoiceberg.py +355 -0
  127. avrotize/structuretojava/choice_core.jinja +34 -0
  128. avrotize/structuretojava/class_core.jinja +23 -0
  129. avrotize/structuretojava/enum_core.jinja +18 -0
  130. avrotize/structuretojava/equals_hashcode.jinja +30 -0
  131. avrotize/structuretojava/pom.xml.jinja +26 -0
  132. avrotize/structuretojava/tuple_core.jinja +49 -0
  133. avrotize/structuretojava.py +938 -0
  134. avrotize/structuretojs/class_core.js.jinja +33 -0
  135. avrotize/structuretojs/enum_core.js.jinja +10 -0
  136. avrotize/structuretojs/package.json.jinja +12 -0
  137. avrotize/structuretojs/test_class.js.jinja +84 -0
  138. avrotize/structuretojs/test_enum.js.jinja +58 -0
  139. avrotize/structuretojs/test_runner.js.jinja +45 -0
  140. avrotize/structuretojs.py +657 -0
  141. avrotize/structuretojsons.py +498 -0
  142. avrotize/structuretokusto.py +639 -0
  143. avrotize/structuretomd/README.md.jinja +204 -0
  144. avrotize/structuretomd.py +322 -0
  145. avrotize/structuretoproto.py +764 -0
  146. avrotize/structuretopython/dataclass_core.jinja +363 -0
  147. avrotize/structuretopython/enum_core.jinja +45 -0
  148. avrotize/structuretopython/map_alias.jinja +21 -0
  149. avrotize/structuretopython/pyproject_toml.jinja +23 -0
  150. avrotize/structuretopython/test_class.jinja +103 -0
  151. avrotize/structuretopython/test_enum.jinja +34 -0
  152. avrotize/structuretopython.py +799 -0
  153. avrotize/structuretorust/dataclass_enum.rs.jinja +63 -0
  154. avrotize/structuretorust/dataclass_struct.rs.jinja +121 -0
  155. avrotize/structuretorust/dataclass_union.rs.jinja +81 -0
  156. avrotize/structuretorust.py +714 -0
  157. avrotize/structuretots/class_core.ts.jinja +78 -0
  158. avrotize/structuretots/enum_core.ts.jinja +6 -0
  159. avrotize/structuretots/gitignore.jinja +8 -0
  160. avrotize/structuretots/index.ts.jinja +1 -0
  161. avrotize/structuretots/package.json.jinja +39 -0
  162. avrotize/structuretots/test_class.ts.jinja +35 -0
  163. avrotize/structuretots/tsconfig.json.jinja +21 -0
  164. avrotize/structuretots.py +740 -0
  165. avrotize/structuretoxsd.py +679 -0
  166. avrotize/xsdtoavro.py +413 -0
  167. avrotize-2.21.1.dist-info/METADATA +1319 -0
  168. avrotize-2.21.1.dist-info/RECORD +171 -0
  169. avrotize-2.21.1.dist-info/WHEEL +4 -0
  170. avrotize-2.21.1.dist-info/entry_points.txt +3 -0
  171. avrotize-2.21.1.dist-info/licenses/LICENSE +201 -0
avrotize/avrotools.py ADDED
@@ -0,0 +1,168 @@
1
+ """ Avro Tools Module """
2
+
3
+ import json
4
+ import hashlib
5
+ import base64
6
+ from typing import Dict, List, cast
7
+
8
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | int | bool | None
9
+
10
def transform_to_pcf(schema_json: str) -> str:
    """
    Transforms an Avro schema into its Parsing Canonical Form (PCF).

    The schema text is parsed, canonicalized with ``canonicalize_schema`` and
    serialized again without any insignificant whitespace.

    :param schema_json: The Avro schema as a JSON string.
    :return: The Parsing Canonical Form (PCF) as a JSON string.
    :raises json.JSONDecodeError: If ``schema_json`` is not valid JSON.
    """
    schema = json.loads(schema_json)
    canonical_schema = canonicalize_schema(schema)
    # ensure_ascii=False keeps non-ASCII characters as literal characters
    # instead of re-escaping them as \uXXXX; the PCF [STRINGS] rule requires
    # escapes to be replaced by their UTF-8 equivalents, and the fingerprints
    # are computed over the UTF-8 bytes of this text.
    return json.dumps(canonical_schema, separators=(',', ':'), ensure_ascii=False)
20
+
21
def avsc_to_pcf(schema_file: str) -> None:
    """
    Print the Parsing Canonical Form (PCF) of an Avro schema file.

    :param schema_file: Path of the UTF-8 encoded Avro schema (JSON) file.
    """
    with open(schema_file, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    # transform_to_pcf expects JSON text, so the parsed document is
    # serialized again before being handed over.
    serialized = json.dumps(parsed)
    print(transform_to_pcf(serialized))
26
+
27
def canonicalize_schema(schema: 'JsonNode', namespace: str = "") -> 'JsonNode':
    """
    Recursively processes the schema to convert it to the Parsing Canonical Form (PCF).

    The following PCF transformations are applied:

    * primitive schemas written as objects collapse to their simple name,
    * names of records, enums and fixed types (and references to them) are
      replaced by their full names,
    * only the attributes listed in ``FIELD_ORDER`` are kept, in that order,
    * record fields keep only their ``name`` and canonicalized ``type``.

    The input is never modified; a new structure is returned.

    :param schema: The parsed Avro schema (dict, list or string).
    :param namespace: The enclosing namespace used to resolve short names.
    :return: The canonicalized schema.
    :raises ValueError: If a node is not a string, list or dict.
    """
    named_kinds = ("record", "error", "enum", "fixed")
    container_kinds = ("array", "map")
    # Full names of the named types defined so far, in document order.
    defined_names = set()

    def resolve_reference(name, enclosing):
        """Return the full name a type reference points to."""
        if name in PRIMITIVE_TYPES or '.' in name or not enclosing:
            return name
        qualified = enclosing + '.' + name
        # A short name may also refer to a type in the null namespace.
        if qualified not in defined_names and name in defined_names:
            return name
        return qualified

    def canonical_field(field, enclosing):
        """Reduce a record field to its name and canonical type."""
        if not isinstance(field, dict):
            raise ValueError("Invalid schema: " + str(field))
        reduced = {}
        if 'name' in field:
            # Field names are not type names and are never namespace-qualified.
            reduced['name'] = field['name']
        if 'type' in field:
            reduced['type'] = walk(field['type'], enclosing)
        return reduced

    def walk(node, enclosing):
        """Canonicalize one schema node within the given namespace."""
        if isinstance(node, str):
            return resolve_reference(node, enclosing)
        if isinstance(node, list):
            return [walk(branch, enclosing) for branch in node]
        if not isinstance(node, dict):
            raise ValueError("Invalid schema: " + str(node))

        kind = node.get('type')
        if isinstance(kind, str):
            if kind in PRIMITIVE_TYPES:
                return kind
            if kind not in named_kinds and kind not in container_kinds:
                # An object whose type is the name of a named type is just a
                # reference to that type.
                return resolve_reference(kind, enclosing)

        fullname = None
        inner = enclosing
        short_name = node.get('name')
        if kind in named_kinds and isinstance(short_name, str):
            if '.' in short_name:
                # A dotted name is already a full name; any namespace
                # attribute is ignored.
                fullname = short_name
            else:
                own_namespace = node.get('namespace')
                if not isinstance(own_namespace, str):
                    own_namespace = enclosing
                fullname = own_namespace + '.' + short_name if own_namespace else short_name
            # Nested definitions inherit the namespace of this full name.
            inner = fullname.rpartition('.')[0]
            defined_names.add(fullname)

        canonical = {}
        for attr in FIELD_ORDER:
            if attr not in node:
                continue
            value = node[attr]
            if attr == 'name':
                if fullname is not None:
                    value = fullname
            elif attr == 'type':
                if not isinstance(value, str):
                    value = walk(value, inner)
            elif attr == 'fields' and isinstance(value, list):
                value = [canonical_field(field, inner) for field in value]
            elif attr == 'symbols' and isinstance(value, list):
                # Enum symbols are plain strings, not type references.
                value = list(value)
            elif attr in ('items', 'values'):
                value = walk(value, inner)
            canonical[attr] = value
        return canonical

    return walk(schema, namespace)
68
+
69
def normalize_string(value):
    """
    Normalizes JSON string literals by replacing escaped characters with their UTF-8 equivalents.

    Backslash escapes such as ``\\u0041`` are decoded, while characters that
    are already literal (including non-ASCII ones) are returned unchanged.

    :param value: The string value to normalize.
    :return: The normalized string.
    """
    # Decoding UTF-8 bytes with 'unicode_escape' reads every byte as Latin-1
    # and corrupts non-ASCII text. Encoding to Latin-1 with 'backslashreplace'
    # keeps code points up to U+00FF as single bytes and turns the rest into
    # escapes, so the decode step restores every character exactly.
    return value.encode('latin-1', 'backslashreplace').decode('unicode_escape')
77
+
78
def normalize_integer(value):
    """
    Normalizes JSON integer literals by removing leading zeros.

    :param value: The integer value (or integer text) to normalize.
    :return: The value converted with ``int``.
    """
    normalized = int(value)
    return normalized
86
+
87
def fingerprint_sha256(schema_json):
    """
    Generates a SHA-256 fingerprint for the given Avro schema.

    The hash is computed over the UTF-8 bytes of the schema's Parsing
    Canonical Form.

    :param schema_json: The Avro schema as a JSON string.
    :return: The SHA-256 fingerprint as a base64 string.
    """
    canonical_bytes = transform_to_pcf(schema_json).encode('utf-8')
    digest = hashlib.sha256(canonical_bytes).digest()
    encoded = base64.b64encode(digest)
    return encoded.decode('utf-8')
97
+
98
def fingerprint_md5(schema_json):
    """
    Generates an MD5 fingerprint for the given Avro schema.

    The hash is computed over the UTF-8 bytes of the schema's Parsing
    Canonical Form.

    :param schema_json: The Avro schema as a JSON string.
    :return: The MD5 fingerprint as a base64 string.
    """
    pcf = transform_to_pcf(schema_json)
    # The digest only identifies a schema and is not used for security, so it
    # is flagged accordingly; this keeps it usable on builds that restrict
    # MD5 (e.g. FIPS mode).
    md5_hash = hashlib.md5(pcf.encode('utf-8'), usedforsecurity=False).digest()
    return base64.b64encode(md5_hash).decode('utf-8')
108
+
109
def fingerprint_rabin(schema_json):
    """
    Generates a 64-bit Rabin fingerprint for the given Avro schema.

    The fingerprint of the schema's Parsing Canonical Form is packed into
    eight bytes, most significant byte first, before base64 encoding.

    :param schema_json: The Avro schema as a JSON string.
    :return: The Rabin fingerprint as a base64 string.
    """
    canonical_bytes = transform_to_pcf(schema_json).encode('utf-8')
    packed = fingerprint64(canonical_bytes).to_bytes(8, 'big')
    return base64.b64encode(packed).decode('utf-8')
119
+
120
def fingerprint64(buf):
    """
    Computes a 64-bit Rabin fingerprint.

    The lookup table is built on first use.

    :param buf: The input byte buffer.
    :return: The 64-bit Rabin fingerprint.
    """
    if FP_TABLE is None:
        init_fp_table()
    # Read the module-level table only after it is guaranteed to exist.
    table = FP_TABLE
    result = EMPTY
    for octet in buf:
        index = (result ^ octet) & 0xff
        result = table[index] ^ (result >> 8)
    return result
133
+
134
def init_fp_table():
    """
    Initializes the fingerprint table for the Rabin fingerprint algorithm.

    Fills the module-level ``FP_TABLE`` with 256 entries, one per byte value.
    """
    global FP_TABLE
    FP_TABLE = []
    for seed in range(256):
        entry = seed
        for _ in range(8):
            low_bit_set = entry & 1
            entry >>= 1
            # Fold in the polynomial whenever the bit shifted out was set.
            if low_bit_set:
                entry ^= EMPTY
        FP_TABLE.append(entry)
145
+
146
# Avro primitive type names; schemas of these types collapse to the bare name.
PRIMITIVE_TYPES = {
    "null",
    "boolean",
    "int",
    "long",
    "float",
    "double",
    "bytes",
    "string",
}

# Attributes kept in the canonical form, in the order they are emitted.
FIELD_ORDER = [
    "name",
    "type",
    "fields",
    "symbols",
    "items",
    "values",
    "size",
]

# Initial value of the 64-bit Rabin fingerprint; also used to build the table.
EMPTY = 0xc15d213aa4d7a795

# Lookup table for fingerprint64; populated lazily by init_fp_table().
FP_TABLE = None
151
+
152
class PCFSchemaResult:
    """ Holds a schema's Parsing Canonical Form together with its fingerprints."""

    def __init__(self, pcf: str, sha256: str, md5: str, rabin: str) -> None:
        # pcf is the canonical schema text; the other three values are the
        # fingerprints supplied by the caller.
        self.pcf, self.sha256 = pcf, sha256
        self.md5, self.rabin = md5, rabin
158
+
159
def pcf_schema(schema_json):
    """
    Wrapper function to provide PCF transformation and fingerprinting.

    :param schema_json: The Avro schema as a JSON string.
    :return: A PCFSchemaResult holding the PCF and its SHA-256, MD5 and Rabin fingerprints as base64 strings.
    """
    canonical_form = transform_to_pcf(schema_json)
    sha256_fingerprint = fingerprint_sha256(schema_json)
    md5_fingerprint = fingerprint_md5(schema_json)
    rabin_fingerprint = fingerprint_rabin(schema_json)
    return PCFSchemaResult(
        pcf=canonical_form,
        sha256=sha256_fingerprint,
        md5=md5_fingerprint,
        rabin=rabin_fingerprint,
    )
168
+
@@ -0,0 +1,208 @@
1
+ """ Convert an Avro schema to a Parquet schema. """
2
+
3
+ import json
4
+ import sys
5
+ from typing import Dict, List
6
+ import pyarrow as pa
7
+ import pyarrow.parquet as pq
8
+
9
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
10
+
11
+
12
class AvroToParquetConverter:
    """ Class to convert Avro schema to Parquet schema."""

    def __init__(self: 'AvroToParquetConverter'):
        # Named Avro types (records, enums, fixed) keyed by full name and,
        # when not already taken, by short name.
        self.named_type_cache: Dict[str, JsonNode] = {}
        # Names currently being expanded; guards against self-referencing types.
        self._resolving: set = set()

    def get_fullname(self, namespace: str, name: str) -> str:
        """ Get the full name of a record type."""
        return f"{namespace}.{name}" if namespace else name

    def convert_avro_to_parquet(self, avro_schema_path, avro_record_type, parquet_file_path, emit_cloudevents_columns=False):
        """
        Convert an Avro schema to a Parquet schema.

        Writes an empty Parquet file whose columns mirror the fields of the
        selected top-level record. Exits the process with status 1 when the
        schema file is not given or the record cannot be determined.

        :param avro_schema_path: Path of the Avro schema (JSON) file.
        :param avro_record_type: Name or full name of the record to use when
            the schema file holds a list of schemas.
        :param parquet_file_path: Path of the Parquet file to write.
        :param emit_cloudevents_columns: Whether to append the ``___type``,
            ``___source``, ``___id``, ``___time`` and ``___subject`` columns.
        """
        schema_file = avro_schema_path
        if not schema_file:
            print("Please specify the avro schema file")
            sys.exit(1)
        with open(schema_file, "r", encoding="utf-8") as f:
            schema_json = f.read()

        # Parse the schema as a JSON object
        schema = json.loads(schema_json)
        self.cache_named_types(schema)

        if isinstance(schema, list) and avro_record_type:
            # Entries without a namespace are matched by their plain name
            # instead of raising KeyError.
            schema = next(
                (x for x in schema
                 if isinstance(x, dict) and avro_record_type in (
                     x.get("name"),
                     self.get_fullname(x.get("namespace"), x.get("name")))),
                None)
            if schema is None:
                print(
                    f"No top-level record type {avro_record_type} found in the Avro schema")
                sys.exit(1)
        elif not isinstance(schema, dict):
            print(
                "Expected a single Avro schema as a JSON object, or a list of schema records")
            sys.exit(1)

        # One (column name, column type) pair per field of the top-level record
        parquet_schema = [
            (field["name"], self.convert_avro_type_to_parquet_type(field["type"]))
            for field in schema["fields"]
        ]

        if emit_cloudevents_columns:
            parquet_schema.extend([
                ("___type", pa.string()),
                ("___source", pa.string()),
                ("___id", pa.string()),
                ("___time", pa.timestamp('ns')),
                ("___subject", pa.string())
            ])

        # Create an empty table with the schema
        table = pa.Table.from_batches([], schema=pa.schema(parquet_schema))
        pq.write_table(table, parquet_file_path)

    def convert_avro_type_to_parquet_type(self, avro_type):
        """
        Convert an Avro type to a Parquet type.

        :param avro_type: An Avro type: a union (list), a type object (dict)
            or a primitive/named type (str).
        :return: The matching pyarrow data type; ``pa.string()`` for anything
            that cannot be mapped.
        """
        if isinstance(avro_type, list):
            return self._convert_union(avro_type)
        if isinstance(avro_type, dict):
            return self._convert_type_object(avro_type)
        if isinstance(avro_type, str):
            if avro_type in self.named_type_cache:
                if avro_type in self._resolving:
                    # A type that refers to itself cannot be expanded into a
                    # finite nested type; use the generic string fallback.
                    return pa.string()
                self._resolving.add(avro_type)
                try:
                    return self.convert_avro_type_to_parquet_type(self.named_type_cache[avro_type])
                finally:
                    self._resolving.discard(avro_type)
            return self.map_scalar_type(avro_type)

        return pa.string()

    def _convert_union(self, avro_type):
        """ Convert an Avro union (list of types) to a Parquet type."""
        item_count = len(avro_type)
        if item_count == 0:
            print(f"WARNING: Empty union type {avro_type}")
            return pa.string()
        if item_count == 1:
            return self.convert_avro_type_to_parquet_type(avro_type[0])
        if item_count == 2:
            # A pair of "null" and one other type is just that other type.
            first, second = avro_type
            if isinstance(first, str) and first == "null":
                return self.convert_avro_type_to_parquet_type(second)
            if isinstance(second, str) and second == "null":
                return self.convert_avro_type_to_parquet_type(first)
        return pa.struct(self.map_union_fields(avro_type))

    def _convert_type_object(self, avro_type):
        """ Convert an Avro type given as a JSON object to a Parquet type."""
        type_name = avro_type.get("type")
        logical_type = avro_type.get("logicalType")
        if type_name == "array":
            return pa.list_(self.convert_avro_type_to_parquet_type(avro_type.get("items")))
        if type_name == "map":
            return pa.map_(pa.string(), self.convert_avro_type_to_parquet_type(avro_type.get("values")))
        if type_name == "record":
            fields = avro_type.get("fields")
            # Covers both an empty and a missing field list.
            if not fields:
                print(
                    f"WARNING: No fields in record type {avro_type.get('name')}")
                return pa.string()
            return pa.struct({field.get("name"): self.convert_avro_type_to_parquet_type(field.get("type")) for field in fields})
        if type_name in ("enum", "fixed", "string"):
            return pa.string()
        if type_name == "bytes":
            if logical_type == "decimal":
                return pa.decimal128(38, 18)
            return pa.binary()
        if type_name == "long":
            if logical_type in ["timestamp-millis", "timestamp-micros"]:
                return pa.timestamp('ns')
            if logical_type in ["time-millis", "time-micros"]:
                return pa.time64('ns')
            return pa.int64()
        if type_name == "int":
            if logical_type == "date":
                return pa.date32()
            return pa.int32()
        if type_name is None:
            return pa.string()
        # Remaining primitives, references to named types and nested type
        # definitions are handled by the general conversion.
        return self.convert_avro_type_to_parquet_type(type_name)

    def cache_named_types(self, avro_type, namespace=None):
        """
        Add an encountered type to the list of types.

        Walks unions, record fields, array items and map values so that every
        named type is registered under its full name. The short name is
        registered as well unless another type already uses it.

        :param avro_type: The Avro type (list, dict or str) to inspect.
        :param namespace: The namespace inherited from the enclosing type.
        """
        if isinstance(avro_type, list):
            for item in avro_type:
                self.cache_named_types(item, namespace)
            return
        if not isinstance(avro_type, dict):
            return

        name = avro_type.get("name")
        if name:
            if "." in name:
                # A dotted name already carries its namespace.
                namespace, _, short_name = name.rpartition(".")
                fullname = name
            else:
                namespace = avro_type.get("namespace", namespace)
                short_name = name
                fullname = self.get_fullname(namespace, name)
            self.named_type_cache[fullname] = avro_type
            self.named_type_cache.setdefault(short_name, avro_type)

        for field in avro_type.get("fields") or []:
            if isinstance(field, dict) and "type" in field:
                self.cache_named_types(field.get("type"), namespace)
        for key in ("items", "values"):
            if key in avro_type:
                self.cache_named_types(avro_type.get(key), namespace)
        nested = avro_type.get("type")
        if isinstance(nested, (dict, list)):
            self.cache_named_types(nested, namespace)

    def map_union_fields(self, avro_type):
        """
        Map the fields of a union type to Parquet fields.

        :param avro_type: The list of union branches.
        :return: A list with one pyarrow field per non-null branch.
        """
        struct_fields = []
        for i, avro_union_type in enumerate(avro_type):
            if isinstance(avro_union_type, str) and avro_union_type == "null":
                continue
            field_type = self.convert_avro_type_to_parquet_type(
                avro_union_type)
            if isinstance(avro_union_type, str) and avro_union_type in self.named_type_cache:
                avro_union_type = self.named_type_cache[avro_union_type]

            # Positional fallback for branches that carry no usable name.
            field_name = f'_{i}'
            if isinstance(avro_union_type, str):
                field_name = f'{avro_union_type}Value'
            elif isinstance(avro_union_type, dict):
                branch_kind = avro_union_type.get("type")
                if branch_kind == "array":
                    field_name = 'ArrayValue'
                elif branch_kind == "map":
                    field_name = 'MapValue'
                elif "name" in avro_union_type:
                    field_name = f'{avro_union_type.get("name")}Value'
            struct_fields.append(pa.field(field_name, field_type))
        return struct_fields

    def map_scalar_type(self, type_name: str):
        """
        Map an Avro scalar type to a Parquet scalar type.

        :param type_name: The Avro primitive type name.
        :return: The matching pyarrow type; ``pa.string()`` for "null" and
            for any unknown name.
        """
        if not isinstance(type_name, str):
            return pa.string()
        factories = {
            "null": pa.string,
            "int": pa.int32,
            "long": pa.int64,
            "float": pa.float32,
            "double": pa.float64,
            "boolean": pa.bool_,
            "bytes": pa.binary,
            "string": pa.string,
        }
        return factories.get(type_name, pa.string)()
202
+
203
+
204
def convert_avro_to_parquet(avro_schema_path, avro_record_type, parquet_file_path, emit_cloudevents_columns=False):
    """
    Convert an Avro schema to a Parquet schema.

    Convenience wrapper that delegates to a fresh AvroToParquetConverter.
    """
    AvroToParquetConverter().convert_avro_to_parquet(
        avro_schema_path,
        avro_record_type,
        parquet_file_path,
        emit_cloudevents_columns,
    )