avrotize 2.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. avrotize/__init__.py +66 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp/CMakeLists.txt.jinja +77 -0
  7. avrotize/avrotocpp/build.bat.jinja +7 -0
  8. avrotize/avrotocpp/build.sh.jinja +7 -0
  9. avrotize/avrotocpp/dataclass_body.jinja +108 -0
  10. avrotize/avrotocpp/vcpkg.json.jinja +21 -0
  11. avrotize/avrotocpp.py +483 -0
  12. avrotize/avrotocsharp/README.md.jinja +166 -0
  13. avrotize/avrotocsharp/class_test.cs.jinja +266 -0
  14. avrotize/avrotocsharp/dataclass_core.jinja +293 -0
  15. avrotize/avrotocsharp/enum_test.cs.jinja +20 -0
  16. avrotize/avrotocsharp/project.csproj.jinja +30 -0
  17. avrotize/avrotocsharp/project.sln.jinja +34 -0
  18. avrotize/avrotocsharp/run_coverage.ps1.jinja +98 -0
  19. avrotize/avrotocsharp/run_coverage.sh.jinja +149 -0
  20. avrotize/avrotocsharp/testproject.csproj.jinja +19 -0
  21. avrotize/avrotocsharp.py +1180 -0
  22. avrotize/avrotocsv.py +121 -0
  23. avrotize/avrotodatapackage.py +173 -0
  24. avrotize/avrotodb.py +1383 -0
  25. avrotize/avrotogo/go_enum.jinja +12 -0
  26. avrotize/avrotogo/go_helpers.jinja +31 -0
  27. avrotize/avrotogo/go_struct.jinja +151 -0
  28. avrotize/avrotogo/go_test.jinja +47 -0
  29. avrotize/avrotogo/go_union.jinja +38 -0
  30. avrotize/avrotogo.py +476 -0
  31. avrotize/avrotographql.py +197 -0
  32. avrotize/avrotoiceberg.py +210 -0
  33. avrotize/avrotojava/class_test.java.jinja +212 -0
  34. avrotize/avrotojava/enum_test.java.jinja +21 -0
  35. avrotize/avrotojava/testproject.pom.jinja +54 -0
  36. avrotize/avrotojava.py +2156 -0
  37. avrotize/avrotojs.py +250 -0
  38. avrotize/avrotojsons.py +481 -0
  39. avrotize/avrotojstruct.py +345 -0
  40. avrotize/avrotokusto.py +364 -0
  41. avrotize/avrotomd/README.md.jinja +49 -0
  42. avrotize/avrotomd.py +137 -0
  43. avrotize/avrotools.py +168 -0
  44. avrotize/avrotoparquet.py +208 -0
  45. avrotize/avrotoproto.py +359 -0
  46. avrotize/avrotopython/dataclass_core.jinja +241 -0
  47. avrotize/avrotopython/enum_core.jinja +87 -0
  48. avrotize/avrotopython/pyproject_toml.jinja +18 -0
  49. avrotize/avrotopython/test_class.jinja +97 -0
  50. avrotize/avrotopython/test_enum.jinja +23 -0
  51. avrotize/avrotopython.py +626 -0
  52. avrotize/avrotorust/dataclass_enum.rs.jinja +74 -0
  53. avrotize/avrotorust/dataclass_struct.rs.jinja +204 -0
  54. avrotize/avrotorust/dataclass_union.rs.jinja +105 -0
  55. avrotize/avrotorust.py +435 -0
  56. avrotize/avrotots/class_core.ts.jinja +140 -0
  57. avrotize/avrotots/class_test.ts.jinja +77 -0
  58. avrotize/avrotots/enum_core.ts.jinja +46 -0
  59. avrotize/avrotots/gitignore.jinja +34 -0
  60. avrotize/avrotots/index.ts.jinja +0 -0
  61. avrotize/avrotots/package.json.jinja +23 -0
  62. avrotize/avrotots/tsconfig.json.jinja +21 -0
  63. avrotize/avrotots.py +687 -0
  64. avrotize/avrotoxsd.py +344 -0
  65. avrotize/cddltostructure.py +1841 -0
  66. avrotize/commands.json +3496 -0
  67. avrotize/common.py +834 -0
  68. avrotize/constants.py +87 -0
  69. avrotize/csvtoavro.py +132 -0
  70. avrotize/datapackagetoavro.py +76 -0
  71. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  72. avrotize/dependencies/cs/net90/dependencies.csproj +29 -0
  73. avrotize/dependencies/go/go121/go.mod +6 -0
  74. avrotize/dependencies/java/jdk21/pom.xml +91 -0
  75. avrotize/dependencies/python/py312/requirements.txt +13 -0
  76. avrotize/dependencies/rust/stable/Cargo.toml +17 -0
  77. avrotize/dependencies/typescript/node22/package.json +16 -0
  78. avrotize/dependency_resolver.py +348 -0
  79. avrotize/dependency_version.py +432 -0
  80. avrotize/generic/generic.avsc +57 -0
  81. avrotize/jsonstoavro.py +2167 -0
  82. avrotize/jsonstostructure.py +2864 -0
  83. avrotize/jstructtoavro.py +878 -0
  84. avrotize/kstructtoavro.py +93 -0
  85. avrotize/kustotoavro.py +455 -0
  86. avrotize/openapitostructure.py +717 -0
  87. avrotize/parquettoavro.py +157 -0
  88. avrotize/proto2parser.py +498 -0
  89. avrotize/proto3parser.py +403 -0
  90. avrotize/prototoavro.py +382 -0
  91. avrotize/prototypes/any.avsc +19 -0
  92. avrotize/prototypes/api.avsc +106 -0
  93. avrotize/prototypes/duration.avsc +20 -0
  94. avrotize/prototypes/field_mask.avsc +18 -0
  95. avrotize/prototypes/struct.avsc +60 -0
  96. avrotize/prototypes/timestamp.avsc +20 -0
  97. avrotize/prototypes/type.avsc +253 -0
  98. avrotize/prototypes/wrappers.avsc +117 -0
  99. avrotize/structuretocddl.py +597 -0
  100. avrotize/structuretocpp/CMakeLists.txt.jinja +76 -0
  101. avrotize/structuretocpp/build.bat.jinja +3 -0
  102. avrotize/structuretocpp/build.sh.jinja +3 -0
  103. avrotize/structuretocpp/dataclass_body.jinja +50 -0
  104. avrotize/structuretocpp/vcpkg.json.jinja +11 -0
  105. avrotize/structuretocpp.py +697 -0
  106. avrotize/structuretocsharp/class_test.cs.jinja +180 -0
  107. avrotize/structuretocsharp/dataclass_core.jinja +156 -0
  108. avrotize/structuretocsharp/enum_test.cs.jinja +36 -0
  109. avrotize/structuretocsharp/json_structure_converters.cs.jinja +399 -0
  110. avrotize/structuretocsharp/program.cs.jinja +49 -0
  111. avrotize/structuretocsharp/project.csproj.jinja +17 -0
  112. avrotize/structuretocsharp/project.sln.jinja +34 -0
  113. avrotize/structuretocsharp/testproject.csproj.jinja +18 -0
  114. avrotize/structuretocsharp/tuple_converter.cs.jinja +121 -0
  115. avrotize/structuretocsharp.py +2295 -0
  116. avrotize/structuretocsv.py +365 -0
  117. avrotize/structuretodatapackage.py +659 -0
  118. avrotize/structuretodb.py +1125 -0
  119. avrotize/structuretogo/go_enum.jinja +12 -0
  120. avrotize/structuretogo/go_helpers.jinja +26 -0
  121. avrotize/structuretogo/go_interface.jinja +18 -0
  122. avrotize/structuretogo/go_struct.jinja +187 -0
  123. avrotize/structuretogo/go_test.jinja +70 -0
  124. avrotize/structuretogo.py +729 -0
  125. avrotize/structuretographql.py +502 -0
  126. avrotize/structuretoiceberg.py +355 -0
  127. avrotize/structuretojava/choice_core.jinja +34 -0
  128. avrotize/structuretojava/class_core.jinja +23 -0
  129. avrotize/structuretojava/enum_core.jinja +18 -0
  130. avrotize/structuretojava/equals_hashcode.jinja +30 -0
  131. avrotize/structuretojava/pom.xml.jinja +26 -0
  132. avrotize/structuretojava/tuple_core.jinja +49 -0
  133. avrotize/structuretojava.py +938 -0
  134. avrotize/structuretojs/class_core.js.jinja +33 -0
  135. avrotize/structuretojs/enum_core.js.jinja +10 -0
  136. avrotize/structuretojs/package.json.jinja +12 -0
  137. avrotize/structuretojs/test_class.js.jinja +84 -0
  138. avrotize/structuretojs/test_enum.js.jinja +58 -0
  139. avrotize/structuretojs/test_runner.js.jinja +45 -0
  140. avrotize/structuretojs.py +657 -0
  141. avrotize/structuretojsons.py +498 -0
  142. avrotize/structuretokusto.py +639 -0
  143. avrotize/structuretomd/README.md.jinja +204 -0
  144. avrotize/structuretomd.py +322 -0
  145. avrotize/structuretoproto.py +764 -0
  146. avrotize/structuretopython/dataclass_core.jinja +363 -0
  147. avrotize/structuretopython/enum_core.jinja +45 -0
  148. avrotize/structuretopython/map_alias.jinja +21 -0
  149. avrotize/structuretopython/pyproject_toml.jinja +23 -0
  150. avrotize/structuretopython/test_class.jinja +103 -0
  151. avrotize/structuretopython/test_enum.jinja +34 -0
  152. avrotize/structuretopython.py +799 -0
  153. avrotize/structuretorust/dataclass_enum.rs.jinja +63 -0
  154. avrotize/structuretorust/dataclass_struct.rs.jinja +121 -0
  155. avrotize/structuretorust/dataclass_union.rs.jinja +81 -0
  156. avrotize/structuretorust.py +714 -0
  157. avrotize/structuretots/class_core.ts.jinja +78 -0
  158. avrotize/structuretots/enum_core.ts.jinja +6 -0
  159. avrotize/structuretots/gitignore.jinja +8 -0
  160. avrotize/structuretots/index.ts.jinja +1 -0
  161. avrotize/structuretots/package.json.jinja +39 -0
  162. avrotize/structuretots/test_class.ts.jinja +35 -0
  163. avrotize/structuretots/tsconfig.json.jinja +21 -0
  164. avrotize/structuretots.py +740 -0
  165. avrotize/structuretoxsd.py +679 -0
  166. avrotize/xsdtoavro.py +413 -0
  167. avrotize-2.21.1.dist-info/METADATA +1319 -0
  168. avrotize-2.21.1.dist-info/RECORD +171 -0
  169. avrotize-2.21.1.dist-info/WHEEL +4 -0
  170. avrotize-2.21.1.dist-info/entry_points.txt +3 -0
  171. avrotize-2.21.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,355 @@
1
+ """Convert a JSON Structure schema to an Iceberg schema."""
2
+
3
+ import json
4
+ import sys
5
+ from typing import Dict, List, Any, Optional
6
+ import pyarrow as pa
7
+ from pyiceberg.schema import Schema, NestedField
8
+ from pyiceberg.types import (
9
+ BooleanType,
10
+ IntegerType,
11
+ LongType,
12
+ FloatType,
13
+ DoubleType,
14
+ StringType,
15
+ BinaryType,
16
+ DateType,
17
+ TimestampType,
18
+ DecimalType,
19
+ FixedType,
20
+ ListType,
21
+ MapType,
22
+ StructType,
23
+ TimeType
24
+ )
25
+ from pyiceberg.io.pyarrow import PyArrowFileIO, schema_to_pyarrow
26
+
27
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
28
+
29
+
30
class StructureToIcebergConverter:
    """Class to convert JSON Structure schema to Iceberg schema.

    The converter reads a JSON Structure document, maps the properties of its
    root object to Iceberg ``NestedField`` instances and writes the serialized
    Arrow form of the resulting schema to a file. Field IDs come from a
    per-instance counter, so they are unique within one converter instance.
    """

    def __init__(self: 'StructureToIcebergConverter'):
        # Not consulted by any method of this class; kept so existing callers
        # that touch the attribute keep working.
        self.named_type_cache: Dict[str, JsonNode] = {}
        self.id_counter = 0
        self.definitions: Dict[str, Any] = {}
        self.schema_doc: Optional[Dict[str, Any]] = None
        # Stack of $ref targets currently being converted; used to stop
        # self-referential types from recursing without bound.
        self._active_refs: List[str] = []

    def get_id(self) -> int:
        """Get a unique ID for a field."""
        self.id_counter += 1
        return self.id_counter

    def get_fullname(self, namespace: str, name: str) -> str:
        """Get the full name of a record type."""
        return f"{namespace}.{name}" if namespace else name

    def convert_structure_to_iceberg(self, structure_schema_path: str, structure_record_type: Optional[str], output_path: str, emit_cloudevents_columns: bool=False):
        """Convert a JSON Structure schema to an Iceberg schema.

        Args:
            structure_schema_path: Path of the JSON Structure schema file.
            structure_record_type: Name of a type under ``definitions`` to use
                as the root when the document itself is not an object type.
            output_path: Local path the serialized Arrow schema is written to.
            emit_cloudevents_columns: When true, append the optional
                ``___type``, ``___source``, ``___id``, ``___time`` and
                ``___subject`` columns.

        Raises:
            SystemExit: If no schema path is given or no root object type can
                be determined (the process exits with status 1).
            ValueError: If a reference in the schema cannot be resolved.
        """
        if not structure_schema_path:
            print("Please specify the JSON Structure schema file")
            sys.exit(1)
        with open(structure_schema_path, "r", encoding="utf-8") as f:
            schema = json.load(f)
        self.schema_doc = schema

        # Handle definitions if present
        if "definitions" in schema:
            self.definitions = schema["definitions"]

        # For JSON Structure, we expect an object type at the top level
        if schema.get("type") != "object":
            if "$ref" in schema:
                schema = self.resolve_ref(schema["$ref"])
            elif structure_record_type and "definitions" in schema:
                # Look for the type in definitions
                if structure_record_type in schema["definitions"]:
                    schema = schema["definitions"][structure_record_type]
                else:
                    print(f"No record type {structure_record_type} found in the JSON Structure schema definitions")
                    sys.exit(1)
            elif "$root" in schema:
                # Documents may name their root type with $root instead of
                # declaring an object inline.
                schema = self.resolve_ref(schema["$root"])
            else:
                print("Expected a JSON Structure schema with type 'object' at the top level")
                sys.exit(1)

        # Top-level columns: the type is converted before the column's own ID
        # is drawn, so nested fields receive lower IDs than their parent.
        iceberg_fields: List[NestedField] = [
            self._make_field(prop_name, prop_schema, is_required, id_first=False)
            for prop_name, prop_schema, is_required in self._collect_object_members(schema)
        ]

        if emit_cloudevents_columns:
            for column_name, column_type in (
                ("___type", StringType()),
                ("___source", StringType()),
                ("___id", StringType()),
                ("___time", TimestampType()),
                ("___subject", StringType()),
            ):
                iceberg_fields.append(NestedField(
                    field_id=self.get_id(),
                    name=column_name,
                    field_type=column_type,
                    required=False,
                ))

        iceberg_schema = Schema(*iceberg_fields)
        arrow_schema = schema_to_pyarrow(iceberg_schema)
        print(f"Iceberg schema created: {arrow_schema}")

        # Write to Iceberg table (for demonstration, using local file system)
        file_io = PyArrowFileIO()
        output_file = file_io.new_output("file://"+output_path)
        with output_file.create(overwrite=True) as f:
            pa.output_stream(f).write(arrow_schema.serialize().to_pybytes())

    def resolve_ref(self, ref: str) -> Dict[str, Any]:
        """Resolve a local ``$ref`` reference against the loaded schema document.

        Args:
            ref: A reference of the form ``#/segment/segment``. Segments are
                decoded as JSON Pointer tokens (``~1`` is ``/``, ``~0`` is ``~``).

        Returns:
            The node of ``self.schema_doc`` the reference points to.

        Raises:
            ValueError: If the reference is not local or a segment is missing.
        """
        if not ref.startswith("#/"):
            raise ValueError(f"Only local references are supported, got: {ref}")

        current = self.schema_doc
        for raw_token in ref[2:].split("/"):
            # Decode "~1" before "~0" so that "~01" ends up as "~1", not "/".
            token = raw_token.replace("~1", "/").replace("~0", "~")
            if isinstance(current, dict) and token in current:
                current = current[token]
            else:
                raise ValueError(f"Could not resolve reference: {ref}")

        return current

    def _collect_object_members(self, object_schema, _visiting=None) -> List[Any]:
        """Return ``(name, schema, required)`` tuples for an object type.

        Properties inherited through ``$extends`` come first, following the
        whole chain of base types. A property redefined by a derived type
        replaces the inherited entry so that no field name appears twice.
        """
        if not isinstance(object_schema, dict):
            return []
        visiting = set() if _visiting is None else _visiting
        if id(object_schema) in visiting:
            # Cyclic $extends chain: this type has already contributed.
            return []
        visiting.add(id(object_schema))

        merged: Dict[str, Any] = {}
        extends = object_schema.get("$extends")
        base_refs = [extends] if isinstance(extends, str) else list(extends or [])
        for base_ref in base_refs:
            base_schema = self.resolve_ref(base_ref)
            for name, schema, is_required in self._collect_object_members(base_schema, visiting):
                merged[name] = (schema, is_required)

        required = object_schema.get("required", [])
        for name, schema in object_schema.get("properties", {}).items():
            merged[name] = (schema, name in required)

        return [(name, schema, is_required) for name, (schema, is_required) in merged.items()]

    def _make_field(self, name, schema, required, id_first=True):
        """Build a ``NestedField`` for ``schema``.

        ``id_first`` selects whether the field's own ID is drawn before or
        after the IDs of the fields nested inside its type.
        """
        if id_first:
            field_id = self.get_id()
            field_type = self.convert_structure_type_to_iceberg_type(schema)
        else:
            field_type = self.convert_structure_type_to_iceberg_type(schema)
            field_id = self.get_id()
        return NestedField(field_id=field_id, name=name, field_type=field_type, required=required)

    def _alternatives_struct(self, named_alternatives):
        """Build a struct with one optional field per ``(name, schema)`` alternative."""
        return StructType(*[
            self._make_field(name, schema, False, id_first=False)
            for name, schema in named_alternatives
        ])

    def _convert_type_list(self, type_list):
        """Convert a list of alternative types such as ``["string", "null"]``."""
        non_null_types = [t for t in type_list if t != "null"]
        if not non_null_types:
            # Only null - return string as fallback
            return StringType()
        if len(non_null_types) == 1:
            # Nullable type: optionality is carried by the field's required flag.
            return self.convert_structure_type_to_iceberg_type(non_null_types[0])
        # Union of multiple non-null types - create a struct with alternatives
        return self._alternatives_struct(
            (f"option_{i}", choice) for i, choice in enumerate(non_null_types))

    def _convert_reference(self, ref):
        """Convert the type a ``$ref`` points to, guarding against recursion."""
        if ref in self._active_refs:
            # A type that refers back to itself cannot be expanded into a
            # finite struct; fall back to the same type used for 'any'.
            return StringType()
        self._active_refs.append(ref)
        try:
            return self.convert_structure_type_to_iceberg_type(self.resolve_ref(ref))
        finally:
            self._active_refs.pop()

    def convert_structure_type_to_iceberg_type(self, structure_type):
        """Convert a JSON Structure type to an Iceberg type.

        Args:
            structure_type: A type name, a list of alternative types, or a
                schema dictionary (optionally a ``$ref``).

        Returns:
            The corresponding Iceberg type; ``StringType`` when nothing more
            specific applies.
        """
        # Handle array of types (e.g., ["string", "null"] for nullable types)
        if isinstance(structure_type, list):
            return self._convert_type_list(structure_type)

        # Handle string type name directly
        if isinstance(structure_type, str):
            return self.map_iceberg_scalar_type(structure_type, {})

        if not isinstance(structure_type, dict):
            return StringType()

        if "$ref" in structure_type:
            return self._convert_reference(structure_type["$ref"])

        type_name = structure_type.get("type")

        # {"type": {"$ref": ...}} or an inline nested type definition
        if isinstance(type_name, dict):
            return self.convert_structure_type_to_iceberg_type(type_name)

        # {"type": ["string", "null"]}
        if isinstance(type_name, list):
            non_null_types = [t for t in type_name if t != "null"]
            if len(non_null_types) == 1 and isinstance(non_null_types[0], str):
                # Keep sibling keywords (items, values, properties, precision,
                # ...) attached to the single remaining type.
                return self.convert_structure_type_to_iceberg_type(
                    {**structure_type, "type": non_null_types[0]})
            return self._convert_type_list(type_name)

        # Arrays and sets both become Iceberg lists
        if type_name in ("array", "set"):
            items = structure_type.get("items", {"type": "string"})
            element_id = self.get_id()
            return ListType(
                element_id=element_id,
                element_type=self.convert_structure_type_to_iceberg_type(items),
                element_required=True
            )

        if type_name == "map":
            values = structure_type.get("values", {"type": "string"})
            key_id = self.get_id()
            value_id = self.get_id()
            return MapType(
                key_id=key_id,
                key_type=StringType(),
                value_id=value_id,
                value_type=self.convert_structure_type_to_iceberg_type(values),
                value_required=True
            )

        # Tuples become structs with positional field names
        if type_name == "tuple":
            return StructType(*[
                self._make_field(f"field_{i}", item, True)
                for i, item in enumerate(structure_type.get("items", []))
            ])

        if type_name == "object":
            return StructType(*[
                self._make_field(prop_name, prop_schema, is_required)
                for prop_name, prop_schema, is_required in self._collect_object_members(structure_type)
            ])

        # Choice (union): a struct with one optional field per alternative
        if type_name == "choice":
            choices = structure_type.get("choices", [])
            if isinstance(choices, list):
                return self._alternatives_struct(
                    (f"option_{i}", choice) for i, choice in enumerate(choices))
            if isinstance(choices, dict):
                return self._alternatives_struct(choices.items())
            return StringType()

        if type_name == "any":
            return StringType()

        # Primitive types, possibly with annotations such as precision/scale
        if type_name:
            return self.map_iceberg_scalar_type(type_name, structure_type)

        return StringType()

    def map_iceberg_scalar_type(self, type_name: str, type_schema: Dict[str, Any]):
        """Map a JSON Structure scalar type to an Iceberg scalar type.

        Args:
            type_name: The JSON Structure type name.
            type_schema: The schema carrying the type; consulted for the
                ``precision`` and ``scale`` of decimals.

        Returns:
            The Iceberg type; ``StringType`` for names without a mapping.
        """
        # Decimal carries precision and scale, defaulting to (38, 18)
        if type_name == "decimal":
            precision = type_schema.get("precision", 38)
            scale = type_schema.get("scale", 18)
            return DecimalType(precision, scale)

        # Map other primitive types
        type_mapping = {
            'null': StringType(),  # Iceberg doesn't have a null type
            'boolean': BooleanType(),
            'string': StringType(),
            'int8': IntegerType(),  # Iceberg doesn't have byte type
            'uint8': IntegerType(),
            'int16': IntegerType(),  # Iceberg doesn't have short type
            'uint16': IntegerType(),
            'int32': IntegerType(),
            'uint32': LongType(),  # Use long for unsigned int32
            'int64': LongType(),
            'uint64': LongType(),  # Iceberg doesn't distinguish signed/unsigned
            'int128': StringType(),  # No native 128-bit support
            'uint128': StringType(),
            'integer': IntegerType(),  # Generic integer
            'number': DoubleType(),  # Generic number
            'float8': FloatType(),
            'float': FloatType(),
            'float32': FloatType(),
            'binary32': FloatType(),
            'double': DoubleType(),
            'float64': DoubleType(),
            'binary64': DoubleType(),
            'binary': BinaryType(),
            'bytes': BinaryType(),  # Binary data
            'date': DateType(),
            'time': TimeType(),
            'datetime': TimestampType(),
            'timestamp': TimestampType(),
            'duration': LongType(),  # Store as microseconds
            'uuid': StringType(),  # Store UUID as string
            'uri': StringType(),
            'jsonpointer': StringType(),
        }

        return type_mapping.get(type_name, StringType())
349
+
350
+
351
def convert_structure_to_iceberg(structure_schema_path, structure_record_type, output_path, emit_cloudevents_columns=False):
    """Convert a JSON Structure schema file to an Iceberg schema file.

    Module-level convenience entry point: creates a fresh
    ``StructureToIcebergConverter`` and forwards all arguments unchanged to
    its ``convert_structure_to_iceberg`` method.
    """
    StructureToIcebergConverter().convert_structure_to_iceberg(
        structure_schema_path,
        structure_record_type,
        output_path,
        emit_cloudevents_columns,
    )
@@ -0,0 +1,34 @@
1
{# Template for a JSON Structure choice type, rendered as an abstract Java class
   with one nested static subclass per alternative.
   Context used: docstring, deprecated, jackson_annotation, class_name, and
   choices (each providing name, type, value_type and docstring). -#}
/**
 * {{ docstring }}
 *
 * <p>JSON Structure choice (discriminated union) type.</p>
 */
{%- if deprecated %}
@Deprecated
{%- endif %}
{%- if jackson_annotation %}
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT, property = "type")
@JsonSubTypes({
{%- for choice in choices %}
    @JsonSubTypes.Type(value = {{ choice.type }}.class, name = "{{ choice.name }}"){% if not loop.last %},{% endif %}
{%- endfor %}
})
{%- endif %}
public abstract class {{ class_name }} {
    public {{ class_name }}() {}
{%- for choice in choices %}

    /** {{ choice.docstring }} */
    public static class {{ choice.type }} extends {{ class_name }} {
        private {{ choice.value_type }} value;

        public {{ choice.type }}() {}

        public {{ choice.type }}({{ choice.value_type }} value) {
            this.value = value;
        }

        public {{ choice.value_type }} getValue() { return value; }
        public void setValue({{ choice.value_type }} value) { this.value = value; }
    }
{%- endfor %}
}
@@ -0,0 +1,23 @@
1
{# Template for the opening part of a Java class: declaration, default
   constructor and one member per field. The closing brace is not emitted here.
   Context used: docstring, is_abstract, deprecated, class_name, base_class,
   jackson_annotation, and fields (each providing name, original_name, type,
   docstring, is_const and const_value). Relies on a `pascal` filter. -#}
/**
 * {{ docstring }}
{%- if is_abstract %}
 *
 * <p>This is an abstract type and cannot be instantiated directly.</p>
{%- endif %}
 */
{%- if deprecated %}
@Deprecated
{%- endif %}
public {{ 'abstract ' if is_abstract else '' }}class {{ class_name }}{% if base_class %} extends {{ base_class }}{% endif %} {
    {% if is_abstract %}protected{% else %}public{% endif %} {{ class_name }}() {}
{%- for field in fields %}

    /** {{ field.docstring }} */
{%- if jackson_annotation and field.original_name != field.name %}
    @JsonProperty("{{ field.original_name }}")
{%- endif %}
{%- if field.is_const %}
    public static final {{ field.type }} {{ field.name }} = {{ field.const_value }};
{%- else %}
    private {{ field.type }} {{ field.name }};
    public {{ field.type }} get{{ field.name|pascal }}() { return {{ field.name }}; }
    public void set{{ field.name|pascal }}({{ field.type }} {{ field.name }}) { this.{{ field.name }} = {{ field.name }}; }
{%- endif %}
{%- endfor %}
@@ -0,0 +1,18 @@
1
{# Template for a Java enum.
   Context used: docstring, deprecated, class_name, is_numeric, symbols and
   numeric_type. When is_numeric is set, each symbol provides name and value;
   otherwise symbols are joined directly as the constant names. -#}
/** {{ docstring }} */
{%- if deprecated %}
@Deprecated
{%- endif %}
public enum {{ class_name }} {
{%- if not is_numeric %}
    {{ symbols|join(', ') }}
{%- else %}
{%- for symbol in symbols %}
    {{ symbol.name }}({{ symbol.value }}){{ ';' if loop.last else ',' }}
{%- endfor %}

    private final {{ numeric_type }} value;
    {{ class_name }}({{ numeric_type }} value) { this.value = value; }
    public {{ numeric_type }} getValue() { return value; }
{%- endif %}
}
@@ -0,0 +1,30 @@
1
{# equals()/hashCode() members for a generated class. Context used: class_name, fields (field names) and field_count. This comment line renders as the template's leading blank line. #}
    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (!(obj instanceof {{ class_name }})) return false;
        {{ class_name }} other = ({{ class_name }}) obj;
{%- if field_count == 0 %}
        return true;
{%- else %}
        return {% for field in fields %}{% if not loop.first %}
            && {% endif %}Objects.equals(this.{{ field }}, other.{{ field }}){% endfor %};
{%- endif %}
    }

    @Override
    public int hashCode() {
{%- if field_count == 0 %}
        return 0;
{%- elif field_count > 8 %}
        int result = Objects.hash({{ fields[:8]|join(', ') }});
{%- for field in fields[8:] %}
        result = 31 * result + Objects.hashCode({{ field }});
{%- endfor %}
        return result;
{%- else %}
        return Objects.hash({{ fields|join(', ') }});
{%- endif %}
    }
@@ -0,0 +1,26 @@
1
<?xml version="1.0" encoding="UTF-8"?>
{#- Maven POM for the generated Java project.
    Context used: groupid, artifactid and jackson_version. #}
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>{{ groupid }}</groupId>
    <artifactId>{{ artifactid }}</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- Fix the source encoding so the build does not depend on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <jackson.version>{{ jackson_version }}</jackson.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>
    </dependencies>
</project>
@@ -0,0 +1,49 @@
1
{# Template for a JSON Structure tuple type, rendered as a Java class with one
   private member per element plus accessors.
   Context used: docstring, deprecated, jackson_annotation, class_name, and
   elements (each providing name, type and docstring). Relies on a `pascal`
   filter. -#}
/**
 * {{ docstring }}
 *
 * <p>JSON Structure tuple type - serializes as JSON array.</p>
 */
{%- if deprecated %}
@Deprecated
{%- endif %}
{%- if jackson_annotation %}
@JsonFormat(shape = JsonFormat.Shape.ARRAY)
{%- endif %}
public class {{ class_name }} {
{%- for element in elements %}

    /** {{ element.docstring }} */
    private {{ element.type }} {{ element.name }};
{%- endfor %}

    public {{ class_name }}() {}
{#- Without elements the all-args constructor would duplicate the no-arg one. #}
{%- if elements %}

    public {{ class_name }}({% for element in elements %}{{ element.type }} {{ element.name }}{% if not loop.last %}, {% endif %}{% endfor %}) {
{%- for element in elements %}
        this.{{ element.name }} = {{ element.name }};
{%- endfor %}
    }
{%- endif %}
{%- for element in elements %}

    public {{ element.type }} get{{ element.name|pascal }}() { return {{ element.name }}; }
    public void set{{ element.name|pascal }}({{ element.type }} {{ element.name }}) { this.{{ element.name }} = {{ element.name }}; }
{%- endfor %}
{%- if jackson_annotation %}

    {#- Delegating mode: the single argument is the whole JSON array, not a named property. #}
    @JsonCreator(mode = JsonCreator.Mode.DELEGATING)
    public static {{ class_name }} fromArray(Object[] array) {
        if (array == null || array.length != {{ elements|length }}) {
            throw new IllegalArgumentException("Array must have exactly {{ elements|length }} elements");
        }
        {{ class_name }} tuple = new {{ class_name }}();
{%- for element in elements %}
        tuple.{{ element.name }} = ({{ element.type }}) array[{{ loop.index0 }}];
{%- endfor %}
        return tuple;
    }

    @JsonValue
    public Object[] toArray() {
        return new Object[] { {% for element in elements %}{{ element.name }}{% if not loop.last %}, {% endif %}{% endfor %} };
    }
{%- endif %}
}