avrotize 2.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. avrotize/__init__.py +66 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp/CMakeLists.txt.jinja +77 -0
  7. avrotize/avrotocpp/build.bat.jinja +7 -0
  8. avrotize/avrotocpp/build.sh.jinja +7 -0
  9. avrotize/avrotocpp/dataclass_body.jinja +108 -0
  10. avrotize/avrotocpp/vcpkg.json.jinja +21 -0
  11. avrotize/avrotocpp.py +483 -0
  12. avrotize/avrotocsharp/README.md.jinja +166 -0
  13. avrotize/avrotocsharp/class_test.cs.jinja +266 -0
  14. avrotize/avrotocsharp/dataclass_core.jinja +293 -0
  15. avrotize/avrotocsharp/enum_test.cs.jinja +20 -0
  16. avrotize/avrotocsharp/project.csproj.jinja +30 -0
  17. avrotize/avrotocsharp/project.sln.jinja +34 -0
  18. avrotize/avrotocsharp/run_coverage.ps1.jinja +98 -0
  19. avrotize/avrotocsharp/run_coverage.sh.jinja +149 -0
  20. avrotize/avrotocsharp/testproject.csproj.jinja +19 -0
  21. avrotize/avrotocsharp.py +1180 -0
  22. avrotize/avrotocsv.py +121 -0
  23. avrotize/avrotodatapackage.py +173 -0
  24. avrotize/avrotodb.py +1383 -0
  25. avrotize/avrotogo/go_enum.jinja +12 -0
  26. avrotize/avrotogo/go_helpers.jinja +31 -0
  27. avrotize/avrotogo/go_struct.jinja +151 -0
  28. avrotize/avrotogo/go_test.jinja +47 -0
  29. avrotize/avrotogo/go_union.jinja +38 -0
  30. avrotize/avrotogo.py +476 -0
  31. avrotize/avrotographql.py +197 -0
  32. avrotize/avrotoiceberg.py +210 -0
  33. avrotize/avrotojava/class_test.java.jinja +212 -0
  34. avrotize/avrotojava/enum_test.java.jinja +21 -0
  35. avrotize/avrotojava/testproject.pom.jinja +54 -0
  36. avrotize/avrotojava.py +2156 -0
  37. avrotize/avrotojs.py +250 -0
  38. avrotize/avrotojsons.py +481 -0
  39. avrotize/avrotojstruct.py +345 -0
  40. avrotize/avrotokusto.py +364 -0
  41. avrotize/avrotomd/README.md.jinja +49 -0
  42. avrotize/avrotomd.py +137 -0
  43. avrotize/avrotools.py +168 -0
  44. avrotize/avrotoparquet.py +208 -0
  45. avrotize/avrotoproto.py +359 -0
  46. avrotize/avrotopython/dataclass_core.jinja +241 -0
  47. avrotize/avrotopython/enum_core.jinja +87 -0
  48. avrotize/avrotopython/pyproject_toml.jinja +18 -0
  49. avrotize/avrotopython/test_class.jinja +97 -0
  50. avrotize/avrotopython/test_enum.jinja +23 -0
  51. avrotize/avrotopython.py +626 -0
  52. avrotize/avrotorust/dataclass_enum.rs.jinja +74 -0
  53. avrotize/avrotorust/dataclass_struct.rs.jinja +204 -0
  54. avrotize/avrotorust/dataclass_union.rs.jinja +105 -0
  55. avrotize/avrotorust.py +435 -0
  56. avrotize/avrotots/class_core.ts.jinja +140 -0
  57. avrotize/avrotots/class_test.ts.jinja +77 -0
  58. avrotize/avrotots/enum_core.ts.jinja +46 -0
  59. avrotize/avrotots/gitignore.jinja +34 -0
  60. avrotize/avrotots/index.ts.jinja +0 -0
  61. avrotize/avrotots/package.json.jinja +23 -0
  62. avrotize/avrotots/tsconfig.json.jinja +21 -0
  63. avrotize/avrotots.py +687 -0
  64. avrotize/avrotoxsd.py +344 -0
  65. avrotize/cddltostructure.py +1841 -0
  66. avrotize/commands.json +3496 -0
  67. avrotize/common.py +834 -0
  68. avrotize/constants.py +87 -0
  69. avrotize/csvtoavro.py +132 -0
  70. avrotize/datapackagetoavro.py +76 -0
  71. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  72. avrotize/dependencies/cs/net90/dependencies.csproj +29 -0
  73. avrotize/dependencies/go/go121/go.mod +6 -0
  74. avrotize/dependencies/java/jdk21/pom.xml +91 -0
  75. avrotize/dependencies/python/py312/requirements.txt +13 -0
  76. avrotize/dependencies/rust/stable/Cargo.toml +17 -0
  77. avrotize/dependencies/typescript/node22/package.json +16 -0
  78. avrotize/dependency_resolver.py +348 -0
  79. avrotize/dependency_version.py +432 -0
  80. avrotize/generic/generic.avsc +57 -0
  81. avrotize/jsonstoavro.py +2167 -0
  82. avrotize/jsonstostructure.py +2864 -0
  83. avrotize/jstructtoavro.py +878 -0
  84. avrotize/kstructtoavro.py +93 -0
  85. avrotize/kustotoavro.py +455 -0
  86. avrotize/openapitostructure.py +717 -0
  87. avrotize/parquettoavro.py +157 -0
  88. avrotize/proto2parser.py +498 -0
  89. avrotize/proto3parser.py +403 -0
  90. avrotize/prototoavro.py +382 -0
  91. avrotize/prototypes/any.avsc +19 -0
  92. avrotize/prototypes/api.avsc +106 -0
  93. avrotize/prototypes/duration.avsc +20 -0
  94. avrotize/prototypes/field_mask.avsc +18 -0
  95. avrotize/prototypes/struct.avsc +60 -0
  96. avrotize/prototypes/timestamp.avsc +20 -0
  97. avrotize/prototypes/type.avsc +253 -0
  98. avrotize/prototypes/wrappers.avsc +117 -0
  99. avrotize/structuretocddl.py +597 -0
  100. avrotize/structuretocpp/CMakeLists.txt.jinja +76 -0
  101. avrotize/structuretocpp/build.bat.jinja +3 -0
  102. avrotize/structuretocpp/build.sh.jinja +3 -0
  103. avrotize/structuretocpp/dataclass_body.jinja +50 -0
  104. avrotize/structuretocpp/vcpkg.json.jinja +11 -0
  105. avrotize/structuretocpp.py +697 -0
  106. avrotize/structuretocsharp/class_test.cs.jinja +180 -0
  107. avrotize/structuretocsharp/dataclass_core.jinja +156 -0
  108. avrotize/structuretocsharp/enum_test.cs.jinja +36 -0
  109. avrotize/structuretocsharp/json_structure_converters.cs.jinja +399 -0
  110. avrotize/structuretocsharp/program.cs.jinja +49 -0
  111. avrotize/structuretocsharp/project.csproj.jinja +17 -0
  112. avrotize/structuretocsharp/project.sln.jinja +34 -0
  113. avrotize/structuretocsharp/testproject.csproj.jinja +18 -0
  114. avrotize/structuretocsharp/tuple_converter.cs.jinja +121 -0
  115. avrotize/structuretocsharp.py +2295 -0
  116. avrotize/structuretocsv.py +365 -0
  117. avrotize/structuretodatapackage.py +659 -0
  118. avrotize/structuretodb.py +1125 -0
  119. avrotize/structuretogo/go_enum.jinja +12 -0
  120. avrotize/structuretogo/go_helpers.jinja +26 -0
  121. avrotize/structuretogo/go_interface.jinja +18 -0
  122. avrotize/structuretogo/go_struct.jinja +187 -0
  123. avrotize/structuretogo/go_test.jinja +70 -0
  124. avrotize/structuretogo.py +729 -0
  125. avrotize/structuretographql.py +502 -0
  126. avrotize/structuretoiceberg.py +355 -0
  127. avrotize/structuretojava/choice_core.jinja +34 -0
  128. avrotize/structuretojava/class_core.jinja +23 -0
  129. avrotize/structuretojava/enum_core.jinja +18 -0
  130. avrotize/structuretojava/equals_hashcode.jinja +30 -0
  131. avrotize/structuretojava/pom.xml.jinja +26 -0
  132. avrotize/structuretojava/tuple_core.jinja +49 -0
  133. avrotize/structuretojava.py +938 -0
  134. avrotize/structuretojs/class_core.js.jinja +33 -0
  135. avrotize/structuretojs/enum_core.js.jinja +10 -0
  136. avrotize/structuretojs/package.json.jinja +12 -0
  137. avrotize/structuretojs/test_class.js.jinja +84 -0
  138. avrotize/structuretojs/test_enum.js.jinja +58 -0
  139. avrotize/structuretojs/test_runner.js.jinja +45 -0
  140. avrotize/structuretojs.py +657 -0
  141. avrotize/structuretojsons.py +498 -0
  142. avrotize/structuretokusto.py +639 -0
  143. avrotize/structuretomd/README.md.jinja +204 -0
  144. avrotize/structuretomd.py +322 -0
  145. avrotize/structuretoproto.py +764 -0
  146. avrotize/structuretopython/dataclass_core.jinja +363 -0
  147. avrotize/structuretopython/enum_core.jinja +45 -0
  148. avrotize/structuretopython/map_alias.jinja +21 -0
  149. avrotize/structuretopython/pyproject_toml.jinja +23 -0
  150. avrotize/structuretopython/test_class.jinja +103 -0
  151. avrotize/structuretopython/test_enum.jinja +34 -0
  152. avrotize/structuretopython.py +799 -0
  153. avrotize/structuretorust/dataclass_enum.rs.jinja +63 -0
  154. avrotize/structuretorust/dataclass_struct.rs.jinja +121 -0
  155. avrotize/structuretorust/dataclass_union.rs.jinja +81 -0
  156. avrotize/structuretorust.py +714 -0
  157. avrotize/structuretots/class_core.ts.jinja +78 -0
  158. avrotize/structuretots/enum_core.ts.jinja +6 -0
  159. avrotize/structuretots/gitignore.jinja +8 -0
  160. avrotize/structuretots/index.ts.jinja +1 -0
  161. avrotize/structuretots/package.json.jinja +39 -0
  162. avrotize/structuretots/test_class.ts.jinja +35 -0
  163. avrotize/structuretots/tsconfig.json.jinja +21 -0
  164. avrotize/structuretots.py +740 -0
  165. avrotize/structuretoxsd.py +679 -0
  166. avrotize/xsdtoavro.py +413 -0
  167. avrotize-2.21.1.dist-info/METADATA +1319 -0
  168. avrotize-2.21.1.dist-info/RECORD +171 -0
  169. avrotize-2.21.1.dist-info/WHEEL +4 -0
  170. avrotize-2.21.1.dist-info/entry_points.txt +3 -0
  171. avrotize-2.21.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,2864 @@
1
+ """ JSON Schema to JSON Structure converter. """
2
+
3
+ # pylint: disable=too-many-lines, line-too-long, too-many-branches, too-many-statements, too-many-locals, too-many-nested-blocks, too-many-arguments, too-many-instance-attributes, too-many-public-methods, too-many-boolean-expressions
4
+
5
+ import json
6
+ import os
7
+ import copy
8
+ import re
9
+ import urllib.parse
10
+ from urllib.parse import ParseResult, urlparse, unquote
11
+ from typing import Any, Dict, List, Tuple, Union, Optional
12
+ import jsonpointer
13
+ from jsonpointer import JsonPointerException
14
+ import requests
15
+
16
+ from avrotize.common import avro_name, avro_name_with_altname, avro_namespace, find_schema_node, generic_type, set_schema_node
17
+ from avrotize.dependency_resolver import inline_dependencies_of, sort_messages_by_dependencies
18
+
19
# Type names this module lists as JSON Structure primitives.
# Note that the list also carries the compound kinds set/map/object/choice.
structure_primitive_types = [
    'null',
    'string',
    'int8',
    'int16',
    'int32',
    'int64',
    'uint8',
    'uint16',
    'uint32',
    'uint64',
    'float',
    'double',
    'decimal',
    'boolean',
    'bytes',
    'date',
    'time',
    'datetime',
    'duration',
    'uuid',
    'set',
    'map',
    'object',
    'choice',
]
26
+
27
+
28
+ class JsonToStructureConverter:
29
+ """
30
+ Converts JSON Schema documents to JSON Structure format.
31
+
32
+ Attributes:
33
+ imported_types: A dictionary of imported type schemas.
34
+ root_namespace: The namespace for the root schema.
35
+ max_recursion_depth: The maximum recursion depth.
36
+ types_with_unmerged_types: A list of types with unmerged types.
37
+ content_cache: A dictionary for caching fetched URLs.
38
+ utility_namespace: The namespace for utility types.
39
+ preserve_composition: Flag to preserve composition keywords.
40
+ detect_inheritance: Flag to detect inheritance patterns.
+ detect_discriminators: Flag to detect OpenAPI discriminator patterns.
41
+ convert_empty_objects_to_maps: Flag to convert objects with only additionalProperties to maps.
42
+ split_top_level_records: Flag to split top-level records.
43
+ root_class_name: The name of the root class.
44
+ """
45
+
46
def __init__(self) -> None:
    """Set every converter option to its default and start with empty caches."""
    # Options specific to JSON Structure output.
    self.preserve_composition = False  # composition keywords are resolved by default for JSON Structure compliance
    self.detect_inheritance = True
    self.detect_discriminators = True
    self.convert_empty_objects_to_maps = True

    # Naming defaults.
    self.root_namespace = 'example.com'
    self.utility_namespace = 'utility.vasters.com'
    self.root_class_name = 'document'
    self.split_top_level_records = False

    # Limits.
    self.max_recursion_depth = 40

    # Mutable working state; every instance gets its own containers.
    self.imported_types: Dict[Any, Any] = {}
    self.types_with_unmerged_types: List[dict] = []
    self.content_cache: Dict[str, str] = {}
    self.type_registry: Dict[str, str] = {}  # tracks type definitions for reference resolution
61
+
62
def is_empty_type(self, structure_type):
    """
    Report whether a JSON Structure type carries no content.

    Parameters:
    structure_type (any): A sized value: a type name, a schema dict, or a
        list of either. Values without a length raise TypeError.

    Returns:
    bool: True when the value has length zero, when it is a list whose
        members are all empty, when it is a dict without a 'type' key, or
        when it is an object/choice/set/map dict whose child collection
        ('properties', 'choices', 'items', 'values') is missing or empty.
        False in every other case.
    """
    if len(structure_type) == 0:
        return True

    if isinstance(structure_type, list):
        # A list is empty only when every member is empty.
        for member in structure_type:
            if not self.is_empty_type(member):
                return False
        return True

    if not isinstance(structure_type, dict):
        return False

    if 'type' not in structure_type:
        return True

    kind = structure_type['type']
    if kind == 'object':
        return 'properties' not in structure_type or len(structure_type['properties']) == 0
    if kind == 'choice':
        return 'choices' not in structure_type or len(structure_type['choices']) == 0
    if kind == 'set':
        return 'items' not in structure_type or not structure_type['items']
    if kind == 'map':
        return 'values' not in structure_type or not structure_type['values']
    return False
85
+
86
def is_empty_json_type(self, json_type):
    """
    Report whether a JSON Schema type carries no content.

    Parameters:
    json_type (any): A sized value: a type name, a schema dict, or a list
        of either. Values without a length raise TypeError.

    Returns:
    bool: True when the value has length zero, when it is a list whose
        members are all empty, or when it is a dict without a 'type' key.
        False in every other case.
    """
    if len(json_type) == 0:
        return True

    if isinstance(json_type, list):
        # A list is empty only when every member is empty.
        for member in json_type:
            if not self.is_empty_json_type(member):
                return False
        return True

    # Only a dict lacking 'type' counts as empty from here on.
    return isinstance(json_type, dict) and 'type' not in json_type
104
+
105
def detect_numeric_type(self, schema: dict) -> str:
    """
    Analyze schema constraints to determine the appropriate numeric type.

    Resolution order:
    1. A recognised ``format`` hint always wins (narrow and unsigned formats
       are widened to int32/int64 for compatibility).
    2. ``type: integer`` yields 'int32' unless a declared bound lies outside
       the signed 32-bit range, in which case 'int64' is returned. A single
       out-of-range bound is sufficient; previously a lone ``minimum`` or
       ``maximum`` beyond the range still produced 'int32'.
    3. ``type: number`` yields 'decimal' when ``multipleOf`` is a float or
       prints with a decimal point, or when ``precision``/``scale`` is
       present; otherwise 'double'.
    4. Anything else falls back to 'double'.

    Args:
        schema (dict): The JSON schema object

    Returns:
        str: The appropriate JSON Structure numeric type
    """
    format_mapping = {
        'int8': 'int32',    # widened for better compatibility
        'int16': 'int32',   # widened for better compatibility
        'int32': 'int32',
        'int64': 'int64',
        'uint8': 'int32',   # widened for better compatibility
        'uint16': 'int32',  # widened for better compatibility
        'uint32': 'int64',  # widened for better compatibility
        'uint64': 'int64',  # mapped to int64 for better compatibility
        'float': 'float',
        'double': 'double',
    }

    # Format hints take precedence over any constraint analysis.
    format_hint = schema.get('format')
    if format_hint and format_hint in format_mapping:
        return format_mapping[format_hint]

    schema_type = schema.get('type')

    if schema_type == 'integer':
        int32_min, int32_max = -2147483648, 2147483647
        # The exclusive bound is only consulted when the inclusive key is absent.
        bounds = (
            schema.get('minimum', schema.get('exclusiveMinimum')),
            schema.get('maximum', schema.get('exclusiveMaximum')),
        )
        # Any declared bound outside the int32 range means values can exceed it.
        for bound in bounds:
            if bound is not None and not int32_min <= bound <= int32_max:
                return 'int64'
        return 'int32'  # Conservative default, also for unconstrained integers

    if schema_type == 'number':
        if 'multipleOf' in schema:
            multiple_of = schema['multipleOf']
            if isinstance(multiple_of, float) or '.' in str(multiple_of):
                return 'decimal'

        # Precision/scale hints supplied as custom properties
        if 'precision' in schema or 'scale' in schema:
            return 'decimal'

    return 'double'  # Default for floating point and for unknown types
169
+
170
def detect_temporal_type(self, schema: dict) -> str:
    """
    Map a schema's ``format`` keyword to a JSON Structure temporal type.

    Args:
        schema (dict): The JSON schema object

    Returns:
        str: 'date', 'time', 'datetime' or 'duration' for the matching
            formats; 'string' when the format is absent, falsy or not one
            of those four.
    """
    fmt = schema.get('format')
    if not fmt:
        return 'string'

    by_format = {
        'date': 'date',
        'time': 'time',
        'date-time': 'datetime',
        'duration': 'duration',
    }
    return by_format.get(fmt, 'string')
190
+
191
def detect_collection_type(self, schema: dict) -> str:
    """
    Choose between 'set' and 'array' for a JSON Schema array.

    Args:
        schema (dict): The JSON schema array object

    Returns:
        str: 'set' when the schema's type is 'array' and ``uniqueItems`` is
            truthy; 'array' otherwise.
    """
    is_unique_array = schema.get('type') == 'array' and schema.get('uniqueItems', False)
    return 'set' if is_unique_array else 'array'
203
+
204
def should_convert_to_map(self, json_object: dict) -> bool:
    """
    Decide whether an object schema should become a map type.

    Conversion is considered only while ``convert_empty_objects_to_maps`` is
    enabled. An object qualifies when it declares no (or empty) ``properties``
    and either
    - has ``additionalProperties`` with no (or empty) ``patternProperties``, or
    - has ``patternProperties`` and no ``additionalProperties`` key at all.

    Args:
        json_object (dict): The JSON schema object

    Returns:
        bool: True if should be converted to map
    """
    if not self.convert_empty_objects_to_maps:
        return False

    def lacks(keyword):
        # Absent and present-but-empty are treated alike.
        return keyword not in json_object or len(json_object[keyword]) == 0

    if 'additionalProperties' in json_object:
        # The patternProperties-only rule cannot apply once this key exists.
        return lacks('properties') and lacks('patternProperties')

    if 'patternProperties' in json_object:
        return lacks('properties')

    return False
230
+
231
+ def detect_discriminator_pattern(self, schema: dict) -> dict | None:
232
+ """
233
+ Detect OpenAPI discriminator patterns for choice type.
234
+
235
+ Args:
236
+ schema (dict): The JSON schema object
237
+
238
+ Returns:
239
+ dict | None: {'propertyName', 'mapping'} copied from an explicit `discriminator` object; None when detection is disabled or nothing is detected
240
+ """
241
+ if not self.detect_discriminators:  # detection can be switched off per converter instance
242
+ return None
243
+
244
+ # Check for OpenAPI discriminator
245
+ if 'discriminator' in schema:
246
+ discriminator = schema['discriminator']
247
+ if isinstance(discriminator, dict) and 'propertyName' in discriminator:
248
+ return {
249
+ 'propertyName': discriminator['propertyName'],
250
+ 'mapping': discriminator.get('mapping', {})  # missing mapping defaults to an empty dict
251
+ }
252
+
253
+ return None  # NOTE(review): nesting is not recoverable from this flattened view; if this return sits at function level, the oneOf heuristic below is unreachable - confirm against the real file
254
+
255
+ # Check for oneOf with discriminator-like pattern
256
+ if 'oneOf' in schema:
257
+ # Look for common property across all oneOf options that could be a discriminator
258
+ oneof_options = schema['oneOf']
259
+ if len(oneof_options) > 1:
260
+ common_props = None  # stays None until the first option with 'properties' is seen
261
+ for option in oneof_options:
262
+ if '$ref' in option:
263
+ continue # Skip refs for now
264
+ if 'properties' in option:
265
+ props = set(option['properties'].keys())
266
+ if common_props is None:
267
+ common_props = props
268
+ else:
269
+ common_props = common_props.intersection(props)
270
+
271
+ # If there's exactly one common property, it might be a discriminator
272
+ if common_props and len(common_props) == 1:
273
+ prop_name = list(common_props)[0]
274
+ return {
275
+ 'property': prop_name,  # NOTE(review): key is 'property' here but 'propertyName' in the explicit-discriminator result above - confirm which one callers read
276
+ 'mapping': {} # Would need more analysis to populate
277
+ }
278
+
279
+ return None
280
+
281
+ def detect_inheritance_pattern(self, schema: dict, type_name: str = '') -> dict | None:
282
+ """
283
+ Detect simple inheritance patterns in allOf schemas.
284
+
285
+ Only detects patterns with exactly 2 items where one is a $ref and the other
286
+ contains properties/required/other object schema keywords.
287
+ Excludes self-referential patterns.
288
+
289
+ Args:
290
+ schema (dict): The JSON schema object
291
+ type_name (str): The name of the current type (to detect self-references)
292
+
293
+ Returns:
294
+ dict | None: Inheritance info if detected, None otherwise
295
+ """
296
+ if not self.detect_inheritance or 'allOf' not in schema:
297
+ return None
298
+
299
+ allof_items = schema['allOf']
300
+
301
+ # Only handle simple 2-item inheritance patterns
302
+ if len(allof_items) != 2:
303
+ return None
304
+
305
+ # Look for pattern: [{"$ref": "..."}, {"properties": {...}}] or similar
306
+ ref_item = None
307
+ extension_item = None
308
+
309
+ for item in allof_items:
310
+ if '$ref' in item and len(item) == 1: # Pure reference, no other properties
311
+ ref_item = item
312
+ elif ('type' in item or 'properties' in item or 'required' in item or
313
+ 'additionalProperties' in item) and '$ref' not in item: # Pure extension, no ref
314
+ extension_item = item
315
+
316
+ # Only return inheritance info for simple base + extension pattern
317
+ if ref_item and extension_item:
318
+ base_ref = ref_item['$ref']
319
+
320
+ # Check for self-referential patterns
321
+ if base_ref.startswith('#/definitions/'):
322
+ ref_type_name = base_ref[14:] # Remove '#/definitions/'
323
+ if ref_type_name == type_name:
324
+ # Self-referential pattern - don't convert to inheritance
325
+ return None
326
+
327
+ return {
328
+ 'base_ref': base_ref,
329
+ 'extension': extension_item
330
+ }
331
+
332
+ return None
333
+
334
+ def json_schema_primitive_to_structure_type(self, json_primitive: Optional[str | list], format: Optional[str], enum: Optional[list], record_name: str, field_name: str, namespace: str, dependencies: list, schema: dict) -> str | dict[str, Any] | list:
335
+ """
336
+ Convert a JSON Schema primitive type to JSON Structure primitive type.
337
+
338
+ Args:
339
+ json_primitive (str | list): The JSON Schema primitive type to be converted.
340
+ format (str | None): The format of the JSON primitive type, if applicable.
341
+ enum (list | None): The list of enum values, if applicable.
342
+ record_name (str): The name of the record.
343
+ field_name (str): The name of the field.
344
+ namespace (str): The namespace of the type.
345
+ dependencies (list): The list of dependencies; unrecognised type names are appended to it.
346
+ schema (dict): The complete schema object for analysis.
347
+ Returns: str | dict[str,Any] | list: The converted JSON Structure primitive type. """
348
+
349
+ if isinstance(json_primitive, list):  # a list of type names/schemas is handled as a union
350
+ if enum:
351
+ # Handle enum with multiple types (convert to string enum)
352
+ return {
353
+ 'type': 'string',
354
+ 'enum': list(enum)
355
+ }
356
+ else:
357
+ # Handle union types
358
+ union_types = []
359
+ for item in json_primitive:
360
+ if isinstance(item, str):
361
+ converted = self.json_schema_primitive_to_structure_type(
362
+ item, format, None, record_name, field_name, namespace, dependencies, schema)
363
+ union_types.append(converted)
364
+ elif isinstance(item, dict):
365
+ item_format = item.get('format', format)
366
+ item_enum = item.get('enum', enum)
367
+ item_type = item.get('type', item)  # falls back to the dict itself when it has no 'type' key
368
+ converted = self.json_schema_primitive_to_structure_type(
369
+ item_type, item_format, item_enum, record_name, field_name, namespace, dependencies, item)
370
+ union_types.append(converted)
371
+ # Always wrap as {"type": [ ... ]} for unions
372
+ return {"type": self.flatten_union(union_types, None, field_name)}  # flatten_union is defined outside this view
373
+ # Single (non-list) type from here on
374
+ structure_type = None
375
+
376
+ if json_primitive == 'string':
377
+ if format:
378
+ if format in ('date', 'time', 'date-time', 'duration'):
379
+ structure_type = self.detect_temporal_type({'type': 'string', 'format': format})
380
+ elif format == 'uuid':
381
+ structure_type = 'uuid'
382
+ elif format == 'byte':
383
+ structure_type = 'string' # Map bytes to string in JSON Structure. NOTE(review): an "elif format == 'binary':" clause appears to have been swallowed into this comment - confirm against the real file
384
+ structure_type = 'string' # Map binary to string in JSON Structure
385
+ else:
386
+ structure_type = 'string'
387
+ else:
388
+ structure_type = 'string'
389
+
390
+ elif json_primitive == 'integer':
391
+ structure_type = self.detect_numeric_type({**schema, 'type': 'integer', 'format': format})  # overlays type/format on a copy; requires schema to be a mapping
392
+
393
+ elif json_primitive == 'number':
394
+ structure_type = self.detect_numeric_type({**schema, 'type': 'number', 'format': format})  # overlays type/format on a copy; requires schema to be a mapping
395
+
396
+ elif json_primitive == 'boolean':
397
+ structure_type = 'boolean'
398
+
399
+ elif json_primitive == 'null':
400
+ structure_type = 'null'
401
+
402
+ else:
403
+ # Handle case where type is not specified but enum is present
404
+ if json_primitive is None and enum is not None:
405
+ # Default to string type for enums without explicit type
406
+ structure_type = 'string'
407
+ else: # Unknown type, keep as string reference
408
+ if isinstance(json_primitive, str):
409
+ dependencies.append(json_primitive)
410
+ structure_type = json_primitive or 'string' # Ensure we never return None
411
+ if isinstance(structure_type, str):  # Always return proper schema objects, not simple strings
412
+ result: dict[str, Any] = {'type': structure_type}
413
+
414
+ # Ensure map and set types are complete
415
+ if structure_type == 'map':
416
+ result['values'] = {'type': 'any'} # Default values type per user instruction
417
+ elif structure_type == 'set':
418
+ result['items'] = {'type': 'any'} # Default items type per user instruction
419
+
420
+ # Handle enums
421
+ if enum is not None:
422
+ result['enum'] = list(enum)
423
+
424
+ # Add constraints for string types
425
+ if structure_type == 'string' and isinstance(schema, dict):
426
+ if 'maxLength' in schema:
427
+ result['maxLength'] = schema['maxLength']
428
+ if 'minLength' in schema:
429
+ result['minLength'] = schema['minLength']
430
+ if 'pattern' in schema:
431
+ result['pattern'] = schema['pattern']
432
+
433
+ # Add precision/scale for decimal types
434
+ elif structure_type == 'decimal' and isinstance(schema, dict):
435
+ if 'multipleOf' in schema:
436
+ # Try to infer precision/scale from multipleOf
437
+ multiple_str = str(schema['multipleOf'])
438
+ if '.' in multiple_str:
439
+ scale = len(multiple_str.split('.')[1])  # number of digits after the decimal point
440
+ result['scale'] = str(scale)  # scale is emitted as a string, not an int
441
+
442
+ return result
443
+
444
+ # If already a dict or other complex type, return as-is
445
+ return structure_type
446
+
447
def _hoist_definition(self, schema, structure_schema, name_hint):
    """
    Register a copy of ``schema`` under ``structure_schema['definitions']``
    and return a ``$ref`` that points at it.

    The definition name is ``avro_name(name_hint or 'UnionType')``. If that
    name is already taken, a numeric suffix starting at 2 is appended until
    a free name is found. The stored copy gets its 'name' set to the chosen
    name; the ``schema`` argument itself is not modified.
    """
    if 'definitions' not in structure_schema:
        structure_schema['definitions'] = {}
    definitions = structure_schema['definitions']

    # Find a name that does not collide with an existing definition.
    stem = avro_name(name_hint or 'UnionType')
    candidate = stem
    suffix = 1
    while candidate in definitions:
        suffix += 1
        candidate = f"{stem}{suffix}"

    hoisted = dict(schema)  # shallow copy so the caller's dict stays untouched
    hoisted['name'] = candidate
    definitions[candidate] = hoisted
    return {'$ref': f"#/definitions/{candidate}"}
464
+
465
def _ensure_schema_object(self, value, structure_schema=None, name_hint=None, force_hoist_in_union=False):
    """
    Normalise a value into a form usable as a JSON Structure schema.

    Args:
        value: The value to wrap (type name, schema dict, or list of either)
        structure_schema: The structure schema for hoisting definitions
        name_hint: Hint for naming hoisted definitions
        force_hoist_in_union: True when the value is a member of a union;
            primitives are then returned as bare type names and other dicts
            are hoisted to definitions when ``structure_schema`` is given

    Returns:
        Outside a union: a dict with 'type', a dict with composition
        keywords, or {'type': {'$ref': ...}} for a pure reference.
        Inside a union: a bare primitive name, a pure {'$ref': ...}, the
        result of hoisting, or the dict itself.
        Anything that is not a dict, str or list becomes {'type': 'string'}.
    """
    union_primitives = ['string', 'boolean', 'integer', 'number', 'null', 'int32', 'int64', 'float', 'double', 'decimal', 'uuid', 'date', 'time', 'datetime', 'duration', 'bytes']

    if isinstance(value, str):
        # Union members that are plain primitives stay as bare type names.
        if force_hoist_in_union and value in union_primitives:
            return value
        # 'map' and 'set' named on their own are incomplete; give them a default element type.
        if value == 'map':
            return {'type': 'map', 'values': {'type': 'any'}}
        if value == 'set':
            return {'type': 'set', 'items': {'type': 'any'}}
        return {'type': value}

    if isinstance(value, list):
        # A list is a union: strings pass through, everything else is processed as a union member.
        members = []
        for position, member in enumerate(value):
            if isinstance(member, str):
                members.append(member)
                continue
            member_hint = f"{name_hint}_option_{position}" if name_hint else None
            members.append(self._ensure_schema_object(member, structure_schema, member_hint, force_hoist_in_union=True))
        return {"type": members}

    if not isinstance(value, dict):
        return {'type': 'string'}

    # Pure reference: bare inside a union, wrapped in 'type' elsewhere.
    if '$ref' in value and len(value) == 1:
        return value if force_hoist_in_union else {'type': value}

    # Composition keywords are passed through untouched.
    if 'anyOf' in value or 'oneOf' in value or 'allOf' in value:
        return value

    # Complete map/set schemas on a copy so the caller's dict is not modified.
    declared = value.get('type')
    if declared == 'map' and 'values' not in value:
        value = {**value, 'values': {'type': 'any'}}
    elif declared == 'set' and 'items' not in value:
        value = {**value, 'items': {'type': 'any'}}

    if force_hoist_in_union and '$ref' not in value:
        # {"type": "<primitive>"} collapses to the bare type name.
        if len(value) == 1 and 'type' in value and value['type'] in union_primitives:
            return value['type']
        # Anything more complex is hoisted when there is somewhere to hoist to.
        if structure_schema is not None:
            return self._hoist_definition(value, structure_schema, name_hint or 'UnionType')

    return value
545
+
546
def _scan_for_uses(self, structure_schema: dict) -> list:
    """
    Walk the whole schema and collect the extension names its keywords require.

    Every dict key at any depth is checked against four keyword groups; a hit
    adds the matching extension name. Lists are traversed, other values are
    ignored.

    Returns:
        list: The required extension names, de-duplicated and sorted.
    """
    keyword_groups = (
        ({'altnames'}, 'JSONStructureAlternateNames'),
        ({'unit', 'currency', 'symbol'}, 'JSONStructureUnits'),
        ({'pattern', 'minLength', 'maxLength', 'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum', 'multipleOf', 'const', 'enum', 'required', 'propertyNames', 'keyNames'}, 'JSONStructureValidation'),
        ({'if', 'then', 'else', 'dependentRequired', 'dependentSchemas', 'anyOf', 'allOf', 'oneOf', 'not'}, 'JSONStructureConditionalComposition'),
    )

    found = set()
    pending = [structure_schema]  # explicit work list instead of recursion
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            for key, child in node.items():
                for keywords, extension in keyword_groups:
                    if key in keywords:
                        found.add(extension)
                pending.append(child)
        elif isinstance(node, list):
            pending.extend(node)
    return sorted(found)
568
+
569
def _ensure_validation_extension_in_structure_schema(self, structure_schema) -> None:
    """
    Placeholder hook for adding JSONStructureValidation to the $uses array.

    Intentionally does nothing: _scan_for_uses already adds
    JSONStructureValidation whenever propertyNames or keyNames occur.

    Args:
        structure_schema: The structure schema to update (dict or list)
    """
    return None
580
+
581
def create_structure_object(self, properties: dict, required: list, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1, original_schema: dict | None = None) -> dict:
    """
    Create a JSON Structure object type from JSON Schema properties.

    The result always starts as ``{'type': 'object'}``. ``required`` and
    ``name`` are added when the corresponding argument is truthy, and every
    entry of ``properties`` is converted with ``json_type_to_structure_type``
    and normalised with ``_ensure_schema_object``. Property names rejected by
    ``is_valid_identifier`` are stored under their normalised name with the
    original kept in ``altnames['json']``.

    When ``original_schema`` is given, its ``patternProperties`` and
    ``additionalProperties`` are folded into ``additionalProperties`` (plus
    ``propertyNames`` for a single pattern). If it declares more than one
    pattern and no properties, the result of ``create_pattern_union_maps``
    is returned instead of an object.

    Args:
        properties (dict): The properties of the object
        required (list): List of required property names
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built (passed to the hoisting helpers)
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth
        original_schema (dict): The original JSON schema object containing
            patternProperties / additionalProperties

    Returns:
        dict: JSON Structure object definition, or whatever
        ``create_pattern_union_maps`` returns in the multi-pattern case.
    """
    # Create the basic structure object
    structure_obj = {
        'type': 'object'
    }

    # Add required field if it's not empty
    if required:
        structure_obj['required'] = required

    # Add name if provided
    if record_name:
        structure_obj['name'] = avro_name(record_name)

    # Initialize properties dict only if we have properties to add
    has_properties = bool(properties)
    if has_properties:
        structure_obj['properties'] = {}

        # Process regular properties
        for prop_name, prop_schema in properties.items():
            prop_type = self.json_type_to_structure_type(
                prop_schema, record_name, prop_name, namespace, dependencies,
                json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
            )
            # Normalize property name if needed
            if not self.is_valid_identifier(prop_name):
                normalized_name = self.normalize_identifier(prop_name)
                prop_entry = self._ensure_schema_object(prop_type, structure_schema, normalized_name)
                # Always create a new dict to add altnames, so the object
                # returned by _ensure_schema_object is not modified in place
                new_entry = {}
                if isinstance(prop_entry, dict):
                    new_entry.update(prop_entry)
                else:
                    new_entry['type'] = prop_entry
                # Keep the original JSON property name available
                new_entry['altnames'] = {'json': prop_name}
                structure_obj['properties'][normalized_name] = new_entry
            else:
                structure_obj['properties'][prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)

    # Handle patternProperties and additionalProperties
    has_additional_schema = False
    if original_schema:
        # Check for patternProperties that coexist with properties/additionalProperties
        pattern_properties = original_schema.get('patternProperties')
        additional_props = original_schema.get('additionalProperties')
        # Special case: multiple patternProperties with no properties
        # Should create a type union of maps, not a single object with anyOf
        # This applies whether additionalProperties is false OR a schema
        if (pattern_properties and len(pattern_properties) > 1 and
                (not 'properties' in original_schema or not original_schema['properties'])):
            # Return type union of maps instead of object
            return self.create_pattern_union_maps(
                pattern_properties, additional_props, record_name, namespace, dependencies,
                json_schema, base_uri, structure_schema, record_stack, recursion_depth
            )

        # Merge patternProperties into additionalProperties if both exist
        if pattern_properties and ('properties' in original_schema or additional_props is not None):
            # patternProperties coexists with properties/additionalProperties - merge into additionalProperties
            # Get the pattern schema for values (merge all pattern schemas)
            if len(pattern_properties) == 1:
                # Single pattern: keep the raw JSON Schema; it is converted further below
                pattern_schema = list(pattern_properties.values())[0]
            else:
                # Multiple patterns - create a union type instead of anyOf
                schemas = list(pattern_properties.values())
                # Convert each schema and create a proper union
                converted_schemas = []
                for idx, schema in enumerate(schemas):
                    converted_schema = self.json_type_to_structure_type(
                        schema, record_name, f'pattern_{idx}', namespace, dependencies,
                        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                    )
                    converted_schemas.append(converted_schema)

                if len(converted_schemas) == 1:
                    pattern_schema = converted_schemas[0]
                else:
                    # Create union type array - hoist compound types if needed
                    hoisted_schemas = []
                    for idx, schema in enumerate(converted_schemas):
                        hoisted_schema = self._ensure_schema_object(schema, structure_schema, f'pattern_{idx}', force_hoist_in_union=True)
                        hoisted_schemas.append(hoisted_schema)
                    pattern_schema = {'type': hoisted_schemas}

            if additional_props is False:
                # Override false additionalProperties with pattern schema
                merged_additional = pattern_schema
            elif additional_props is True:
                # Keep true (allow any additional properties)
                merged_additional = True
            elif isinstance(additional_props, dict):
                # Merge both schemas using a union type instead of anyOf
                additional_converted = self.json_type_to_structure_type(
                    additional_props, record_name, 'additional', namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )

                # Create union of additional props and pattern schema
                additional_hoisted = self._ensure_schema_object(additional_converted, structure_schema, 'additional', force_hoist_in_union=True)
                pattern_hoisted = self._ensure_schema_object(pattern_schema, structure_schema, 'pattern', force_hoist_in_union=True)

                merged_additional = {
                    'type': [additional_hoisted, pattern_hoisted]
                }
            elif additional_props is None:
                # No additionalProperties, use pattern schema
                merged_additional = pattern_schema
            else:
                # Any other additionalProperties value: fall back to the pattern schema
                merged_additional = pattern_schema
            # Convert merged schema to structure type
            # NOTE(review): in the multi-pattern and dict-additionalProperties
            # cases merged_additional is already built from converted schemas
            # and is passed through json_type_to_structure_type again here -
            # confirm this second conversion is intended.
            if merged_additional is not True and isinstance(merged_additional, dict):
                additional_type = self.json_type_to_structure_type(
                    merged_additional, record_name, 'additionalProperty', namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                structure_obj['additionalProperties'] = self._ensure_schema_object(additional_type, structure_schema, 'additionalProperty', force_hoist_in_union=True)
                has_additional_schema = True
            elif merged_additional is True:
                structure_obj['additionalProperties'] = True
                has_additional_schema = True
            # A merged_additional that is neither a dict nor True leaves
            # additionalProperties unset and has_additional_schema False.

            # Add propertyNames validation for the patterns
            patterns = list(pattern_properties.keys())
            if len(patterns) == 1:
                # Single pattern - use it directly
                pattern = patterns[0]
                structure_obj['propertyNames'] = {
                    "type": "string",
                    "pattern": pattern
                }
            else:
                # Multiple patterns - in JSON Structure, we cannot use anyOf for propertyNames
                # Skip propertyNames validation when there are multiple patterns
                # The patterns are already handled via the merged additionalProperties schema
                pass

            # Ensure $uses includes JSONStructureValidation
            self._ensure_validation_extension_in_structure_schema(structure_schema)

        elif additional_props is not None and additional_props is not False:
            # Handle additionalProperties without patternProperties
            if isinstance(additional_props, dict):
                # Convert the additionalProperties schema to JSON Structure type
                additional_type = self.json_type_to_structure_type(
                    additional_props, record_name, 'additionalProperty', namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                structure_obj['additionalProperties'] = self._ensure_schema_object(additional_type, structure_schema, 'additionalProperty')
                has_additional_schema = True
            elif additional_props is True:
                # True means any additional properties are allowed with any type
                structure_obj['additionalProperties'] = True
                has_additional_schema = True

    # For JSON Structure compliance: If we have no properties and no additionalProperties/extension,
    # add a default additionalProperties to make the object schema valid.
    # '$extends' is never set on structure_obj in this method, so that part
    # of the condition always holds here.
    if not has_properties and not has_additional_schema and '$extends' not in structure_obj:
        # Add default additionalProperties to make the object valid per JSON Structure spec
        structure_obj['additionalProperties'] = True

    return structure_obj
756
+
757
def create_structure_choice(self, discriminator_info: dict, oneof_options: list, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a JSON Structure ``choice`` type from a discriminated ``oneOf``.

    Options that are ``$ref`` objects are keyed by the discriminator mapping
    when the reference appears there, otherwise by the definition name taken
    from a ``#/definitions/`` reference, otherwise by ``option_<index>``.
    Inline options are converted with ``json_type_to_structure_type`` and
    keyed by ``option_<index>``.

    Args:
        discriminator_info (dict): Discriminator information; the selector is read
            from ``property`` or, failing that, ``propertyName``
        oneof_options (list): List of oneOf options
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure choice definition
    """
    definitions_prefix = '#/definitions/'

    selector = discriminator_info.get('property') or discriminator_info.get('propertyName')
    mapping = discriminator_info.get('mapping', {})

    choices = {}
    choice_obj = {
        'type': 'choice',
        'selector': selector,
        'choices': choices,
    }
    if record_name:
        choice_obj['name'] = avro_name(record_name)

    # Reverse lookup: referenced definition -> discriminator value
    key_by_ref = {target: label for label, target in mapping.items()}

    for position, option in enumerate(oneof_options):
        if '$ref' not in option:
            # Inline option: convert it and store it under a positional key
            choices[f"option_{position}"] = self.json_type_to_structure_type(
                option, record_name, f"choice_{position}", namespace, dependencies,
                json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
            )
            continue

        ref = option['$ref']
        if ref in key_by_ref:
            label = key_by_ref[ref]
        elif ref.startswith(definitions_prefix):
            label = ref[len(definitions_prefix):]
        else:
            label = f"option_{position}"
        choices[label] = {'$ref': ref}

    return choice_obj
818
+
819
def create_structure_map(self, values_schema: dict, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Create a JSON Structure map type.

    The value schema is converted with ``json_type_to_structure_type`` and
    then normalised with ``_ensure_schema_object``, the same way
    ``create_structure_array_or_set`` treats ``items``.

    Args:
        values_schema (dict): Schema for map values
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure map definition
    """
    values_type = self.json_type_to_structure_type(
        values_schema, record_name, 'value', namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
    )
    # Always wrap as schema object. This statement had been fused into the
    # comment line above it and therefore never ran.
    values_type = self._ensure_schema_object(values_type, structure_schema, 'value')
    return {
        'type': 'map',
        'values': values_type
    }
845
+
846
def create_structure_map_with_pattern(self, values_schema: dict, pattern_properties: dict, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Create a JSON Structure map whose keys are constrained through ``keyNames``.

    The map itself comes from ``create_structure_map``. A single pattern is
    stored directly as the ``keyNames`` schema; several patterns are combined
    under ``anyOf``. Without patterns the plain map is returned.

    Args:
        values_schema (dict): Schema for map values
        pattern_properties (dict): The patternProperties object with patterns as keys
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure map definition with keyNames validation
    """
    map_result = self.create_structure_map(
        values_schema, record_name, namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth
    )

    if not pattern_properties:
        return map_result

    key_patterns = list(pattern_properties)
    if len(key_patterns) == 1:
        # Exactly one pattern: it becomes the keyNames schema itself
        key_names = {"type": "string", "pattern": key_patterns[0]}
    else:
        # Several patterns: a key has to match any one of them
        key_names = {
            "anyOf": [{"type": "string", "pattern": regex} for regex in key_patterns]
        }
    map_result['keyNames'] = key_names

    # Ensure $uses includes JSONStructureValidation
    self._ensure_validation_extension_in_structure_schema(structure_schema)

    return map_result
897
+
898
def create_structure_array_or_set(self, items_schema: dict, is_set: bool, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Create a JSON Structure ``array`` or ``set`` type.

    The item schema is converted with ``json_type_to_structure_type`` and the
    result is normalised with ``_ensure_schema_object``.

    Args:
        items_schema (dict): Schema for array/set items
        is_set (bool): Truthy for set, falsy for array
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure array/set definition
    """
    converted_items = self.json_type_to_structure_type(
        items_schema, record_name, 'item', namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
    )
    # Items are always stored as a schema object
    element_schema = self._ensure_schema_object(converted_items, structure_schema, 'item')

    if is_set:
        container_kind = 'set'
    else:
        container_kind = 'array'
    return {'type': container_kind, 'items': element_schema}
927
+
928
+ def _process_array_type(self, json_type: dict, record_name: str, field_name: str, namespace: str,
929
+ dependencies: list, json_schema: dict, base_uri: str,
930
+ structure_schema: dict, record_stack: list, recursion_depth: int) -> dict:
931
+ """
932
+ Process an array type schema into JSON Structure format.
933
+
934
+ Args:
935
+ json_type: The JSON Schema with type: "array"
936
+ record_name: Name of the containing record
937
+ field_name: Name of the field
938
+ namespace: Namespace
939
+ dependencies: Dependencies list
940
+ json_schema: Full JSON schema
941
+ base_uri: Base URI
942
+ structure_schema: Structure schema being built
943
+ record_stack: Record stack for recursion detection
944
+ recursion_depth: Current recursion depth
945
+
946
+ Returns:
947
+ dict: JSON Structure array definition
948
+ """
949
+ items_schema = json_type.get('items', {})
950
+ is_set = json_type.get('uniqueItems', False)
951
+ return self.create_structure_array_or_set(
952
+ items_schema, is_set, record_name, namespace, dependencies,
953
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
954
+ )
955
+
956
+ def _process_object_type(self, json_type: dict, record_name: str, field_name: str, namespace: str,
957
+ dependencies: list, json_schema: dict, base_uri: str,
958
+ structure_schema: dict, record_stack: list, recursion_depth: int) -> dict:
959
+ """
960
+ Process an object type schema into JSON Structure format.
961
+
962
+ Args:
963
+ json_type: The JSON Schema with type: "object"
964
+ record_name: Name of the containing record
965
+ field_name: Name of the field
966
+ namespace: Namespace
967
+ dependencies: Dependencies list
968
+ json_schema: Full JSON schema
969
+ base_uri: Base URI
970
+ structure_schema: Structure schema being built
971
+ record_stack: Record stack for recursion detection
972
+ recursion_depth: Current recursion depth
973
+
974
+ Returns:
975
+ dict: JSON Structure object definition
976
+ """
977
+ properties = json_type.get('properties', {})
978
+ required = json_type.get('required', [])
979
+
980
+ structure_properties = {}
981
+ for prop_name, prop_schema in properties.items():
982
+ prop_type = self.json_type_to_structure_type(
983
+ prop_schema, record_name, prop_name, namespace, dependencies,
984
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
985
+ )
986
+ prop_type = self._ensure_schema_object(prop_type, structure_schema, prop_name)
987
+ structure_properties[prop_name] = prop_type
988
+
989
+ result = {
990
+ 'type': 'object',
991
+ 'properties': structure_properties
992
+ }
993
+
994
+ if required:
995
+ result['required'] = required
996
+
997
+ return result
998
+
999
def add_alternate_names(self, structure: dict, original_name: str) -> dict:
    """
    Attach an ``altnames`` entry with the name in the opposite naming convention.

    Names containing ``_`` get a ``camelCase`` alternative (first segment kept
    as is, remaining segments passed through ``str.capitalize``). Names
    without ``_`` but with an uppercase character get a ``snake_case``
    alternative. Other names, and an empty name, leave ``structure`` untouched.
    A non-dict ``structure`` is wrapped as ``{'type': structure}`` before the
    alternative is added; a dict ``structure`` is modified in place.

    Args:
        structure (dict): The structure definition
        original_name (str): The original property/type name

    Returns:
        dict: Structure with altnames added
    """
    if not original_name:
        return structure

    if '_' in original_name:
        first, *others = original_name.split('_')
        altnames = {'camelCase': first + ''.join(segment.capitalize() for segment in others)}
    elif any(ch.isupper() for ch in original_name):
        # Insert "_" at each lower/digit -> upper boundary, then lowercase everything
        altnames = {'snake_case': re.sub('([a-z0-9])([A-Z])', r'\1_\2', original_name).lower()}
    else:
        return structure

    if not isinstance(structure, dict):
        structure = {'type': structure}
    structure['altnames'] = altnames
    return structure
1031
+
1032
def add_validation_constraints(self, structure: dict, schema: dict) -> dict:
    """
    Convert JSON Schema validation constraints to JSON Structure format.

    Copies the supported validation keywords from ``schema`` onto
    ``structure``. For ``int64`` structures, numeric range bounds are written
    as strings. Boolean bounds (the JSON Schema draft-04 form of
    ``exclusiveMinimum`` / ``exclusiveMaximum``) are copied unchanged rather
    than being turned into the strings "1" / "0".

    A non-dict ``structure`` is wrapped as ``{'type': structure}``; a dict
    ``structure`` is modified in place.

    Args:
        structure (dict): The structure definition
        schema (dict): The original JSON schema

    Returns:
        dict: Structure with validation constraints added
    """
    if not isinstance(structure, dict):
        structure = {'type': structure}

    # Keywords copied from the JSON Schema
    validation_keys = (
        'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum',
        'minLength', 'maxLength', 'pattern', 'minItems', 'maxItems',
        'const', 'enum',
    )
    # Range keywords whose numeric value is stringified for int64
    range_keys = ('minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum')

    is_int64 = structure.get('type') == 'int64'

    for key in validation_keys:
        if key not in schema:
            continue
        value = schema[key]
        # bool is a subclass of int, so it must be excluded explicitly;
        # otherwise `exclusiveMinimum: true` would become the bound "1".
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if is_int64 and key in range_keys and is_number:
            structure[key] = str(int(value))
        else:
            structure[key] = value

    return structure
1066
+
1067
def ensure_object_compliance(self, structure: dict) -> dict:
    """
    Make an object type with an empty ``properties`` entry spec-compliant.

    Only dicts whose ``type`` is ``'object'`` are considered. When such a dict
    has a ``properties`` key with a falsy value and has neither
    ``additionalProperties`` nor ``$extends``, ``additionalProperties: True``
    is added in place. Everything else is returned untouched.

    Args:
        structure (dict): The structure definition

    Returns:
        dict: Structure with JSON Structure compliance ensured
    """
    is_object = isinstance(structure, dict) and structure.get('type') == 'object'
    if is_object and 'properties' in structure:
        declares_no_properties = not structure['properties']
        open_or_derived = 'additionalProperties' in structure or '$extends' in structure
        if declares_no_properties and not open_or_derived:
            structure['additionalProperties'] = True
    return structure
1091
+
1092
def flatten_union(self, type_list: list, structure_schema=None, name_hint=None) -> list:
    """
    Flatten the list of types in a union into a single, duplicate-free list.

    JSON Structure Core requires union members to be either:
    - Simple type strings (e.g., "string", "null", "boolean")
    - References to definitions (e.g., {"$ref": "#/definitions/Foo"})

    Inline compound types or primitives with constraints are hoisted to
    definitions via ``_hoist_definition`` when ``structure_schema`` is given.
    Members whose ``type`` is not a plain string (for example a nested union
    ``{"type": [...]}``) are handed to ``_ensure_schema_object`` instead of
    raising ``TypeError`` on the primitive-set lookup.

    Args:
        type_list (list): The list of types in a union.
        structure_schema: The structure schema for hoisting definitions.
        name_hint: Hint for naming hoisted definitions.

    Returns:
        list: The flattened list of types.
    """
    flat_list = []
    simple_primitives = {'string', 'boolean', 'integer', 'number', 'null', 'int8', 'int16',
                         'int32', 'int64', 'float', 'double', 'bytes', 'uuid', 'datetime',
                         'date', 'time', 'duration', 'decimal'}
    compound_types = ('array', 'object', 'set', 'map', 'choice')

    def add_unique(member):
        # Union members may be unhashable dicts, so de-duplicate by equality
        if member not in flat_list:
            flat_list.append(member)

    for idx, t in enumerate(type_list):
        option_hint = f"{name_hint}_option_{idx}" if name_hint else None

        if isinstance(t, list):
            # Nested union: flatten recursively, then normalise each member
            for u in self.flatten_union(t, structure_schema, name_hint):
                add_unique(self._ensure_schema_object(u, structure_schema, option_hint, force_hoist_in_union=True))
            continue

        if isinstance(t, str):
            # Simple type string - use directly
            add_unique(t)
            continue

        if not isinstance(t, dict):
            # Unknown type - use normal processing
            add_unique(self._ensure_schema_object(t, structure_schema, option_hint, force_hoist_in_union=True))
            continue

        if '$ref' in t:
            # Reference - use directly
            add_unique(t)
            continue

        member_type = t.get('type')
        # Only a plain string can name a primitive or compound type; a list or
        # dict here is unhashable and must not be tested against the set.
        type_is_name = isinstance(member_type, str)
        is_primitive = type_is_name and member_type in simple_primitives
        is_compound = type_is_name and member_type in compound_types

        if is_primitive and len(t) == 1:
            # Simple primitive type object like {"type": "boolean"} - extract the type string
            add_unique(member_type)
        elif is_primitive:
            # Primitive with constraints (e.g., {type: "string", maxLength: 280})
            # must be hoisted: inline constraints are not allowed in a union
            if structure_schema is not None:
                add_unique(self._hoist_definition(t, structure_schema, f"{name_hint or 'union'}_{member_type}"))
            else:
                # No structure schema for hoisting - fallback to just the type
                add_unique(member_type)
        elif is_compound and structure_schema is not None:
            # Compound type - must be hoisted
            add_unique(self._hoist_definition(t, structure_schema, f"{name_hint or 'union'}_{member_type}"))
        else:
            # Compound type without a structure schema (will likely fail
            # validation) or any other dict structure - use normal processing
            add_unique(self._ensure_schema_object(t, structure_schema, option_hint, force_hoist_in_union=True))

    return flat_list
1169
+
1170
+ def merge_structure_schemas(self, schemas: list, structure_schemas: list, type_name: str | None = None, deps: List[str] = []) -> str | list | dict:
1171
+ """Merge multiple JSON Structure type schemas into one."""
1172
+
1173
+ if len(schemas) == 1:
1174
+ return schemas[0]
1175
+
1176
+ merged_schema: dict = {}
1177
+ if type_name:
1178
+ merged_schema['name'] = type_name
1179
+
1180
+ for schema in schemas:
1181
+ schema = copy.deepcopy(schema)
1182
+ if isinstance(schema, dict) and 'dependencies' in schema:
1183
+ deps1: List[str] = merged_schema.get('dependencies', [])
1184
+ deps1.extend(schema['dependencies'])
1185
+ merged_schema['dependencies'] = deps1
1186
+
1187
+ if isinstance(schema, str):
1188
+ # Simple type reference
1189
+ if 'type' not in merged_schema:
1190
+ merged_schema['type'] = schema
1191
+ elif merged_schema['type'] != schema:
1192
+ # Type conflict, create union
1193
+ if not isinstance(merged_schema['type'], list):
1194
+ merged_schema['type'] = [merged_schema['type']]
1195
+ if schema not in merged_schema['type']:
1196
+ merged_schema['type'].append(schema)
1197
+
1198
+ elif isinstance(schema, dict):
1199
+ # Merge object schemas
1200
+ for key, value in schema.items():
1201
+ if key == 'properties' and 'properties' in merged_schema:
1202
+ # Merge properties
1203
+ for prop_name, prop_schema in value.items():
1204
+ if prop_name in merged_schema['properties']:
1205
+ # Property exists, merge types
1206
+ existing = merged_schema['properties'][prop_name]
1207
+ merged_schema['properties'][prop_name] = self.merge_structure_schemas(
1208
+ [existing, prop_schema], structure_schemas, None, deps)
1209
+ else:
1210
+ merged_schema['properties'][prop_name] = prop_schema
1211
+ elif key == 'required' and 'required' in merged_schema:
1212
+ # Merge required arrays
1213
+ merged_schema['required'] = list(set(merged_schema['required'] + value))
1214
+ else:
1215
+ merged_schema[key] = value
1216
+
1217
+ return merged_schema
1218
+
1219
+ def json_type_to_structure_type(self, json_type: str | dict, record_name: str, field_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth=1) -> dict | list | str:
1220
+ """Convert a JSON Schema type to JSON Structure type."""
1221
+
1222
+ try:
1223
+ if recursion_depth >= self.max_recursion_depth:
1224
+ print(f'WARNING: Maximum recursion depth reached for {record_name} at field {field_name}')
1225
+ return 'string' # Fallback to string instead of generic_type() structure_type: dict = {}
1226
+ local_name = avro_name(field_name if field_name else record_name)
1227
+
1228
+ if isinstance(json_type, str):
1229
+ # Simple type reference
1230
+ return self.json_schema_primitive_to_structure_type(
1231
+ json_type, None, None, record_name, field_name, namespace, dependencies, {})
1232
+
1233
+ if isinstance(json_type, dict): # Handle inheritance pattern first (only if inheritance detection is enabled)
1234
+ inheritance_info = self.detect_inheritance_pattern(json_type, record_name)
1235
+ if inheritance_info:
1236
+ base_ref = inheritance_info['base_ref']
1237
+ extension = inheritance_info['extension']
1238
+ # Create abstract base type name
1239
+ if base_ref.startswith('#/definitions/'):
1240
+ base_type_name = base_ref[14:] # Remove '#/definitions/'
1241
+ abstract_base_name = avro_name(f"{base_type_name}Base")
1242
+ else:
1243
+ # Handle external references or other formats
1244
+ abstract_base_name = avro_name(f"{record_name}Base")
1245
+
1246
+ # Ensure the abstract base type exists
1247
+ self._ensure_abstract_base_type(base_ref, abstract_base_name, structure_schema, json_schema, base_uri)
1248
+
1249
+ structure_type = {
1250
+ 'type': 'object',
1251
+ 'name': record_name,
1252
+ '$extends': f"#/definitions/{abstract_base_name}"
1253
+ }
1254
+ if 'properties' in extension and extension['properties']:
1255
+ structure_type['properties'] = {}
1256
+ for prop_name, prop_schema in extension['properties'].items():
1257
+ prop_type = self.json_type_to_structure_type(
1258
+ prop_schema, record_name, prop_name, namespace, dependencies,
1259
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1260
+ )
1261
+ # Normalize property name if needed
1262
+ if not self.is_valid_identifier(prop_name):
1263
+ normalized_name = self.normalize_identifier(prop_name)
1264
+ prop_entry = self._ensure_schema_object(prop_type, structure_schema, normalized_name)
1265
+ # Always create a new dict to add altnames
1266
+ new_entry = {}
1267
+ if isinstance(prop_entry, dict):
1268
+ new_entry.update(prop_entry)
1269
+ else:
1270
+ new_entry['type'] = prop_entry
1271
+ new_entry['altnames'] = {'json': prop_name}
1272
+ structure_type['properties'][normalized_name] = new_entry
1273
+ else:
1274
+ structure_type['properties'][prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)
1275
+
1276
+ if 'required' in extension:
1277
+ structure_type['required'] = extension['required']
1278
+
1279
+ # Copy other extension properties (validation constraints, etc.)
1280
+ for key, value in extension.items():
1281
+ if key not in ['properties', 'required', 'type']:
1282
+ structure_type[key] = value
1283
+
1284
+ # Apply any remaining validation constraints from the original schema
1285
+ structure_type = self.add_validation_constraints(structure_type, json_type)
1286
+
1287
+ return structure_type
1288
+
1289
+ # Handle discriminator pattern
1290
+ discriminator_info = self.detect_discriminator_pattern(json_type)
1291
+ if discriminator_info and 'oneOf' in json_type:
1292
+ return self.create_structure_choice(
1293
+ discriminator_info, json_type['oneOf'], record_name, namespace,
1294
+ dependencies, json_schema, base_uri, structure_schema, record_stack, recursion_depth
1295
+ ) # Handle $ref first (before checking for type)
1296
+ if '$ref' in json_type:
1297
+ ref = json_type['$ref']
1298
+ # Normalize references to use definitions instead of $defs
1299
+ if ref.startswith('#/$defs/'):
1300
+ ref = ref.replace('#/$defs/', '#/definitions/')
1301
+ elif ref.startswith('#/definitions/'):
1302
+ # Already correct format
1303
+ pass
1304
+
1305
+ # Handle nested JSON Pointer references like #/definitions/pipelineCommon/execution
1306
+ if '/' in ref.split('#/definitions/')[-1] and ref.startswith('#/definitions/'):
1307
+ try:
1308
+ # Resolve the nested JSON Pointer reference
1309
+ resolved_schema, _ = self.resolve_reference(json_type, base_uri, json_schema)
1310
+ if resolved_schema != json_type:
1311
+ # We successfully resolved a nested reference, process the resolved schema # Create a new definition name based on the nested path
1312
+ ref_parts = ref.split('/')
1313
+ if len(ref_parts) >= 4: # ['#', 'definitions', 'parent', 'child', ...]
1314
+ parent_name = ref_parts[2]
1315
+ child_path = '/'.join(ref_parts[3:])
1316
+ new_def_name = avro_name(f"{parent_name}_{child_path.replace('/', '_')}")
1317
+
1318
+ # Process the resolved schema recursively
1319
+ converted_schema = self.json_type_to_structure_type(
1320
+ resolved_schema, new_def_name, field_name, namespace, dependencies,
1321
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1322
+ )
1323
+
1324
+ # Check if we need to add to definitions
1325
+ if isinstance(converted_schema, dict) and converted_schema.get('type') in ['object', 'array', 'choice']:
1326
+ # Set name for the definition and add to structure_schema definitions
1327
+ converted_schema['name'] = new_def_name
1328
+ if 'definitions' not in structure_schema:
1329
+ structure_schema['definitions'] = {}
1330
+ structure_schema['definitions'][new_def_name] = converted_schema
1331
+ return {'$ref': f"#/definitions/{new_def_name}"}
1332
+ else:
1333
+ # For simple types, return the converted schema directly
1334
+ return converted_schema
1335
+ except Exception as e:
1336
+ # If resolution fails, fall back to original reference
1337
+ print(f"Failed to resolve nested reference {ref}: {e}")
1338
+ pass
1339
+
1340
+ # Check if we need to use type registry to normalize the reference
1341
+ # Extract the definition name from the reference
1342
+ if ref.startswith('#/definitions/'):
1343
+ def_name = ref[14:] # Remove '#/definitions/' prefix
1344
+ if def_name in self.type_registry:
1345
+ # Use the normalized reference from the registry
1346
+ ref = self.type_registry[def_name]
1347
+
1348
+ return {'$ref': ref}
1349
+
1350
+ # Handle schemas without explicit type
1351
+ if json_type.get('type') is None:
1352
+ if 'enum' in json_type:
1353
+ # Enum-only schema - default to string type
1354
+ enum_values = json_type.get('enum')
1355
+ structure_type = self.json_schema_primitive_to_structure_type(
1356
+ 'string', json_type.get('format'), enum_values, record_name, field_name, namespace, dependencies, json_type
1357
+ )
1358
+ if isinstance(structure_type, dict):
1359
+ structure_type = self.add_validation_constraints(structure_type, json_type)
1360
+ return structure_type
1361
+ elif 'properties' in json_type or 'additionalProperties' in json_type or 'patternProperties' in json_type:
1362
+ # Object schema without explicit type - treat as object
1363
+ # Apply constraint composition conversion if applicable
1364
+ effective_schema = self._convert_constraint_composition_to_required(json_type)
1365
+ properties = effective_schema.get('properties', {})
1366
+ required = effective_schema.get('required', [])
1367
+ return self.create_structure_object(
1368
+ properties, required, record_name, namespace, dependencies,
1369
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth, effective_schema
1370
+ )
1371
+ elif self.has_composition_keywords(json_type):
1372
+ # Handle composition keywords without explicit type
1373
+ # Continue to composition handling below instead of returning empty object
1374
+ pass
1375
+ else:
1376
+ # Other schema without type - default to generic object
1377
+ # Create a generic object that allows any properties
1378
+ return {
1379
+ 'type': 'object',
1380
+ 'properties': {} }
1381
+
1382
+ # Handle type arrays (e.g., ["integer", "null"] for nullable in JSON Schema 2020-12/OpenAPI 3.1)
1383
+ if json_type.get('type') and isinstance(json_type['type'], list):
1384
+ type_list = json_type['type']
1385
+ format_hint = json_type.get('format')
1386
+ enum_values = json_type.get('enum')
1387
+
1388
+ # Check if any type in the list is a compound type (array, object)
1389
+ compound_types = {'array', 'object'}
1390
+ has_compound = any(t in compound_types for t in type_list if isinstance(t, str))
1391
+
1392
+ if has_compound:
1393
+ # For compound types, we need to process each variant separately
1394
+ # and create a proper union. Compound types must be hoisted to definitions
1395
+ # because JSON Structure Core doesn't allow inline compound types in unions.
1396
+ union_members = []
1397
+ for t in type_list:
1398
+ if t == 'null':
1399
+ union_members.append('null')
1400
+ elif t == 'array':
1401
+ # Process array with the full schema context (items, etc.)
1402
+ array_schema = {**json_type, 'type': 'array'}
1403
+ array_type = self._process_array_type(
1404
+ array_schema, record_name, field_name, namespace, dependencies,
1405
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1406
+ )
1407
+ # Hoist to definitions - inline compound types not allowed in union
1408
+ hoisted = self._hoist_definition(array_type, structure_schema, f"{field_name or record_name}_array")
1409
+ union_members.append(hoisted)
1410
+ elif t == 'object':
1411
+ # Process object with the full schema context (properties, etc.)
1412
+ object_schema = {**json_type, 'type': 'object'}
1413
+ object_type = self._process_object_type(
1414
+ object_schema, record_name, field_name, namespace, dependencies,
1415
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1416
+ )
1417
+ # Hoist to definitions - inline compound types not allowed in union
1418
+ hoisted = self._hoist_definition(object_type, structure_schema, f"{field_name or record_name}_object")
1419
+ union_members.append(hoisted)
1420
+ elif isinstance(t, str):
1421
+ # Primitive type
1422
+ prim_type = self.json_schema_primitive_to_structure_type(
1423
+ t, format_hint, enum_values, record_name, field_name, namespace, dependencies, json_type
1424
+ )
1425
+ union_members.append(prim_type)
1426
+
1427
+ return {'type': self.flatten_union(union_members, structure_schema, field_name)}
1428
+ else:
1429
+ # All primitives, use the existing logic
1430
+ structure_type = self.json_schema_primitive_to_structure_type(
1431
+ type_list, format_hint, enum_values, record_name, field_name, namespace, dependencies, json_type
1432
+ )
1433
+ if isinstance(structure_type, dict):
1434
+ structure_type = self.add_validation_constraints(structure_type, json_type)
1435
+ return structure_type
1436
+
1437
+ if json_type.get('type') and isinstance(json_type['type'], str):
1438
+ # Check if this schema also has composition keywords that should be preserved
1439
+ if self.preserve_composition and self.has_composition_keywords(json_type):
1440
+ # Skip primitive handling and continue to composition handling below
1441
+ pass
1442
+ else:
1443
+ format_hint = json_type.get('format')
1444
+ enum_values = json_type.get('enum')
1445
+ # Special handling for objects
1446
+ if json_type['type'] == 'object':
1447
+ # Check if should convert to map
1448
+ if self.should_convert_to_map(json_type):
1449
+ # Handle patternProperties conversion to map
1450
+ pattern_properties = json_type.get('patternProperties')
1451
+ if pattern_properties:
1452
+ # Get the pattern schema for values (merge all pattern schemas)
1453
+ if len(pattern_properties) == 1:
1454
+ pattern_schema = list(pattern_properties.values())[0]
1455
+ else:
1456
+ # Multiple patterns - merge schemas using anyOf
1457
+ schemas = list(pattern_properties.values())
1458
+ pattern_schema = {'anyOf': schemas}
1459
+
1460
+ # Convert patternProperties to map with keyNames validation
1461
+ return self.create_structure_map_with_pattern(
1462
+ pattern_schema, pattern_properties, record_name, namespace, dependencies,
1463
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1464
+ )
1465
+ else:
1466
+ # Handle additionalProperties conversion to map
1467
+ additional_props = json_type.get('additionalProperties', True)
1468
+ if isinstance(additional_props, dict):
1469
+ return self.create_structure_map(
1470
+ additional_props, record_name, namespace, dependencies,
1471
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1472
+ )
1473
+ else: return {
1474
+ 'type': 'map',
1475
+ 'values': {'type': 'string'} # Default for boolean additionalProperties
1476
+ }
1477
+ else:
1478
+ # Regular object - first check for discriminated union patterns
1479
+ if 'oneOf' in json_type:
1480
+ choice_info = self.detect_discriminated_union_pattern(json_type)
1481
+ if choice_info:
1482
+ # Convert to JSON Structure choice type
1483
+ choice_result = {
1484
+ 'type': 'choice',
1485
+ 'choices': choice_info['choices']
1486
+ }
1487
+
1488
+ # Add selector if specified (for tagged unions)
1489
+ if choice_info.get('selector'):
1490
+ choice_result['selector'] = choice_info['selector']
1491
+
1492
+ # Add name if we have one
1493
+ if record_name:
1494
+ choice_result['name'] = avro_name(record_name)
1495
+
1496
+ return choice_result
1497
+ # Regular object without discriminated union
1498
+ # Check if this is a bare object type that should be converted to "any"
1499
+ if (not json_type.get('properties') and
1500
+ not json_type.get('additionalProperties') and
1501
+ not json_type.get('patternProperties') and
1502
+ not json_type.get('required') and
1503
+ not json_type.get('$extends') and
1504
+ not any(k in json_type for k in ['allOf', 'anyOf', 'oneOf', 'if', 'then', 'else'])):
1505
+ # This is a bare "type": "object" which means "any object" in JSON Schema
1506
+ # Convert to "any" type in JSON Structure
1507
+ return {'type': 'any'}
1508
+
1509
+ # Apply constraint composition conversion if applicable
1510
+ effective_schema = self._convert_constraint_composition_to_required(json_type)
1511
+ properties = effective_schema.get('properties', {})
1512
+ required = effective_schema.get('required', [])
1513
+ return self.create_structure_object(
1514
+ properties, required, record_name, namespace, dependencies,
1515
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth, json_type
1516
+ )
1517
+ # Special handling for arrays
1518
+ elif json_type['type'] == 'array':
1519
+ items_schema = json_type.get('items', {'type': 'string'})
1520
+ is_set = self.detect_collection_type(json_type) == 'set'
1521
+ return self.create_structure_array_or_set(
1522
+ items_schema, is_set, record_name, namespace, dependencies,
1523
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1524
+ )
1525
+
1526
+ # Special handling for maps
1527
+ elif json_type['type'] == 'map':
1528
+ values_schema = json_type.get('values', {'type': 'string'})
1529
+ return self.create_structure_map(
1530
+ values_schema, record_name, namespace, dependencies,
1531
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1532
+ )
1533
+
1534
+ # Special handling for sets
1535
+ elif json_type['type'] == 'set':
1536
+ items_schema = json_type.get('items', {'type': 'string'})
1537
+ return self.create_structure_array_or_set(
1538
+ items_schema, True, record_name, namespace, dependencies,
1539
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1540
+ )
1541
+
1542
+ else:
1543
+ # Primitive type
1544
+ structure_type = self.json_schema_primitive_to_structure_type(
1545
+ json_type['type'], format_hint, enum_values, record_name, field_name, namespace, dependencies, json_type
1546
+ )
1547
+ # Add validation constraints
1548
+ if isinstance(structure_type, str):
1549
+ structure_type = self.add_validation_constraints({'type': structure_type}, json_type)
1550
+ if len(structure_type) == 1:
1551
+ structure_type = structure_type['type']
1552
+ elif isinstance(structure_type, dict):
1553
+ structure_type = self.add_validation_constraints(structure_type, json_type)
1554
+ return structure_type
1555
+
1556
+ # Handle composition keywords - resolve when preserve_composition is False
1557
+ if not self.preserve_composition and self.has_composition_keywords(json_type):
1558
+ return self.resolve_composition_keywords(
1559
+ json_type, record_name, field_name, namespace, dependencies,
1560
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1561
+ )
1562
+
1563
+ # Handle composition keywords
1564
+ if self.preserve_composition:
1565
+ if 'allOf' in json_type and not inheritance_info:
1566
+ # Non-inheritance allOf - keep as-is or merge based on configuration
1567
+ allof_schemas = []
1568
+ for allof_item in json_type['allOf']:
1569
+ converted = self.json_type_to_structure_type(
1570
+ allof_item, record_name, field_name, namespace, dependencies,
1571
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1572
+ )
1573
+ allof_schemas.append(converted)
1574
+ # For now, merge them - JSON Structure supports allOf natively
1575
+ return {
1576
+ 'allOf': allof_schemas
1577
+ }
1578
+
1579
+ if 'oneOf' in json_type and not discriminator_info: # Check if this is a discriminated union pattern
1580
+ choice_info = self.detect_discriminated_union_pattern(json_type)
1581
+ if choice_info:
1582
+ # Convert to JSON Structure choice type (tagged union)
1583
+ choice_result = {
1584
+ 'type': 'choice',
1585
+ 'choices': choice_info['choices']
1586
+ }
1587
+
1588
+ # Add selector if specified (for tagged unions)
1589
+ if choice_info.get('selector'):
1590
+ choice_result['selector'] = choice_info['selector']
1591
+
1592
+ # Add name if we have one
1593
+ if record_name:
1594
+ choice_result['name'] = avro_name(record_name)
1595
+
1596
+ return choice_result
1597
+
1598
+ # Regular oneOf without discriminator
1599
+ oneof_schemas = []
1600
+ for oneof_item in json_type['oneOf']:
1601
+ # For constraint-only schemas, preserve them but add type: object
1602
+ if self._is_constraint_only_schema(oneof_item):
1603
+ preserved_item = dict(oneof_item)
1604
+ preserved_item['type'] = 'object'
1605
+ # Add properties for required fields to make it valid JSON Structure
1606
+ if 'required' in preserved_item and 'properties' not in preserved_item:
1607
+ preserved_item['properties'] = {}
1608
+ for req_field in preserved_item['required']:
1609
+ preserved_item['properties'][req_field] = {'type': 'any'}
1610
+ # Allow additional properties since this is a constraint-only schema
1611
+ preserved_item['additionalProperties'] = True
1612
+ oneof_schemas.append(preserved_item)
1613
+ else:
1614
+ converted = self.json_type_to_structure_type(
1615
+ oneof_item, record_name, field_name, namespace, dependencies,
1616
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1617
+ )
1618
+ oneof_schemas.append(converted)
1619
+ return {
1620
+ 'oneOf': oneof_schemas
1621
+ }
1622
+
1623
+ if 'anyOf' in json_type: # Check if this is a constraint-only anyOf pattern that should be converted to permutations
1624
+ anyof_items = json_type['anyOf']
1625
+ constraint_only = all(
1626
+ self._is_constraint_only_schema(item) and 'required' in item
1627
+ for item in anyof_items
1628
+ )
1629
+
1630
+ if constraint_only:
1631
+ # Convert constraint-only anyOf to permutations and return as object with required
1632
+ converted_schema = self._convert_constraint_anyof_to_permutations(json_type)
1633
+
1634
+ # Use create_structure_object to properly handle patternProperties
1635
+ result = self.create_structure_object(
1636
+ json_type.get('properties', {}),
1637
+ converted_schema.get('required', []),
1638
+ record_name, namespace, dependencies,
1639
+ json_schema, base_uri, structure_schema,
1640
+ record_stack, recursion_depth, json_type
1641
+ )
1642
+ # Add other properties from the original schema (except structural properties)
1643
+ for key, value in json_type.items():
1644
+ if key not in ['anyOf', 'type', 'properties', 'required', 'patternProperties', 'additionalProperties']:
1645
+ result[key] = value
1646
+ return result
1647
+ else:
1648
+ # Regular anyOf composition - preserve original structure when preserve_composition=True
1649
+ anyof_schemas = []
1650
+ for anyof_item in anyof_items:
1651
+ converted = self.json_type_to_structure_type(
1652
+ anyof_item, record_name, field_name, namespace, dependencies,
1653
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1654
+ )
1655
+ anyof_schemas.append(converted)
1656
+
1657
+ return {
1658
+ 'anyOf': anyof_schemas
1659
+ }
1660
+
1661
+ # Handle conditional schemas (if/then/else)
1662
+ if 'if' in json_type:
1663
+ # Preserve conditional schemas as-is in JSON Structure
1664
+ result = {}
1665
+
1666
+ # Process if clause
1667
+ if_schema = self.json_type_to_structure_type(
1668
+ json_type['if'], record_name, field_name, namespace, dependencies,
1669
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1670
+ )
1671
+ result['if'] = if_schema
1672
+
1673
+ # Process then clause if present
1674
+ if 'then' in json_type:
1675
+ then_schema = self.json_type_to_structure_type(
1676
+ json_type['then'], record_name, field_name, namespace, dependencies,
1677
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1678
+ )
1679
+ result['then'] = then_schema
1680
+
1681
+ # Process else clause if present
1682
+ if 'else' in json_type:
1683
+ else_schema = self.json_type_to_structure_type(
1684
+ json_type['else'], record_name, field_name, namespace, dependencies,
1685
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1686
+ )
1687
+ result['else'] = else_schema
1688
+ # Add any other properties from the original schema
1689
+ for key, value in json_type.items():
1690
+ if key not in ['if', 'then', 'else']:
1691
+ if key in ['properties', 'required', 'type']:
1692
+ # Handle structural properties
1693
+ if key == 'properties':
1694
+ converted_props = {}
1695
+ for prop_name, prop_schema in value.items():
1696
+ prop_type = self.json_type_to_structure_type(
1697
+ prop_schema, record_name, prop_name, namespace, dependencies,
1698
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1699
+ )
1700
+ converted_props[prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)
1701
+ result[key] = converted_props
1702
+ else:
1703
+ result[key] = value
1704
+ else:
1705
+ # Copy validation and other properties as-is
1706
+ result[key] = value
1707
+
1708
+ # Ensure conditional object schemas are valid JSON Structure
1709
+ if result.get('type') == 'object':
1710
+ # If we have no properties but we're an object type, add additionalProperties
1711
+ if 'properties' not in result and '$extends' not in result:
1712
+ result['additionalProperties'] = True
1713
+ # If we have empty properties, remove it and add additionalProperties instead
1714
+ elif 'properties' in result and not result['properties']:
1715
+ del result['properties']
1716
+ if 'additionalProperties' not in result:
1717
+ result['additionalProperties'] = True
1718
+
1719
+ return result
1720
+
1721
+ # Handle const
1722
+ if 'const' in json_type:
1723
+ const_value = json_type['const']
1724
+ if isinstance(const_value, str):
1725
+ return {
1726
+ 'type': 'string',
1727
+ 'const': const_value
1728
+ }
1729
+ elif isinstance(const_value, (int, float)):
1730
+ return {
1731
+ 'type': 'int32' if isinstance(const_value, int) else 'double',
1732
+ 'const': const_value
1733
+ }
1734
+ elif isinstance(const_value, bool):
1735
+ return {
1736
+ 'type': 'boolean',
1737
+ 'const': const_value
1738
+ }# Fallback for unhandled cases
1739
+ if 'properties' in json_type: # Treat as object even without explicit type
1740
+ properties = json_type['properties']
1741
+ required = json_type.get('required', [])
1742
+ return self.create_structure_object(
1743
+ properties, required, record_name, namespace, dependencies, json_schema, base_uri, structure_schema, record_stack, recursion_depth, json_type
1744
+ )
1745
+
1746
+ # Fallback
1747
+ return 'string'
1748
+
1749
+ except Exception as e:
1750
+ print(f'ERROR: Failed to convert type for {record_name}.{field_name}: {e}')
1751
+ return 'string'
1752
+
1753
+ def fetch_content(self, url: str | ParseResult):
1754
+ """
1755
+ Fetches the content from the specified URL.
1756
+
1757
+ Args:
1758
+ url (str or ParseResult): The URL to fetch the content from.
1759
+
1760
+ Returns:
1761
+ str: The fetched content.
1762
+
1763
+ Raises:
1764
+ requests.RequestException: If there is an error while making the HTTP request.
1765
+ Exception: If there is an error while reading the file.
1766
+ """
1767
+ # Parse the URL to determine the scheme
1768
+ if isinstance(url, str):
1769
+ parsed_url = urlparse(url)
1770
+ else:
1771
+ parsed_url = url
1772
+
1773
+ if parsed_url.geturl() in self.content_cache:
1774
+ return self.content_cache[parsed_url.geturl()]
1775
+ scheme = parsed_url.scheme
1776
+
1777
+ # Handle HTTP and HTTPS URLs
1778
+ if scheme in ['http', 'https']:
1779
+ response = requests.get(url if isinstance(
1780
+ url, str) else parsed_url.geturl(), timeout=30)
1781
+ # Raises an HTTPError if the response status code is 4XX/5XX
1782
+ response.raise_for_status()
1783
+ self.content_cache[parsed_url.geturl()] = response.text
1784
+ return response.text
1785
+
1786
+ # Handle file URLs
1787
+ elif scheme == 'file':
1788
+ # Remove the leading 'file://' from the path for compatibility
1789
+ file_path = parsed_url.netloc
1790
+ if not file_path:
1791
+ file_path = parsed_url.path
1792
+ # On Windows, a file URL might start with a '/' but it's not part of the actual path
1793
+ if os.name == 'nt' and file_path.startswith('/'):
1794
+ file_path = file_path[1:]
1795
+ with open(file_path, 'r', encoding='utf-8') as file:
1796
+ text = file.read()
1797
+ self.content_cache[parsed_url.geturl()] = text
1798
+ return text
1799
+ else:
1800
+ raise NotImplementedError(f'Unsupported URL scheme: {scheme}')
1801
+
1802
+ def resolve_reference(self, json_type: dict, base_uri: str, json_doc: dict) -> Tuple[Union[dict, Any], dict]:
1803
+ """
1804
+ Resolve a JSON Pointer reference or a JSON $ref reference.
1805
+
1806
+ Args:
1807
+ json_type (dict): The JSON type containing the reference.
1808
+ base_uri (str): The base URI of the JSON document.
1809
+ json_doc (dict): The JSON document containing the reference.
1810
+
1811
+ Returns:
1812
+ Tuple[Union[dict, Any], dict]: A tuple containing the resolved JSON schema and the original JSON schema document.
1813
+
1814
+ Raises:
1815
+ Exception: If there is an error decoding JSON from the reference.
1816
+ Exception: If there is an error resolving the JSON Pointer reference.
1817
+ """
1818
+ try:
1819
+ ref = json_type['$ref']
1820
+ content = None
1821
+ url = urlparse(ref)
1822
+ if url.scheme:
1823
+ content = self.fetch_content(ref)
1824
+ elif url.path:
1825
+ file_uri = self.compose_uri(base_uri, url)
1826
+ content = self.fetch_content(file_uri)
1827
+ if content:
1828
+ try:
1829
+ json_schema_doc = json_schema = json.loads(content)
1830
+ # resolve the JSON Pointer reference, if any
1831
+ if url.fragment:
1832
+ json_schema = jsonpointer.resolve_pointer(
1833
+ json_schema, url.fragment)
1834
+ return json_schema, json_schema_doc
1835
+ except json.JSONDecodeError:
1836
+ raise Exception(f'Error decoding JSON from {ref}')
1837
+
1838
+ if url.fragment:
1839
+ json_pointer = unquote(url.fragment)
1840
+ ref_schema = jsonpointer.resolve_pointer(
1841
+ json_doc, json_pointer)
1842
+ if ref_schema:
1843
+ return ref_schema, json_doc
1844
+ except JsonPointerException as e:
1845
+ raise Exception(
1846
+ f'Error resolving JSON Pointer reference for {base_uri}')
1847
+ return json_type, json_doc
1848
+
1849
def compose_uri(self, base_uri, url):
    """Resolve ``url`` (a string or parsed URL) against ``base_uri``.

    Returns ``url`` itself when it carries a scheme, ``base_uri`` when ``url``
    has neither path nor network location, and otherwise a URI built relative
    to the location of ``base_uri``.
    """
    target = urlparse(url) if isinstance(url, str) else url

    # A reference with its own scheme is already absolute.
    if target.scheme:
        return target.geturl()

    # Nothing but (at most) a fragment: the reference stays in the base document.
    if not (target.path or target.netloc):
        return base_uri

    if not base_uri.startswith('file'):
        # combine the base URI with the URL
        return urllib.parse.urljoin(base_uri, target.geturl())

    # File-based base: swap the base document's file name for the referenced path.
    base_parts = urlparse(base_uri)
    base_location = base_parts.netloc or base_parts.path
    parent_dir = os.path.dirname(base_location)
    joined = os.path.join(parent_dir, target.path)
    return f'file://{joined}'
1867
+
1868
def convert_schema(self, json_schema_path: str, output_path: str | None = None):
    """
    Read a JSON Schema file, convert it and write the JSON Structure result.

    Args:
        json_schema_path (str): Path to the input JSON Schema file. It is also
            passed to the conversion as the base URI.
        output_path (str): Path for the output JSON Structure file (optional).
            When omitted, the input path with its extension replaced by
            ``.structure.json`` is used.

    Returns:
        The converted structure schema, as returned by
        ``convert_json_schema_to_structure`` (the written file contains the
        property-sorted form of it).
    """
    # Load the source schema
    with open(json_schema_path, 'r', encoding='utf-8') as source_file:
        json_schema = json.load(source_file)

    # Run the conversion
    structure_schema = self.convert_json_schema_to_structure(json_schema, json_schema_path)

    # Fall back to '<input without extension>.structure.json'
    if not output_path:
        stem, _extension = os.path.splitext(json_schema_path)
        output_path = f"{stem}.structure.json"

    # Persist the result with its properties sorted
    with open(output_path, 'w', encoding='utf-8') as target_file:
        ordered = self._sort_json_structure_properties(structure_schema)
        json.dump(ordered, target_file, indent=2)

    print(f"Converted {json_schema_path} to {output_path}")
    return structure_schema
1896
+
1897
+
1898
+ def _mark_abstract_types(self, structure_schema: dict) -> None:
1899
+ """
1900
+ Mark abstract types in the structure schema.
1901
+
1902
+ Args:
1903
+ structure_schema (dict): The structure schema to mark
1904
+ """
1905
+ if 'definitions' in structure_schema:
1906
+ for def_name, def_schema in structure_schema['definitions'].items():
1907
+ if isinstance(def_schema, dict):
1908
+ # Mark types with only inheritance as abstract
1909
+ if ('$extends' in def_schema and
1910
+ ('properties' not in def_schema or len(def_schema['properties']) == 0)):
1911
+ def_schema['abstract'] = True
1912
+
1913
+ # Mark choice types with discriminators as abstract
1914
+ if (def_schema.get('type') == 'choice' and
1915
+ 'discriminator' in def_schema):
1916
+ def_schema['abstract'] = True
1917
+
1918
def jsons_to_structure(self, json_schema: Union[dict, list], namespace: str, base_uri: str) -> dict:
    """
    Convert a JSON Schema to JSON Structure format.

    The conversion clears ``self.type_registry``, converts every entry of
    ``$defs`` (preferred) or ``definitions`` into ``definitions`` of the
    result, merges the converted root type into the result, copies ``$id`` /
    ``id`` and the description (falling back to ``title``), and finally marks
    abstract types.

    Args:
        json_schema (dict | list): The JSON Schema to convert
        namespace (str): The target namespace
        base_uri (str): Base URI for reference resolution

    Returns:
        dict: The converted JSON Structure schema
    """
    # Clear type registry for new conversion
    self.type_registry.clear()

    structure_schema: Dict[str, Any] = {
        "$schema": "https://json-structure.org/meta/extended/v0/#"
    }
    # Do NOT set $uses here; it will be set after scanning for actual usage

    # Handle schema with definitions/defs
    if isinstance(json_schema, dict) and ('definitions' in json_schema or '$defs' in json_schema):
        defs_key = '$defs' if '$defs' in json_schema else 'definitions'
        json_schema_defs = json_schema[defs_key]
        if json_schema_defs:
            structure_schema['definitions'] = {}

            # First pass: populate type registry for reference resolution
            for def_name in json_schema_defs.keys():
                normalized_def_name = avro_name(def_name)
                self.type_registry[def_name] = f"#/definitions/{normalized_def_name}"

            # Keywords whose presence makes a definition a real schema rather
            # than a namespace-like container of nested definitions.
            schema_keywords = {'type', 'properties', 'items', 'additionalProperties', 'patternProperties',
                               'oneOf', 'anyOf', 'allOf', '$ref', 'required', 'enum', 'const', 'minimum',
                               'maximum', 'minLength', 'maxLength', 'pattern', 'format', 'if', 'then', 'else'}
            # Serialized input schema used for the reference search below;
            # built lazily and only once instead of once per container.
            schema_str: str | None = None

            # Second pass: convert each definition
            for def_name, def_schema in json_schema_defs.items():
                # Skip empty definitions or ones that are just plain values/strings
                if not isinstance(def_schema, dict) or not def_schema:
                    continue

                # Check if this is a pure container definition (only contains nested
                # schemas, no actual schema keywords). These are typically namespace
                # containers like "resourceTypes" that only organize other types.
                has_schema_keywords = any(key in def_schema for key in schema_keywords)
                if not has_schema_keywords:
                    non_meta_items = {k: v for k, v in def_schema.items()
                                      if not k.startswith('$') and k not in ['title', 'description', 'examples']}
                    if non_meta_items and all(isinstance(value, dict) for value in non_meta_items.values()):
                        # This looks like a pure container - check if any references point to it.
                        # References may be spelled with either definitions keyword, so the
                        # '#/$defs/...' form is searched for as well.
                        ref_targets = (
                            f"#/definitions/{def_name}",
                            f"#/definitions/{avro_name(def_name)}",
                            f"#/$defs/{def_name}",
                        )
                        if schema_str is None:
                            schema_str = json.dumps(json_schema)
                        if any(target in schema_str for target in ref_targets):
                            # This container is being referenced, so keep it as a
                            # minimal valid object type.
                            normalized_def_name, original_name = avro_name_with_altname(def_name)
                            container_def = {
                                'type': 'object',
                                'name': normalized_def_name,
                                'additionalProperties': True  # Allow any properties to make it valid
                            }
                            if original_name is not None:
                                container_def['altnames'] = {'json': original_name}
                            structure_schema['definitions'][normalized_def_name] = container_def
                        # Referenced or not, a pure container is never converted as a schema.
                        continue

                # Process all dictionary definitions - this includes schemas with only descriptions
                dependencies = []
                normalized_def_name, original_name = avro_name_with_altname(def_name)
                converted_def = self.json_type_to_structure_type(
                    def_schema, def_name, '', namespace, dependencies,
                    json_schema, base_uri, structure_schema, [], 1
                )
                if isinstance(converted_def, dict):
                    converted_def['name'] = normalized_def_name
                    # Add alternate name if the original was different
                    if original_name is not None:
                        converted_def.setdefault('altnames', {})['json'] = original_name
                    structure_schema['definitions'][normalized_def_name] = converted_def
                else:
                    # Non-dict results (e.g. a bare type name) are wrapped in a definition object
                    definition_obj = {
                        'type': converted_def,
                        'name': normalized_def_name
                    }
                    # Add alternate name if the original was different
                    if original_name is not None:
                        definition_obj['altnames'] = {'json': original_name}
                    structure_schema['definitions'][normalized_def_name] = definition_obj

    # Handle root-level schema type
    root_type_keys = ['type', 'properties', 'items', 'additionalProperties', 'oneOf', 'anyOf', 'allOf']
    has_root_type = any(key in json_schema for key in root_type_keys)
    if has_root_type and isinstance(json_schema, dict):
        dependencies = []
        root_converted = self.json_type_to_structure_type(
            json_schema, self.root_class_name, '', namespace, dependencies,
            json_schema, base_uri, structure_schema, [], 1
        )

        # Merge root type properties into schema without overwriting keys
        # that are already present (such as '$schema' and 'definitions')
        if isinstance(root_converted, dict):
            for key, value in root_converted.items():
                if key not in structure_schema:
                    structure_schema[key] = value
        else:
            structure_schema['type'] = root_converted

    # Handle schema metadata
    if isinstance(json_schema, dict):
        if '$id' in json_schema:
            structure_schema['$id'] = json_schema['$id']
        elif 'id' in json_schema:
            structure_schema['$id'] = json_schema['id']
        else:
            # Generate default $id if missing
            structure_schema['$id'] = f"https://example.com/{namespace.replace('.', '/')}.schema.json"

        # Add description if present, or map title to description if no description exists
        if 'description' in json_schema:
            structure_schema['description'] = json_schema['description']
        elif 'title' in json_schema:
            structure_schema['description'] = json_schema['title']
    else:
        # Generate default $id for non-dict schemas
        structure_schema['$id'] = f"https://example.com/{namespace.replace('.', '/')}.schema.json"

    # Mark abstract types
    self._mark_abstract_types(structure_schema)

    return structure_schema
2052
+
2053
def convert_json_schema_to_structure(self, json_schema: Union[dict, list], base_uri: str = "") -> dict:
    """
    Convert an already-parsed JSON Schema into JSON Structure format.

    The schema is converted with jsons_to_structure() using self.root_namespace,
    then post-processed: a root name is added when needed, '$uses' is synchronised
    with the features found by _scan_for_uses(), and the result is passed through
    _validate_and_fix_json_structure_type().

    Args:
        json_schema (dict | list): The parsed JSON Schema to convert
        base_uri (str): Base URI for reference resolution

    Returns:
        dict: The converted JSON Structure schema

    Raises:
        TypeError: If json_schema is neither a dict nor a list
        ValueError: If json_schema is an empty dict, or if any conversion step
            fails (the original exception is chained as the cause)
    """
    if not isinstance(json_schema, (dict, list)):
        raise TypeError(f"Expected dict or list, got {type(json_schema)}")

    if isinstance(json_schema, dict) and len(json_schema) == 0:
        raise ValueError("Empty schema dictionary provided")

    try:
        converted = self.jsons_to_structure(json_schema, self.root_namespace, base_uri)

        # A typed root without a name gets the configured root class name.
        if 'type' in converted and 'name' not in converted:
            converted['name'] = avro_name(self.root_class_name)

        # Only advertise $uses when a feature is actually used; otherwise drop it.
        used_features = self._scan_for_uses(converted)
        if used_features:
            converted['$uses'] = used_features
        else:
            converted.pop('$uses', None)

        # Validate and fix JSON Structure compliance
        return self._validate_and_fix_json_structure_type(converted)
    except Exception as e:
        raise ValueError(f"Failed to convert JSON Schema to JSON Structure: {e}") from e
2096
+
2097
def is_valid_identifier(self, name: str) -> bool:
    """
    Check if a name is a valid identifier (for property names, etc.).

    A name is valid when it is a non-empty string consisting only of ASCII
    letters, digits and underscores, and it does not start with a digit.

    Args:
        name (str): The candidate name

    Returns:
        bool: True if the whole name matches the identifier pattern
    """
    if not name or not isinstance(name, str):
        return False

    import re
    # fullmatch() instead of match() with a '$' anchor: '$' also matches right
    # before a trailing newline, which would wrongly accept names like "abc\n".
    return re.fullmatch(r'[a-zA-Z_][a-zA-Z0-9_]*', name) is not None
2105
+
2106
def normalize_identifier(self, name: str) -> str:
    """
    Turn an arbitrary name into a valid identifier.

    Every character other than an ASCII letter, digit or underscore becomes an
    underscore; a result starting with a digit is prefixed with 'prop_'.
    Empty or non-string input yields 'property'.
    """
    if not isinstance(name, str) or not name:
        return 'property'

    # Keep ASCII letters, digits and underscores; everything else becomes '_'.
    cleaned = ''.join(
        ch if ch == '_' or (ch.isascii() and ch.isalnum()) else '_'
        for ch in name
    )

    # Identifiers must not start with a digit
    if cleaned[:1].isdigit():
        cleaned = 'prop_' + cleaned

    return cleaned if cleaned else 'property'
2125
+
2126
def _validate_and_fix_json_structure_type(self, structure_type: Any) -> Any:
    """
    Validate and fix a JSON Structure type to ensure compliance.

    This method post-processes generated JSON Structure schemas to fix common issues:
    - Converts "integer" type to "number"
    - Ensures arrays have "items" (defaults to object items)
    - Ensures objects have "properties" unless they carry "$ref" or "$extends"
    - Ensures map "values" / set "items" are schema objects, not strings
    - Recursively fixes nested schemas

    Every nested-schema keyword is processed independently, so a schema that
    carries several of them (for example both "anyOf" and "oneOf", or both
    "items" and "properties") has all of them fixed, not just the first one.

    Args:
        structure_type (Any): The schema to fix; non-dict values are returned unchanged

    Returns:
        Any: A fixed copy of the schema (the input dict is not modified)
    """
    if not isinstance(structure_type, dict):
        return structure_type

    # Work on a shallow copy; nested schemas are replaced by fixed copies below.
    fixed = structure_type.copy()
    declared_type = fixed.get('type')

    if declared_type == 'integer':
        fixed['type'] = 'number'
    elif declared_type == 'array' and 'items' not in fixed:
        fixed['items'] = {'type': 'object'}  # Default to object items
    elif (declared_type == 'object' and
          'properties' not in fixed and
          '$ref' not in fixed and
          '$extends' not in fixed):
        fixed['properties'] = {}  # Default to empty properties
    elif declared_type == 'map' and isinstance(fixed.get('values'), str):
        fixed['values'] = {'type': fixed['values']}
    elif declared_type == 'set' and isinstance(fixed.get('items'), str):
        fixed['items'] = {'type': fixed['items']}

    # Composition keywords hold lists of schemas
    for keyword in ('anyOf', 'oneOf', 'allOf'):
        if keyword in fixed:
            fixed[keyword] = [
                self._validate_and_fix_json_structure_type(item)
                for item in fixed[keyword]
            ]

    # Keywords holding a single nested schema
    for keyword in ('items', 'values', 'additionalProperties'):
        if isinstance(fixed.get(keyword), dict):
            fixed[keyword] = self._validate_and_fix_json_structure_type(fixed[keyword])

    # Keywords holding a name -> schema mapping
    for keyword in ('properties', 'definitions', '$defs'):
        container = fixed.get(keyword)
        if isinstance(container, dict):
            fixed[keyword] = {
                k: self._validate_and_fix_json_structure_type(v)
                for k, v in container.items()
            }

    return fixed
2220
+
2221
def _sort_json_structure_properties(self, schema: Any) -> Any:
    """
    Return a copy of a JSON Structure schema with its keys in a stable order.

    Well-known keys come first in a fixed order, all other keys follow
    alphabetically. Entries of 'properties' are sorted by name, and the schemas
    under 'properties', 'anyOf', 'oneOf', 'allOf', 'items' and 'values' are
    ordered recursively. Non-dict input is returned unchanged.
    """
    if not isinstance(schema, dict):
        return schema

    preferred = ['$schema', 'type', 'title', 'description', 'properties', 'required', 'items', 'values', 'anyOf', 'oneOf', 'allOf', '$ref', '$extends']

    # Preferred keys that are present, followed by the rest in alphabetical order
    leading = [key for key in preferred if key in schema]
    trailing = sorted(key for key in schema.keys() if key not in preferred)

    ordered = {}
    for key in leading + trailing:
        value = schema[key]
        if key == 'properties' and isinstance(value, dict):
            # Properties are emitted alphabetically
            ordered[key] = {
                prop_name: self._sort_json_structure_properties(prop_schema)
                for prop_name, prop_schema in sorted(value.items())
            }
        elif key in ('anyOf', 'oneOf', 'allOf') and isinstance(value, list):
            ordered[key] = [self._sort_json_structure_properties(member) for member in value]
        elif key in ('items', 'values') and isinstance(value, dict):
            ordered[key] = self._sort_json_structure_properties(value)
        else:
            ordered[key] = value

    return ordered
2260
+
2261
def _convert_constraint_composition_to_required(self, json_type: dict) -> dict:
    """
    Convert constraint-only composition (anyOf with property requirements) to a simple required array.
    This is used when anyOf items only add constraints without changing the structure.

    An anyOf item contributes its 'required' names when it is a dict with a
    'required' key and either defines no properties or only properties already
    present in the base schema. Contributed names are merged with the base
    schema's own 'required' list, so requirements stated next to the anyOf are
    kept rather than overwritten.

    Args:
        json_type (dict): The JSON schema that may contain 'anyOf'

    Returns:
        dict: A copy of the schema without 'anyOf' and with the merged, sorted
            'required' list; the input itself when it has no 'anyOf' or when
            the conversion fails
    """
    try:
        if 'anyOf' not in json_type:
            return json_type

        base_properties = json_type.get('properties', {})

        # Gather required properties from the constraint-only anyOf items
        contributed = set()
        for anyof_item in json_type['anyOf']:
            if not (isinstance(anyof_item, dict) and 'required' in anyof_item):
                continue
            # Only consider it constraint-only if it doesn't define new properties
            item_properties = anyof_item.get('properties', {})
            if not item_properties or all(prop in base_properties for prop in item_properties):
                contributed.update(anyof_item['required'])

        # Create simplified schema
        result = {k: v for k, v in json_type.items() if k != 'anyOf'}
        if contributed:
            base_required = json_type.get('required')
            # Keep what the base schema already required; only list-valued
            # 'required' can be merged (other forms are replaced as before).
            if isinstance(base_required, list):
                contributed.update(base_required)
            result['required'] = sorted(contributed)

        return result

    except Exception:
        # Best effort: if conversion fails, return the original schema
        return json_type
2292
+
2293
def _is_constraint_only_schema(self, json_type: dict) -> bool:
    """
    Tell whether a schema carries only constraints and no structural elements.
    Used to determine if anyOf items are constraint-only.

    Annotation keys ('title', 'description', '$id', '$schema') are ignored.
    The schema qualifies when at least one constraint key is present and no
    structural key is present. Non-dict input never qualifies.
    """
    if not isinstance(json_type, dict):
        return False

    ignored_keys = {'title', 'description', '$id', '$schema'}
    # Keys that only constrain an existing shape
    constraint_keys = {'required', 'minProperties', 'maxProperties', 'dependencies', 'dependentRequired', 'dependentSchemas'}
    # Keys that define shape and therefore disqualify the schema
    structural_keys = {'type', 'properties', 'additionalProperties', 'patternProperties', 'items', 'anyOf', 'oneOf', 'allOf'}

    relevant_keys = {key for key in json_type if key not in ignored_keys}

    if not relevant_keys.isdisjoint(structural_keys):
        return False
    return not relevant_keys.isdisjoint(constraint_keys)
2311
+
2312
def _convert_constraint_anyof_to_permutations(self, json_type: dict) -> dict:
    """
    Reduce a constraint-only anyOf to the union of its 'required' names.

    Only anyOf items accepted by _is_constraint_only_schema() contribute.
    Returns {'required': [...sorted names...]} when any name was collected and
    an empty dict otherwise. A schema without 'anyOf' is returned unchanged;
    if anything goes wrong an empty dict is returned.
    """
    try:
        if 'anyOf' not in json_type:
            return json_type

        required_union = set()
        for candidate in json_type['anyOf']:
            if not isinstance(candidate, dict):
                continue
            if not self._is_constraint_only_schema(candidate):
                continue
            required_union.update(candidate.get('required', ()))

        if not required_union:
            return {}
        return {'required': sorted(required_union)}

    except Exception:
        # If conversion fails, return empty schema
        return {}
2335
+
2336
def has_composition_keywords(self, json_type: dict) -> bool:
    """
    Report whether a JSON schema uses a composition keyword (anyOf, oneOf, allOf)
    or a conditional keyword (if, then, else). Non-dict input yields False.
    """
    composition_keywords = {'anyOf', 'oneOf', 'allOf', 'if', 'then', 'else'}
    return isinstance(json_type, dict) and not composition_keywords.isdisjoint(json_type)
2343
+
2344
def resolve_composition_keywords(self, json_type: dict, record_name: str, field_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int) -> dict:
    """
    Resolve composition keywords in JSON schema by flattening them.
    This is a simple implementation that merges composition schemas.

    - allOf: every sub-schema is converted and merged key by key; 'properties'
      are combined and 'required' lists are concatenated with duplicates
      removed (first occurrence wins, so the order is deterministic). If a
      '$ref' was seen together with a 'type', the result is either the bare
      reference plus metadata (nothing meaningful was added) or the merged
      type without the '$ref'.
    - anyOf: every sub-schema is converted and the list is passed to
      flatten_union(); the result is returned as {'type': <union>}.
    - oneOf: a detected discriminated union becomes a 'choice' type; otherwise
      the first alternative is converted and returned.

    Args:
        json_type (dict): The schema containing the composition keyword
        record_name (str): Name of the record being converted
        field_name (str): Name of the field being converted (may be empty)
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): The structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: The resolved structure type. When no keyword applies, or when any
            exception is raised during resolution, a map of 'any' values is
            returned as a fallback.
    """
    try:
        if 'allOf' in json_type:
            # Merge all schemas in allOf
            merged = {}
            has_ref = False
            ref_value = None

            for schema in json_type['allOf']:
                converted = self.json_type_to_structure_type(
                    schema, record_name, field_name, namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                if not isinstance(converted, dict):
                    continue

                # Track if we have a $ref
                if '$ref' in converted:
                    has_ref = True
                    ref_value = converted['$ref']

                # Simple merge - in real scenarios this would be more complex
                for key, value in converted.items():
                    if key == 'properties' and key in merged:
                        # Build a new dict: the one already in `merged` belongs
                        # to an earlier converted schema and must not be mutated.
                        merged[key] = {**merged[key], **value}
                    elif key == 'required' and key in merged:
                        # Order-preserving de-duplication keeps output stable
                        # across runs (set ordering is not).
                        merged[key] = list(dict.fromkeys(merged[key] + value))
                    else:
                        merged[key] = value

            # JSON Structure doesn't allow both $ref and type
            if has_ref and 'type' in merged:
                has_meaningful_properties = bool(
                    merged.get('properties') or
                    merged.get('required') or
                    'items' in merged
                )
                if not has_meaningful_properties:
                    # Nothing was added on top of the reference: just return it
                    result = {'$ref': ref_value}
                    # Copy metadata fields if present
                    for meta_key in ['description', 'title', 'doc']:
                        if meta_key in merged:
                            result[meta_key] = merged[meta_key]
                    return result

                # Has meaningful extensions: drop the $ref and keep the merged type
                del merged['$ref']

            return merged

        elif 'anyOf' in json_type:
            # For anyOf, convert to JSON Structure type union
            anyof_schemas = [
                self.json_type_to_structure_type(
                    anyof_item, record_name, field_name, namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                for anyof_item in json_type['anyOf']
            ]

            # Use flatten_union to properly hoist compound types
            flattened = self.flatten_union(anyof_schemas, structure_schema, field_name or record_name)
            return {
                'type': flattened
            }

        elif 'oneOf' in json_type:
            # First check if this is a discriminated union pattern
            choice_info = self.detect_discriminated_union_pattern(json_type)
            if choice_info:
                # Convert to JSON Structure choice type
                choice_result = {
                    'type': 'choice',
                    'choices': choice_info['choices']
                }

                # Add selector if specified (for tagged unions)
                if choice_info.get('selector'):
                    choice_result['selector'] = choice_info['selector']

                # Add name if we have one
                if record_name:
                    choice_result['name'] = avro_name(record_name)

                return choice_result

            # For oneOf without discriminated union, return the first option as a fallback
            if json_type['oneOf']:
                return self.json_type_to_structure_type(
                    json_type['oneOf'][0], record_name, field_name, namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )

        # Fallback to map type with any values
        return {'type': 'map', 'values': {'type': 'any'}}

    except Exception:
        # Best effort: if resolution fails, return a basic map type
        return {'type': 'map', 'values': {'type': 'any'}}
2445
+
2446
+ def detect_discriminated_union_pattern(self, json_type: dict) -> dict | None:
2447
+ """
2448
+ Detect discriminated union patterns in oneOf schemas that should be converted to choice type.
2449
+
2450
+ A discriminated union pattern is identified when:
2451
+ 1. Schema has oneOf with multiple object schemas
2452
+ 2. Each schema has a distinct set of required properties (mutually exclusive)
2453
+ 3. The schemas define object structures (have properties)
2454
+
2455
+ Patterns supported:
2456
+ - Simple discriminated unions: Each schema has exactly one unique required property
2457
+ - Complex discriminated unions: Each schema has a unique combination of required properties
2458
+ - Tagged unions: Each schema has a discriminator property with different enum/const values
2459
+
2460
+ Args:
2461
+ json_type (dict): The JSON schema object with oneOf
2462
+
2463
+ Returns:
2464
+ dict | None: Choice type configuration or None if not a discriminated union
2465
+ """
2466
+ if 'oneOf' not in json_type:
2467
+ return None
2468
+
2469
+ oneof_items = json_type['oneOf']
2470
+ if len(oneof_items) < 2:
2471
+ return None
2472
+
2473
+ # Check if all items are object schemas with properties
2474
+ all_schemas_are_objects = True
2475
+ for item in oneof_items:
2476
+ if not isinstance(item, dict):
2477
+ return None
2478
+ if not (item.get('type') == 'object' or 'properties' in item):
2479
+ all_schemas_are_objects = False
2480
+ break
2481
+
2482
+ if not all_schemas_are_objects:
2483
+ return None
2484
+
2485
+ # Pattern 1: Check for tagged unions with discriminator property
2486
+ discriminator_result = self._detect_tagged_union_pattern(oneof_items)
2487
+ if discriminator_result:
2488
+ return discriminator_result
2489
+
2490
+ # Pattern 2: Check for simple discriminated unions (each schema has exactly one unique required property)
2491
+ simple_result = self._detect_simple_discriminated_union(oneof_items)
2492
+ if simple_result:
2493
+ return simple_result
2494
+
2495
+ # Pattern 3: Check for complex discriminated unions (unique combinations of required properties)
2496
+ complex_result = self._detect_complex_discriminated_union(oneof_items)
2497
+ if complex_result:
2498
+ return complex_result
2499
+
2500
+ return None
2501
+
2502
+ def _detect_tagged_union_pattern(self, oneof_items: list) -> dict | None:
2503
+ """
2504
+ Detect tagged union pattern where all schemas have the same discriminator property
2505
+ with different enum/const values.
2506
+ """
2507
+ discriminator_props = {}
2508
+ common_discriminator = None
2509
+
2510
+ for item in oneof_items:
2511
+ properties = item.get('properties', {})
2512
+
2513
+ # Look for a property with enum or const value
2514
+ discriminator_found = False
2515
+ for prop_name, prop_schema in properties.items():
2516
+ if 'enum' in prop_schema and len(prop_schema['enum']) == 1:
2517
+ # Single enum value acts as discriminator
2518
+ disc_value = prop_schema['enum'][0]
2519
+ if common_discriminator is None:
2520
+ common_discriminator = prop_name
2521
+ elif common_discriminator != prop_name:
2522
+ return None # Different discriminator properties
2523
+ discriminator_props[str(disc_value)] = {'type': 'object'}
2524
+ discriminator_found = True
2525
+ break
2526
+ elif 'const' in prop_schema:
2527
+ # Const value acts as discriminator
2528
+ disc_value = prop_schema['const']
2529
+ if common_discriminator is None:
2530
+ common_discriminator = prop_name
2531
+ elif common_discriminator != prop_name:
2532
+ return None # Different discriminator properties
2533
+ discriminator_props[str(disc_value)] = {'type': 'object'}
2534
+ discriminator_found = True
2535
+ break
2536
+
2537
+ if not discriminator_found:
2538
+ return None
2539
+
2540
+ if common_discriminator and len(discriminator_props) == len(oneof_items):
2541
+ return {
2542
+ 'type': 'choice',
2543
+ 'choices': discriminator_props,
2544
+ 'selector': common_discriminator # Tagged union with explicit selector
2545
+ }
2546
+
2547
+ return None
2548
+
2549
+ def _detect_simple_discriminated_union(self, oneof_items: list) -> dict | None:
2550
+ """
2551
+ Detect simple discriminated union where each schema has exactly one unique required property.
2552
+ """
2553
+ choice_mapping = {}
2554
+
2555
+ for item in oneof_items:
2556
+ if 'properties' not in item or 'required' not in item:
2557
+ return None
2558
+
2559
+ required = item['required']
2560
+ if not isinstance(required, list) or len(required) != 1:
2561
+ return None # Must have exactly one required property
2562
+
2563
+ required_prop = required[0]
2564
+
2565
+ # Check if this property name is already used by another choice
2566
+ if required_prop in choice_mapping:
2567
+ return None # Properties must be mutually exclusive
2568
+
2569
+ # Ensure the required property exists in the properties
2570
+ if required_prop not in item['properties']:
2571
+ return None # Required property must exist in properties
2572
+
2573
+ # Store the choice information - use 'any' type for discriminated unions
2574
+ choice_mapping[required_prop] = {
2575
+ 'type': 'any',
2576
+ 'description': f'Choice variant with {required_prop} property'
2577
+ }
2578
+
2579
+ if len(choice_mapping) == len(oneof_items):
2580
+ return {
2581
+ 'type': 'choice',
2582
+ 'choices': choice_mapping,
2583
+ 'selector': None # Inline choice without explicit selector property
2584
+ }
2585
+
2586
+ return None
2587
+
2588
+ def _detect_complex_discriminated_union(self, oneof_items: list) -> dict | None:
2589
+ """
2590
+ Detect complex discriminated union where each schema has a unique combination of required properties.
2591
+ """
2592
+ required_sets = []
2593
+ choice_mapping = {}
2594
+
2595
+ for i, item in enumerate(oneof_items):
2596
+ if 'properties' not in item:
2597
+ return None
2598
+
2599
+ required = set(item.get('required', []))
2600
+
2601
+ # Check if this combination of required properties is unique
2602
+ for existing_set in required_sets:
2603
+ if required == existing_set:
2604
+ return None # Non-unique required property combination
2605
+ # Check for overlap - if sets overlap significantly, it's not a clean discriminated union
2606
+ overlap = required & existing_set
2607
+ if len(overlap) > 0 and (len(overlap) / len(required | existing_set)) > 0.5:
2608
+ return None # Too much overlap
2609
+
2610
+ required_sets.append(required)
2611
+
2612
+ # Create a choice name based on the required properties
2613
+ if len(required) == 0:
2614
+ choice_name = f'variant_{i}'
2615
+ elif len(required) == 1:
2616
+ choice_name = list(required)[0]
2617
+ else:
2618
+ # Sort for consistent naming
2619
+ sorted_props = sorted(required)
2620
+ choice_name = '_'.join(sorted_props[:2]) # Use first two properties for name
2621
+ if len(sorted_props) > 2:
2622
+ choice_name += '_etc'
2623
+
2624
+ # Use 'any' type for discriminated unions
2625
+ choice_mapping[choice_name] = {
2626
+ 'type': 'any',
2627
+ 'description': f'Choice variant requiring: {", ".join(sorted(required))}'
2628
+ }
2629
+
2630
+ if len(choice_mapping) == len(oneof_items) and len(choice_mapping) >= 2:
2631
+ return {
2632
+ 'type': 'choice',
2633
+ 'choices': choice_mapping,
2634
+ 'selector': None # Inline choice without explicit selector property
2635
+ }
2636
+
2637
+ return None
2638
+
2639
def _ensure_abstract_base_type(self, base_ref: str, abstract_base_name: str, structure_schema: dict, json_schema: dict, base_uri: str) -> None:
    """
    Make sure structure_schema['definitions'] holds an abstract base type for an inheritance pattern.

    Nothing happens if the definition already exists. Otherwise base_ref is
    resolved and converted with inheritance detection and composition
    preservation switched off for the duration of the conversion, and the
    result is stored marked as abstract. An empty abstract object is stored
    instead when the same base is already being created further up the call
    chain, when the base converts to a plain string type, or when resolving or
    converting raises; the first and last case print a warning.

    Args:
        base_ref (str): The original $ref to the base type
        abstract_base_name (str): The name for the abstract base type
        structure_schema (dict): The structure schema being built
        json_schema (dict): The original JSON schema
        base_uri (str): The base URI for resolving references
    """
    def empty_abstract_object() -> dict:
        # Minimal stand-in used whenever the real base type cannot be built
        return {
            'type': 'object',
            'abstract': True,
            'name': abstract_base_name,
            'properties': {}
        }

    # Ensure definitions section exists
    structure_schema.setdefault('definitions', {})

    # An existing abstract base type is never recreated
    if abstract_base_name in structure_schema['definitions']:
        return

    # Lazily create the set of names currently being built (re-entrancy guard)
    try:
        self._creating_abstract_bases
    except AttributeError:
        self._creating_abstract_bases = set()

    if abstract_base_name in self._creating_abstract_bases:
        print(f"WARNING: Circular reference detected while creating abstract base type {abstract_base_name}")
        # Break the cycle with a minimal abstract type
        structure_schema['definitions'][abstract_base_name] = empty_abstract_object()
        return

    self._creating_abstract_bases.add(abstract_base_name)

    try:
        # Resolve the original base type reference
        base_schema, _ = self.resolve_reference({'$ref': base_ref}, base_uri, json_schema)

        # Convert without inheritance detection (avoids infinite recursion) and
        # with composition flattening forced for base types.
        saved_flags = (self.detect_inheritance, self.preserve_composition)
        self.detect_inheritance = False
        self.preserve_composition = False

        try:
            base_structure = self.json_type_to_structure_type(
                base_schema, abstract_base_name, '', '', [], json_schema, base_uri,
                structure_schema, [], 1
            )

            if isinstance(base_structure, dict):
                # Mark it as abstract and store it
                base_structure['abstract'] = True
                base_structure['name'] = abstract_base_name
                structure_schema['definitions'][abstract_base_name] = base_structure
            elif isinstance(base_structure, str):
                # A simple type cannot be a base: wrap it as an empty object
                structure_schema['definitions'][abstract_base_name] = empty_abstract_object()

        finally:
            # Restore inheritance detection and composition settings
            self.detect_inheritance, self.preserve_composition = saved_flags

    except Exception as e:
        # If we can't resolve the base type, create a minimal abstract type
        print(f"WARNING: Failed to create abstract base type {abstract_base_name}: {e}")
        structure_schema['definitions'][abstract_base_name] = empty_abstract_object()
    finally:
        # Remove from the guard set
        self._creating_abstract_bases.discard(abstract_base_name)
2726
+
2727
def create_pattern_union_maps(self, pattern_properties: dict, additional_props, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Create a type union of maps for multiple patternProperties with optional additionalProperties.
    Each map in the union has a single pattern constraint.
    If additionalProperties is a schema or True, an additional fallback map is created.
    Uses JSON Structure type union syntax: {"type": [map1, map2, ...]}
    Every map is hoisted via _hoist_definition() and referenced by what that call returns.

    Args:
        pattern_properties (dict): The patternProperties object with patterns as keys
        additional_props: The additionalProperties value (False, True, None or schema dict)
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): The structure schema being built; gains a
            'definitions' section if it has none
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure type union of maps using {"type": [...]} syntax
    """
    # Initialize definitions if it doesn't exist
    if 'definitions' not in structure_schema:
        structure_schema['definitions'] = {}

    # Create a map for each pattern and hoist to definitions
    map_refs = []
    for idx, (pattern, values_schema) in enumerate(pattern_properties.items()):
        map_result = self.create_structure_map(
            values_schema, record_name, namespace, dependencies,
            json_schema, base_uri, structure_schema, record_stack, recursion_depth
        )

        # Add keyNames validation for this specific pattern
        map_result['keyNames'] = {
            "type": "string",
            "pattern": pattern
        }

        # The pattern is made name-safe; the index keeps hints unique even when
        # two patterns sanitize to the same text.
        pattern_safe = re.sub(r'[^a-zA-Z0-9_]', '_', pattern)
        map_name_hint = f"{record_name}_PatternMap_{pattern_safe}_{idx}"
        map_refs.append(self._hoist_definition(map_result, structure_schema, map_name_hint))

    # additionalProperties of False or None adds no fallback map
    if isinstance(additional_props, dict):
        # Create map for additionalProperties schema
        additional_map_result = self.create_structure_map(
            additional_props, record_name, namespace, dependencies,
            json_schema, base_uri, structure_schema, record_stack, recursion_depth
        )

        # Add keyNames validation for catch-all pattern (any string)
        additional_map_result['keyNames'] = {
            "type": "string"
        }

        additional_map_name_hint = f"{record_name}_AdditionalMap"
        map_refs.append(self._hoist_definition(additional_map_result, structure_schema, additional_map_name_hint))
    elif additional_props is True:
        # additionalProperties: true means any type - create a map with any values.
        # "values" is written as a schema object rather than the bare string
        # "any", matching the other map fallbacks in this converter.
        any_map_result = {
            "type": "map",
            "keyNames": {
                "type": "string"
            },
            "values": {"type": "any"}
        }

        any_map_name_hint = f"{record_name}_AnyMap"
        map_refs.append(self._hoist_definition(any_map_result, structure_schema, any_map_name_hint))

    # Ensure $uses includes JSONStructureValidation
    self._ensure_validation_extension_in_structure_schema(structure_schema)

    # Return type union using JSON Structure type array syntax with hoisted references
    return {
        "type": map_refs
    }
2815
+
2816
def convert_json_schema_to_structure(input_data: str, root_namespace: str = 'example.com', base_uri: str = '') -> str:
    """
    Translate a JSON Schema document (given as text) into JSON Structure text.

    A fresh ``JsonToStructureConverter`` is created for every call, its
    ``root_namespace`` attribute is set, and the parsed schema is handed to
    ``jsons_to_structure`` together with the namespace and base URI.

    Args:
        input_data (str): JSON Schema document serialized as a JSON string.
        root_namespace (str): Namespace assigned to the root schema.
            Defaults to 'example.com'.
        base_uri (str): Base URI forwarded to the converter. Defaults to ''.

    Returns:
        str: The conversion result serialized as JSON with 2-space indentation.

    Raises:
        json.JSONDecodeError: If ``input_data`` is not valid JSON.
    """
    # The converter is built before the input is parsed, matching the
    # established order of operations.
    structure_converter = JsonToStructureConverter()
    structure_converter.root_namespace = root_namespace

    parsed_schema = json.loads(input_data)

    # Convert and serialize in one step; the namespace is passed both as an
    # attribute (above) and as an argument, as the converter call expects.
    return json.dumps(
        structure_converter.jsons_to_structure(parsed_schema, root_namespace, base_uri),
        indent=2,
    )
2837
+
2838
def convert_json_schema_to_structure_files(
    json_schema_file_path: str,
    structure_schema_path: str,
    root_namespace = None
) -> None:
    """
    Read a JSON Schema file, convert it, and write the JSON Structure output.

    Both files are handled as UTF-8 text. The output file is only opened
    after the conversion has produced its result.

    Args:
        json_schema_file_path (str): Path of the JSON Schema file to read.
        structure_schema_path (str): Path of the JSON Structure file to write.
        root_namespace (str, optional): Namespace for the root schema. When
            None, 'example.com' is used.
    """
    # Substitute the default namespace only when the caller passed None.
    effective_namespace = 'example.com' if root_namespace is None else root_namespace

    # Load the source schema text.
    with open(json_schema_file_path, 'r', encoding='utf-8') as source_file:
        schema_text = source_file.read()

    # Delegate the actual conversion to the string-based entry point.
    structure_text = convert_json_schema_to_structure(schema_text, effective_namespace)

    # Persist the converted document.
    with open(structure_schema_path, 'w', encoding='utf-8') as target_file:
        target_file.write(structure_text)