avrotize 2.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. avrotize/__init__.py +66 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp/CMakeLists.txt.jinja +77 -0
  7. avrotize/avrotocpp/build.bat.jinja +7 -0
  8. avrotize/avrotocpp/build.sh.jinja +7 -0
  9. avrotize/avrotocpp/dataclass_body.jinja +108 -0
  10. avrotize/avrotocpp/vcpkg.json.jinja +21 -0
  11. avrotize/avrotocpp.py +483 -0
  12. avrotize/avrotocsharp/README.md.jinja +166 -0
  13. avrotize/avrotocsharp/class_test.cs.jinja +266 -0
  14. avrotize/avrotocsharp/dataclass_core.jinja +293 -0
  15. avrotize/avrotocsharp/enum_test.cs.jinja +20 -0
  16. avrotize/avrotocsharp/project.csproj.jinja +30 -0
  17. avrotize/avrotocsharp/project.sln.jinja +34 -0
  18. avrotize/avrotocsharp/run_coverage.ps1.jinja +98 -0
  19. avrotize/avrotocsharp/run_coverage.sh.jinja +149 -0
  20. avrotize/avrotocsharp/testproject.csproj.jinja +19 -0
  21. avrotize/avrotocsharp.py +1180 -0
  22. avrotize/avrotocsv.py +121 -0
  23. avrotize/avrotodatapackage.py +173 -0
  24. avrotize/avrotodb.py +1383 -0
  25. avrotize/avrotogo/go_enum.jinja +12 -0
  26. avrotize/avrotogo/go_helpers.jinja +31 -0
  27. avrotize/avrotogo/go_struct.jinja +151 -0
  28. avrotize/avrotogo/go_test.jinja +47 -0
  29. avrotize/avrotogo/go_union.jinja +38 -0
  30. avrotize/avrotogo.py +476 -0
  31. avrotize/avrotographql.py +197 -0
  32. avrotize/avrotoiceberg.py +210 -0
  33. avrotize/avrotojava/class_test.java.jinja +212 -0
  34. avrotize/avrotojava/enum_test.java.jinja +21 -0
  35. avrotize/avrotojava/testproject.pom.jinja +54 -0
  36. avrotize/avrotojava.py +2156 -0
  37. avrotize/avrotojs.py +250 -0
  38. avrotize/avrotojsons.py +481 -0
  39. avrotize/avrotojstruct.py +345 -0
  40. avrotize/avrotokusto.py +364 -0
  41. avrotize/avrotomd/README.md.jinja +49 -0
  42. avrotize/avrotomd.py +137 -0
  43. avrotize/avrotools.py +168 -0
  44. avrotize/avrotoparquet.py +208 -0
  45. avrotize/avrotoproto.py +359 -0
  46. avrotize/avrotopython/dataclass_core.jinja +241 -0
  47. avrotize/avrotopython/enum_core.jinja +87 -0
  48. avrotize/avrotopython/pyproject_toml.jinja +18 -0
  49. avrotize/avrotopython/test_class.jinja +97 -0
  50. avrotize/avrotopython/test_enum.jinja +23 -0
  51. avrotize/avrotopython.py +626 -0
  52. avrotize/avrotorust/dataclass_enum.rs.jinja +74 -0
  53. avrotize/avrotorust/dataclass_struct.rs.jinja +204 -0
  54. avrotize/avrotorust/dataclass_union.rs.jinja +105 -0
  55. avrotize/avrotorust.py +435 -0
  56. avrotize/avrotots/class_core.ts.jinja +140 -0
  57. avrotize/avrotots/class_test.ts.jinja +77 -0
  58. avrotize/avrotots/enum_core.ts.jinja +46 -0
  59. avrotize/avrotots/gitignore.jinja +34 -0
  60. avrotize/avrotots/index.ts.jinja +0 -0
  61. avrotize/avrotots/package.json.jinja +23 -0
  62. avrotize/avrotots/tsconfig.json.jinja +21 -0
  63. avrotize/avrotots.py +687 -0
  64. avrotize/avrotoxsd.py +344 -0
  65. avrotize/cddltostructure.py +1841 -0
  66. avrotize/commands.json +3496 -0
  67. avrotize/common.py +834 -0
  68. avrotize/constants.py +87 -0
  69. avrotize/csvtoavro.py +132 -0
  70. avrotize/datapackagetoavro.py +76 -0
  71. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  72. avrotize/dependencies/cs/net90/dependencies.csproj +29 -0
  73. avrotize/dependencies/go/go121/go.mod +6 -0
  74. avrotize/dependencies/java/jdk21/pom.xml +91 -0
  75. avrotize/dependencies/python/py312/requirements.txt +13 -0
  76. avrotize/dependencies/rust/stable/Cargo.toml +17 -0
  77. avrotize/dependencies/typescript/node22/package.json +16 -0
  78. avrotize/dependency_resolver.py +348 -0
  79. avrotize/dependency_version.py +432 -0
  80. avrotize/generic/generic.avsc +57 -0
  81. avrotize/jsonstoavro.py +2167 -0
  82. avrotize/jsonstostructure.py +2864 -0
  83. avrotize/jstructtoavro.py +878 -0
  84. avrotize/kstructtoavro.py +93 -0
  85. avrotize/kustotoavro.py +455 -0
  86. avrotize/openapitostructure.py +717 -0
  87. avrotize/parquettoavro.py +157 -0
  88. avrotize/proto2parser.py +498 -0
  89. avrotize/proto3parser.py +403 -0
  90. avrotize/prototoavro.py +382 -0
  91. avrotize/prototypes/any.avsc +19 -0
  92. avrotize/prototypes/api.avsc +106 -0
  93. avrotize/prototypes/duration.avsc +20 -0
  94. avrotize/prototypes/field_mask.avsc +18 -0
  95. avrotize/prototypes/struct.avsc +60 -0
  96. avrotize/prototypes/timestamp.avsc +20 -0
  97. avrotize/prototypes/type.avsc +253 -0
  98. avrotize/prototypes/wrappers.avsc +117 -0
  99. avrotize/structuretocddl.py +597 -0
  100. avrotize/structuretocpp/CMakeLists.txt.jinja +76 -0
  101. avrotize/structuretocpp/build.bat.jinja +3 -0
  102. avrotize/structuretocpp/build.sh.jinja +3 -0
  103. avrotize/structuretocpp/dataclass_body.jinja +50 -0
  104. avrotize/structuretocpp/vcpkg.json.jinja +11 -0
  105. avrotize/structuretocpp.py +697 -0
  106. avrotize/structuretocsharp/class_test.cs.jinja +180 -0
  107. avrotize/structuretocsharp/dataclass_core.jinja +156 -0
  108. avrotize/structuretocsharp/enum_test.cs.jinja +36 -0
  109. avrotize/structuretocsharp/json_structure_converters.cs.jinja +399 -0
  110. avrotize/structuretocsharp/program.cs.jinja +49 -0
  111. avrotize/structuretocsharp/project.csproj.jinja +17 -0
  112. avrotize/structuretocsharp/project.sln.jinja +34 -0
  113. avrotize/structuretocsharp/testproject.csproj.jinja +18 -0
  114. avrotize/structuretocsharp/tuple_converter.cs.jinja +121 -0
  115. avrotize/structuretocsharp.py +2295 -0
  116. avrotize/structuretocsv.py +365 -0
  117. avrotize/structuretodatapackage.py +659 -0
  118. avrotize/structuretodb.py +1125 -0
  119. avrotize/structuretogo/go_enum.jinja +12 -0
  120. avrotize/structuretogo/go_helpers.jinja +26 -0
  121. avrotize/structuretogo/go_interface.jinja +18 -0
  122. avrotize/structuretogo/go_struct.jinja +187 -0
  123. avrotize/structuretogo/go_test.jinja +70 -0
  124. avrotize/structuretogo.py +729 -0
  125. avrotize/structuretographql.py +502 -0
  126. avrotize/structuretoiceberg.py +355 -0
  127. avrotize/structuretojava/choice_core.jinja +34 -0
  128. avrotize/structuretojava/class_core.jinja +23 -0
  129. avrotize/structuretojava/enum_core.jinja +18 -0
  130. avrotize/structuretojava/equals_hashcode.jinja +30 -0
  131. avrotize/structuretojava/pom.xml.jinja +26 -0
  132. avrotize/structuretojava/tuple_core.jinja +49 -0
  133. avrotize/structuretojava.py +938 -0
  134. avrotize/structuretojs/class_core.js.jinja +33 -0
  135. avrotize/structuretojs/enum_core.js.jinja +10 -0
  136. avrotize/structuretojs/package.json.jinja +12 -0
  137. avrotize/structuretojs/test_class.js.jinja +84 -0
  138. avrotize/structuretojs/test_enum.js.jinja +58 -0
  139. avrotize/structuretojs/test_runner.js.jinja +45 -0
  140. avrotize/structuretojs.py +657 -0
  141. avrotize/structuretojsons.py +498 -0
  142. avrotize/structuretokusto.py +639 -0
  143. avrotize/structuretomd/README.md.jinja +204 -0
  144. avrotize/structuretomd.py +322 -0
  145. avrotize/structuretoproto.py +764 -0
  146. avrotize/structuretopython/dataclass_core.jinja +363 -0
  147. avrotize/structuretopython/enum_core.jinja +45 -0
  148. avrotize/structuretopython/map_alias.jinja +21 -0
  149. avrotize/structuretopython/pyproject_toml.jinja +23 -0
  150. avrotize/structuretopython/test_class.jinja +103 -0
  151. avrotize/structuretopython/test_enum.jinja +34 -0
  152. avrotize/structuretopython.py +799 -0
  153. avrotize/structuretorust/dataclass_enum.rs.jinja +63 -0
  154. avrotize/structuretorust/dataclass_struct.rs.jinja +121 -0
  155. avrotize/structuretorust/dataclass_union.rs.jinja +81 -0
  156. avrotize/structuretorust.py +714 -0
  157. avrotize/structuretots/class_core.ts.jinja +78 -0
  158. avrotize/structuretots/enum_core.ts.jinja +6 -0
  159. avrotize/structuretots/gitignore.jinja +8 -0
  160. avrotize/structuretots/index.ts.jinja +1 -0
  161. avrotize/structuretots/package.json.jinja +39 -0
  162. avrotize/structuretots/test_class.ts.jinja +35 -0
  163. avrotize/structuretots/tsconfig.json.jinja +21 -0
  164. avrotize/structuretots.py +740 -0
  165. avrotize/structuretoxsd.py +679 -0
  166. avrotize/xsdtoavro.py +413 -0
  167. avrotize-2.21.1.dist-info/METADATA +1319 -0
  168. avrotize-2.21.1.dist-info/RECORD +171 -0
  169. avrotize-2.21.1.dist-info/WHEEL +4 -0
  170. avrotize-2.21.1.dist-info/entry_points.txt +3 -0
  171. avrotize-2.21.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,639 @@
1
+ """Converts a JSON Structure schema to a Kusto table schema."""
2
+
3
+ import json
4
+ import sys
5
+ from typing import Any, List, Optional, Dict, Union
6
+ from avrotize.common import build_flat_type_dict, inline_avro_references, strip_first_doc
7
+ from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties
8
+
9
+
10
class StructureToKusto:
    """Converts a JSON Structure schema to a Kusto table schema.

    The converter emits a list of Kusto management commands (table creation,
    docstrings, JSON ingestion mappings and, optionally, CloudEvents helper
    objects) for every concrete ``object`` type selected from the schema.
    """

    # JSON Structure compound types; their values are stored as ``dynamic``.
    _COMPLEX_TYPES = ('object', 'array', 'set', 'map', 'choice', 'tuple')

    # (column name, Kusto type, JSON path) of the CloudEvents context columns.
    _CLOUDEVENTS_COLUMNS = (
        ("___type", "string", "$.type"),
        ("___source", "string", "$.source"),
        ("___id", "string", "$.id"),
        ("___time", "datetime", "$.time"),
        ("___subject", "string", "$.subject"),
    )

    # JSON Structure primitive type name -> Kusto scalar type.
    _PRIMITIVE_TYPE_MAP = {
        # JSON primitive types
        'null': 'dynamic',
        'boolean': 'bool',
        'string': 'string',
        'integer': 'int',
        'number': 'real',

        # Extended integer types
        'int8': 'int',
        'uint8': 'int',
        'int16': 'int',
        'uint16': 'int',
        'int32': 'int',
        'uint32': 'long',  # uint32 can exceed int range
        'int64': 'long',
        'uint64': 'long',
        'int128': 'decimal',  # Use decimal for very large integers
        'uint128': 'decimal',

        # Extended float types
        'float8': 'real',
        'float': 'real',
        'double': 'real',
        'binary32': 'real',
        'binary64': 'real',
        'decimal': 'decimal',

        # Binary
        'binary': 'dynamic',

        # Date/time types
        'date': 'datetime',
        'time': 'timespan',
        'datetime': 'datetime',
        'timestamp': 'datetime',
        'duration': 'timespan',

        # Other types
        'uuid': 'guid',
        'uri': 'string',
        'jsonpointer': 'string',
        'any': 'dynamic',
    }

    def __init__(self):
        """Initializes a new instance of the StructureToKusto class."""
        # Schemas that declare an ``$id``, keyed by their (absolute) identifier.
        self.schema_registry: Dict[str, Dict] = {}
        # Kept for backward compatibility; not consulted by the methods of this class.
        self.processed_types: set = set()

    def resolve_ref(self, ref: str, context_schema: Optional[Dict] = None, schema_doc: Optional[Dict] = None) -> Optional[Dict]:
        """Resolves a reference to the schema node it points at.

        Args:
            ref: Either a ``#/...`` JSON pointer into the document or an
                identifier previously registered via ``register_schema_ids``.
            context_schema: Document to resolve pointers against; preferred
                over ``schema_doc`` when truthy.
            schema_doc: Fallback document for pointer resolution.

        Returns:
            The referenced node, or ``None`` when the reference cannot be
            resolved (including when ``ref`` is not a string).
        """
        if not isinstance(ref, str):
            return None

        # Anything that is not a document-local pointer is looked up by $id.
        if not ref.startswith('#/'):
            return self.schema_registry.get(ref)

        node = context_schema if context_schema else schema_doc
        for token in ref[2:].split('/'):
            if not isinstance(node, dict):
                return None
            if token not in node:
                # Literal key not found: retry with RFC 6901 escapes decoded
                # ("~1" -> "/", then "~0" -> "~").
                token = token.replace('~1', '/').replace('~0', '~')
                if token not in node:
                    return None
            node = node[token]
        return node

    def register_schema_ids(self, schema: Dict, base_uri: str = '') -> None:
        """Recursively registers every schema that carries a string ``$id``.

        Relative identifiers are joined onto ``base_uri``; the resulting
        identifier becomes the base for schemas nested below it. The walk
        covers ``definitions``, ``properties``, ``items``, ``values`` and
        ``additionalProperties``.
        """
        if not isinstance(schema, dict):
            return

        schema_id = schema.get('$id')
        if isinstance(schema_id, str):
            if base_uri and not schema_id.startswith(('http://', 'https://', 'urn:')):
                from urllib.parse import urljoin
                schema_id = urljoin(base_uri, schema_id)
            self.schema_registry[schema_id] = schema
            base_uri = schema_id  # nested schemas resolve relative to this one

        # Keyed containers of sub-schemas.
        for container_key in ('definitions', 'properties'):
            container = schema.get(container_key)
            if isinstance(container, dict):
                for child in container.values():
                    if isinstance(child, dict):
                        self.register_schema_ids(child, base_uri)

        # Single sub-schema keywords.
        for key in ('items', 'values', 'additionalProperties'):
            child = schema.get(key)
            if isinstance(child, dict):
                self.register_schema_ids(child, base_uri)

    def flatten_inheritance(self, schema: Dict, schema_doc: Dict) -> Dict:
        """Flattens ``$extends`` inheritance into a single schema.

        Returns ``schema`` itself when it has no ``$extends``. Otherwise
        returns a shallow copy in which base properties and ``required``
        entries are merged in (derived properties win), ``$extends`` is
        removed and the description notes the base type. If the base cannot
        be resolved, or following it would revisit a type already being
        flattened, the copy is returned with ``$extends`` left in place.
        """
        return self._flatten_inheritance(schema, schema_doc, {id(schema)})

    def _flatten_inheritance(self, schema: Dict, schema_doc: Dict, chain: set) -> Dict:
        """Implements ``flatten_inheritance``; ``chain`` holds the ids of the schemas being flattened."""
        if '$extends' not in schema:
            return schema

        flattened = schema.copy()
        base_schema = self.resolve_ref(schema['$extends'], schema_doc, schema_doc)
        if not base_schema or not isinstance(base_schema, dict):
            return flattened
        if id(base_schema) in chain:
            # Cyclic $extends: stop here instead of recursing forever.
            return flattened

        flattened_base = self._flatten_inheritance(base_schema, schema_doc, chain | {id(base_schema)})

        # Base properties first so that derived properties override them.
        merged_props = {}
        merged_props.update(flattened_base.get('properties', {}))
        merged_props.update(schema.get('properties', {}))
        flattened['properties'] = merged_props

        base_required = flattened_base.get('required', [])
        derived_required = schema.get('required', [])
        if base_required or derived_required:
            # De-duplicate while keeping a stable order (base entries first).
            merged_required = []
            for entry in base_required + derived_required:
                if entry not in merged_required:
                    merged_required.append(entry)
            flattened['required'] = merged_required

        base_name = flattened_base.get('name', 'base type')
        orig_desc = flattened.get('description', '')
        if orig_desc:
            flattened['description'] = f"{orig_desc} (flattened from {base_name})"
        else:
            flattened['description'] = f"Flattened from {base_name}"

        flattened.pop('$extends', None)
        return flattened

    def is_concrete_type(self, schema: Dict) -> bool:
        """Check if a type is concrete (not abstract)."""
        return not schema.get('abstract', False)

    def find_all_object_types(self, schema: Dict, schema_doc: Dict) -> List[Dict]:
        """Collects the concrete object types of a schema, inheritance flattened.

        Looks at the ``$root`` target (or the root itself when it is an
        object), at every entry of ``definitions`` and one namespace level
        below non-object definitions. ``schema`` may also be a list of
        schemas. Each schema node is reported at most once, in discovery
        order; abstract types are skipped.
        """
        object_types: List[Dict] = []
        visited = set()  # ids of nodes already walked, to avoid duplicates

        def collect(node):
            if not isinstance(node, dict) or id(node) in visited:
                return
            visited.add(id(node))

            if node.get('type') == 'object' and self.is_concrete_type(node):
                object_types.append(self.flatten_inheritance(node, schema_doc))

            definitions = node.get('definitions')
            if not isinstance(definitions, dict):
                return
            for def_schema in definitions.values():
                if not isinstance(def_schema, dict):
                    continue
                if def_schema.get('type') == 'object':
                    collect(def_schema)
                else:
                    # Treat the entry as a namespace and look one level down.
                    for nested_val in def_schema.values():
                        if isinstance(nested_val, dict):
                            collect(nested_val)

        if isinstance(schema, dict):
            if '$root' in schema:
                root_schema = self.resolve_ref(schema['$root'], schema, schema)
                if root_schema:
                    collect(root_schema)
            elif schema.get('type') == 'object':
                collect(schema)

            # Definitions are always walked; nodes seen above are skipped.
            if 'definitions' in schema:
                collect(schema)
        elif isinstance(schema, list):
            for entry in schema:
                if isinstance(entry, dict):
                    collect(entry)

        return object_types

    @staticmethod
    def _is_const_property(prop_schema: Any) -> bool:
        """Returns True when a property schema declares a ``const`` value (it then gets no column)."""
        return isinstance(prop_schema, dict) and 'const' in prop_schema

    @staticmethod
    def _json_mapping(table_name: str, mapping_name: str, column_paths: List[tuple]) -> List[str]:
        """Builds the script lines of one JSON ingestion mapping from (column, path) pairs."""
        lines = [
            f".create-or-alter table [{table_name}] ingestion json mapping \"{mapping_name}\"",
            "```\n[",
        ]
        entries = [f" {{\"column\": \"{column}\", \"path\": \"{path}\"}}" for column, path in column_paths]
        if entries:
            # Joined rather than comma-terminated so the array is valid JSON.
            lines.append(",\n".join(entries))
        lines.append("]\n```\n\n")
        return lines

    def convert_record_to_kusto(self, recordschema: dict, schema_doc: dict, emit_cloudevents_columns: bool, emit_cloudevents_dispatch_table: bool) -> List[str]:
        """Converts a JSON Structure object schema to Kusto script lines.

        Args:
            recordschema: Object schema; ``name`` (default ``UnnamedTable``)
                is the table name and ``properties`` supplies the columns.
            schema_doc: Document used to resolve ``$ref`` in property types.
            emit_cloudevents_columns: Adds the ``___type``/``___source``/
                ``___id``/``___time``/``___subject`` columns, a structured
                CloudEvents mapping and a ``<table>Latest`` materialized view.
            emit_cloudevents_dispatch_table: Adds an update policy that fills
                the table from ``_cloudevents_dispatch``.

        Returns:
            The script as a list of lines/fragments. Properties declaring
            ``const`` get no column, mapping entry or projection; each is
            noted in a ``//`` comment line instead.
        """
        table_name = recordschema.get("name", "UnnamedTable")
        properties = recordschema.get("properties", {})

        # Properties that are actually stored as columns, with their Kusto type.
        stored = [
            (prop_name, self.convert_structure_type_to_kusto_type(prop_schema, schema_doc))
            for prop_name, prop_schema in properties.items()
            if not self._is_const_property(prop_schema)
        ]

        kusto = [f".create-merge table [{table_name}] ("]
        columns = [f" [{column_name}]: {column_type}" for column_name, column_type in stored]
        if emit_cloudevents_columns:
            columns.extend(f" [{name}]: {kusto_type}" for name, kusto_type, _ in self._CLOUDEVENTS_COLUMNS)
        kusto.append(",\n".join(columns))
        kusto.append(");")
        kusto.append("")

        # Table docstring (double JSON-encoded: a JSON document inside a quoted literal).
        if "description" in recordschema or "doc" in recordschema:
            doc_data = recordschema.get("description", recordschema.get("doc", ""))
            if len(doc_data) > 1000:
                doc_data = doc_data[:997] + "..."

            notes = []
            if '$extends' in recordschema:
                notes.append("Note: Properties from base types have been flattened into this table.")
            if recordschema.get('abstract', False):
                notes.append("Warning: Abstract type - should not be instantiated directly.")
            if notes:
                doc_data = doc_data + " " + " ".join(notes)

            doc_string = json.dumps(json.dumps({"description": doc_data}))
            kusto.append(f".alter table [{table_name}] docstring {doc_string};")
            kusto.append("")

        doc_string_statement = []
        for prop_name, prop_schema in properties.items():
            if self._is_const_property(prop_schema):
                # The script so far always ends with a blank entry, so this puts
                # the note just ahead of the last non-blank entry. "//" is the
                # KQL comment marker.
                kusto.insert(
                    len(kusto) - 2,
                    f"// Const field '{prop_name}' with value: {json.dumps(prop_schema['const'])}")
                continue

            if not isinstance(prop_schema, dict):
                continue  # shorthand type names carry no documentation

            if "description" in prop_schema or "doc" in prop_schema:
                doc_data = prop_schema.get("description", prop_schema.get("doc", ""))
                if len(doc_data) > 900:
                    doc_data = doc_data[:897] + "..."
                doc_content = {"description": doc_data}
                # Compound types also carry their schema, if it is small enough.
                if prop_schema.get('type') in self._COMPLEX_TYPES:
                    if (len(json.dumps(prop_schema)) + len(doc_data)) > 900:
                        doc_content["schema"] = '{ "doc": "Schema too large to inline. Please refer to the JSON Structure schema for more details." }'
                    else:
                        doc_content["schema"] = prop_schema
                doc = json.dumps(json.dumps(doc_content))
                doc_string_statement.append(f" [{prop_name}]: {doc}")

        if doc_string_statement and emit_cloudevents_columns:
            doc_string_statement.extend([
                " [___type] : 'Event type'",
                " [___source]: 'Context origin/source of the event'",
                " [___id]: 'Event identifier'",
                " [___time]: 'Event generation time'",
                " [___subject]: 'Context subject of the event'"
            ])
        if doc_string_statement:
            kusto.append(f".alter table [{table_name}] column-docstrings (")
            kusto.append(",\n".join(doc_string_statement))
            kusto.append(");")
            kusto.append("")

        cloudevents_paths = [(name, path) for name, _, path in self._CLOUDEVENTS_COLUMNS]

        # Flat mapping: payload properties sit at the document root.
        flat_paths = cloudevents_paths if emit_cloudevents_columns else []
        flat_paths = flat_paths + [(name, f"$.{name}") for name, _ in stored]
        kusto.extend(self._json_mapping(table_name, f"{table_name}_json_flat", flat_paths))

        if emit_cloudevents_columns:
            # Structured mapping: payload properties sit below "data".
            structured_paths = cloudevents_paths + [(name, f"$.data.{name}") for name, _ in stored]
            kusto.extend(self._json_mapping(table_name, f"{table_name}_json_ce_structured", structured_paths))

            kusto.append(f".drop materialized-view {table_name}Latest ifexists;")
            kusto.append("")
            kusto.append(
                f".create materialized-view with (backfill=true) {table_name}Latest on table {table_name} {{")
            kusto.append(
                f" {table_name} | summarize arg_max(___time, *) by ___type, ___source, ___subject")
            kusto.append("}")
            kusto.append("")

        if emit_cloudevents_dispatch_table:
            namespace = recordschema.get("namespace", "")
            event_type = namespace + "." + table_name if namespace else table_name

            # Project only the stored columns so the query matches the table.
            projections = [
                f"['{column_name}'] = to{column_type}(data.['{column_name}'])"
                for column_name, column_type in stored
            ]
            projections.append(
                "___type = type,___source = source,___id = ['id'],___time = ['time'],___subject = subject")
            query = (
                f"_cloudevents_dispatch | where (specversion == '1.0' and type == '{event_type}') | "
                "project " + ",".join(projections)
            )

            # Update policy triggered by ingestion into the dispatch table.
            kusto.append(f".alter table [{table_name}] policy update")
            kusto.append("```")
            kusto.append("[{")
            kusto.append(" \"IsEnabled\": true,")
            kusto.append(" \"Source\": \"_cloudevents_dispatch\",")
            kusto.append(f" \"Query\": {json.dumps(query, ensure_ascii=False)},")
            kusto.append(" \"IsTransactional\": false,")
            kusto.append(" \"PropagateIngestionProperties\": true")
            kusto.append("}]")
            kusto.append("```\n")

        return kusto

    @staticmethod
    def _find_definition_by_name(definitions: Any, type_name: str) -> Optional[Dict]:
        """Finds a definition whose ``name`` equals ``type_name``.

        Checks object types declared directly under ``definitions`` and any
        dict one namespace level below; returns ``None`` when nothing matches.
        """
        if not isinstance(definitions, dict):
            return None
        for def_val in definitions.values():
            if not isinstance(def_val, dict):
                continue
            if def_val.get('type') == 'object' and def_val.get('name') == type_name:
                return def_val
            for nested_val in def_val.values():
                if isinstance(nested_val, dict) and nested_val.get('name') == type_name:
                    return nested_val
        return None

    @staticmethod
    def _dispatch_table_script() -> List[str]:
        """Returns the script lines creating ``_cloudevents_dispatch`` and its JSON mapping."""
        return [
            ".create-merge table [_cloudevents_dispatch] (",
            " [specversion]: string,",
            " [type]: string,",
            " [source]: string,",
            " [id]: string,",
            " [time]: datetime,",
            " [subject]: string,",
            " [datacontenttype]: string,",
            " [dataschema]: string,",
            " [data]: dynamic",
            ");\n\n",
            ".create-or-alter table [_cloudevents_dispatch] ingestion json mapping \"_cloudevents_dispatch_json\"",
            "```\n[",
            " {\"column\": \"specversion\", \"path\": \"$.specversion\"},",
            " {\"column\": \"type\", \"path\": \"$.type\"},",
            " {\"column\": \"source\", \"path\": \"$.source\"},",
            " {\"column\": \"id\", \"path\": \"$.id\"},",
            " {\"column\": \"time\", \"path\": \"$.time\"},",
            " {\"column\": \"subject\", \"path\": \"$.subject\"},",
            " {\"column\": \"datacontenttype\", \"path\": \"$.datacontenttype\"},",
            " {\"column\": \"dataschema\", \"path\": \"$.dataschema\"},",
            " {\"column\": \"data\", \"path\": \"$.data\"}",
            "]\n```\n\n",
        ]

    def convert_structure_to_kusto_script(self, structure_schema_path, structure_record_type, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False) -> str:
        """Converts a JSON Structure schema file to a Kusto script.

        Args:
            structure_schema_path: Path of the JSON Structure schema file.
            structure_record_type: ``name`` of the type to convert; when
                falsy, all concrete object types found are converted. It is
                not consulted when the document has ``$root`` or is itself an
                object type.
            emit_cloudevents_columns: See ``convert_record_to_kusto``.
            emit_cloudevents_dispatch_table: Also emits the
                ``_cloudevents_dispatch`` table; implies the columns option.

        Returns:
            The script text, terminated by a single newline.

        Raises:
            SystemExit: After printing a message, when no path is given, the
                requested type is missing from a schema list, or no type to
                convert is found.
        """
        if emit_cloudevents_dispatch_table:
            emit_cloudevents_columns = True
        if not structure_schema_path:
            print("Please specify the JSON Structure schema file")
            sys.exit(1)
        with open(structure_schema_path, "r", encoding="utf-8") as f:
            schema = json.load(f)

        # Register schema IDs for $ref resolution.
        if isinstance(schema, dict):
            self.register_schema_ids(schema)
        elif isinstance(schema, list):
            for entry in schema:
                if isinstance(entry, dict):
                    self.register_schema_ids(entry)

        record_schemas = []
        schema_doc = None

        if isinstance(schema, list):
            # NOTE(review): references are resolved against the first entry only.
            schema_doc = schema[0] if schema else {}
            if structure_record_type:
                record_schema = next(
                    (x for x in schema if isinstance(x, dict) and x.get("name") == structure_record_type), None)
                if record_schema is None:
                    print(
                        f"No record type {structure_record_type} found in the JSON Structure schema")
                    sys.exit(1)
                record_schemas = [self.flatten_inheritance(record_schema, schema_doc)]
            else:
                record_schemas = self.find_all_object_types(schema, schema_doc)
                if not record_schemas:
                    # Fallback to the first object type, concrete or not.
                    record_schema = next(
                        (x for x in schema if isinstance(x, dict) and x.get("type") == "object"), None)
                    if record_schema:
                        record_schemas = [self.flatten_inheritance(record_schema, schema_doc)]
        elif isinstance(schema, dict):
            schema_doc = schema
            if '$root' in schema:
                record_schema = self.resolve_ref(schema['$root'], schema, schema)
                if record_schema:
                    record_schemas = [self.flatten_inheritance(record_schema, schema_doc)]
            elif schema.get('type') == 'object':
                record_schemas = [self.flatten_inheritance(schema, schema_doc)]
            elif not structure_record_type:
                record_schemas = self.find_all_object_types(schema, schema_doc)
                if not record_schemas:
                    # Old fallback: object types one namespace level below definitions.
                    definitions = schema.get('definitions')
                    if isinstance(definitions, dict):
                        for def_val in definitions.values():
                            if not isinstance(def_val, dict):
                                continue
                            for nested_val in def_val.values():
                                if (isinstance(nested_val, dict)
                                        and nested_val.get('type') == 'object'
                                        and self.is_concrete_type(nested_val)):
                                    record_schemas.append(self.flatten_inheritance(nested_val, schema_doc))
            else:
                match = self._find_definition_by_name(schema.get('definitions'), structure_record_type)
                if match is not None:
                    record_schemas = [self.flatten_inheritance(match, schema_doc)]

        if not record_schemas:
            print("Expected a JSON Structure schema with a root object type or a $root reference")
            sys.exit(1)

        kusto_script = []
        if emit_cloudevents_dispatch_table:
            kusto_script.extend(self._dispatch_table_script())

        for record_schema in record_schemas:
            if not isinstance(record_schema, dict) or record_schema.get("type") != "object":
                continue
            # Skip abstract types that somehow made it through.
            if not self.is_concrete_type(record_schema):
                continue
            kusto_script.extend(self.convert_record_to_kusto(
                record_schema, schema_doc, emit_cloudevents_columns, emit_cloudevents_dispatch_table))

        result = "\n".join(kusto_script)
        # Trim trailing whitespace and end with exactly one newline.
        return result.rstrip() + "\n" if result else ""

    def convert_structure_to_kusto_file(self, structure_schema_path, structure_record_type, kusto_file_path, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False):
        """Generates the Kusto script for a schema file and writes it to ``kusto_file_path`` (UTF-8)."""
        script = self.convert_structure_to_kusto_script(
            structure_schema_path, structure_record_type, emit_cloudevents_columns, emit_cloudevents_dispatch_table)
        with open(kusto_file_path, "w", encoding="utf-8") as kusto_file:
            kusto_file.write(script)

    def convert_structure_type_to_kusto_type(self, structure_type: Union[str, dict, list], schema_doc: Optional[Dict] = None) -> str:
        """Converts a JSON Structure type to a Kusto type.

        Args:
            structure_type: A type name, a schema object (possibly a ``$ref``
                or with a ``type`` that is itself a union list or ``$ref``
                object) or a union list of types.
            schema_doc: Document used to resolve ``$ref`` pointers.

        Returns:
            The Kusto scalar type name; ``dynamic`` for compound types,
            unions with other than one non-null member, unresolved or cyclic
            references and anything unrecognised.
        """
        return self._kusto_type(structure_type, schema_doc, frozenset())

    def _kusto_type(self, structure_type: Any, schema_doc: Optional[Dict], seen_refs: frozenset) -> str:
        """Implements ``convert_structure_type_to_kusto_type``; ``seen_refs`` breaks ``$ref`` cycles."""
        if isinstance(structure_type, list):
            # A union collapses to its single non-null member, if there is exactly one.
            non_null_types = [t for t in structure_type if t != 'null']
            if len(non_null_types) == 1:
                return self._kusto_type(non_null_types[0], schema_doc, seen_refs)
            return "dynamic"

        if isinstance(structure_type, dict):
            if '$ref' in structure_type:
                ref = structure_type['$ref']
                if not isinstance(ref, str) or ref in seen_refs:
                    return "dynamic"
                ref_schema = self.resolve_ref(ref, schema_doc, schema_doc)
                if ref_schema:
                    return self._kusto_type(ref_schema, schema_doc, seen_refs | {ref})
                return "dynamic"

            if 'enum' in structure_type:
                return "string"  # enums map to string in Kusto

            if 'type' not in structure_type:
                return "dynamic"

            declared = structure_type['type']
            if isinstance(declared, (dict, list)):
                # "type" holding a $ref object or a union list.
                return self._kusto_type(declared, schema_doc, seen_refs)
            if declared in self._COMPLEX_TYPES:
                return "dynamic"
            return self.map_primitive_type(declared)

        if isinstance(structure_type, str):
            return self.map_primitive_type(structure_type)

        return "dynamic"

    def map_primitive_type(self, type_value: str) -> str:
        """Maps a JSON Structure primitive type to a Kusto scalar type (``dynamic`` when unknown)."""
        if not isinstance(type_value, str):
            return 'dynamic'
        return self._PRIMITIVE_TYPE_MAP.get(type_value, 'dynamic')
606
+
607
+
608
def convert_structure_to_kusto_file(structure_schema_path, structure_record_type, kusto_file_path, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False):
    """Writes the Kusto script generated from a JSON Structure schema to a file.

    Thin module-level wrapper: creates a fresh ``StructureToKusto`` and
    forwards all arguments, unchanged and in order, to its
    ``convert_structure_to_kusto_file`` method.
    """
    converter = StructureToKusto()
    converter.convert_structure_to_kusto_file(
        structure_schema_path,
        structure_record_type,
        kusto_file_path,
        emit_cloudevents_columns,
        emit_cloudevents_dispatch_table,
    )
613
+
614
+
615
def convert_structure_to_kusto_db(structure_schema_path, structure_record_type, kusto_uri, kusto_database, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False, token_provider=None):
    """Generates the Kusto script for a JSON Structure schema and runs it against a database.

    The script is split on blank lines and every non-empty chunk is sent as
    one management command to ``kusto_database`` on ``kusto_uri``.
    Authentication uses ``token_provider`` when one is supplied, otherwise
    the Azure CLI login. On the first failing command the exception is
    printed and the process exits with status 1.
    """
    converter = StructureToKusto()
    script = converter.convert_structure_to_kusto_script(
        structure_schema_path,
        structure_record_type,
        emit_cloudevents_columns,
        emit_cloudevents_dispatch_table,
    )

    if token_provider:
        kcsb = KustoConnectionStringBuilder.with_token_provider(kusto_uri, token_provider)
    else:
        kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(kusto_uri)
    client = KustoClient(kcsb)

    for statement in script.split("\n\n"):
        if not statement.strip():
            continue  # nothing to execute between consecutive separators
        try:
            client.execute_mgmt(kusto_database, statement)
        except Exception as e:
            print(e)
            sys.exit(1)
630
+
631
+
632
def convert_structure_to_kusto(structure_schema_path, structure_record_type, kusto_file_path, kusto_uri, kusto_database, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False, token_provider=None):
    """Converts a JSON Structure schema to Kusto, targeting a file or a database.

    When neither ``kusto_uri`` nor ``kusto_database`` is given, the script is
    written to ``kusto_file_path``. If either one is given, the script is
    executed against the database and ``kusto_file_path`` is not used.
    """
    targets_database = bool(kusto_uri or kusto_database)
    if targets_database:
        convert_structure_to_kusto_db(
            structure_schema_path,
            structure_record_type,
            kusto_uri,
            kusto_database,
            emit_cloudevents_columns,
            emit_cloudevents_dispatch_table,
            token_provider,
        )
        return
    convert_structure_to_kusto_file(
        structure_schema_path,
        structure_record_type,
        kusto_file_path,
        emit_cloudevents_columns,
        emit_cloudevents_dispatch_table,
    )