avrotize 2.21.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +66 -0
- avrotize/__main__.py +6 -0
- avrotize/_version.py +34 -0
- avrotize/asn1toavro.py +160 -0
- avrotize/avrotize.py +152 -0
- avrotize/avrotocpp/CMakeLists.txt.jinja +77 -0
- avrotize/avrotocpp/build.bat.jinja +7 -0
- avrotize/avrotocpp/build.sh.jinja +7 -0
- avrotize/avrotocpp/dataclass_body.jinja +108 -0
- avrotize/avrotocpp/vcpkg.json.jinja +21 -0
- avrotize/avrotocpp.py +483 -0
- avrotize/avrotocsharp/README.md.jinja +166 -0
- avrotize/avrotocsharp/class_test.cs.jinja +266 -0
- avrotize/avrotocsharp/dataclass_core.jinja +293 -0
- avrotize/avrotocsharp/enum_test.cs.jinja +20 -0
- avrotize/avrotocsharp/project.csproj.jinja +30 -0
- avrotize/avrotocsharp/project.sln.jinja +34 -0
- avrotize/avrotocsharp/run_coverage.ps1.jinja +98 -0
- avrotize/avrotocsharp/run_coverage.sh.jinja +149 -0
- avrotize/avrotocsharp/testproject.csproj.jinja +19 -0
- avrotize/avrotocsharp.py +1180 -0
- avrotize/avrotocsv.py +121 -0
- avrotize/avrotodatapackage.py +173 -0
- avrotize/avrotodb.py +1383 -0
- avrotize/avrotogo/go_enum.jinja +12 -0
- avrotize/avrotogo/go_helpers.jinja +31 -0
- avrotize/avrotogo/go_struct.jinja +151 -0
- avrotize/avrotogo/go_test.jinja +47 -0
- avrotize/avrotogo/go_union.jinja +38 -0
- avrotize/avrotogo.py +476 -0
- avrotize/avrotographql.py +197 -0
- avrotize/avrotoiceberg.py +210 -0
- avrotize/avrotojava/class_test.java.jinja +212 -0
- avrotize/avrotojava/enum_test.java.jinja +21 -0
- avrotize/avrotojava/testproject.pom.jinja +54 -0
- avrotize/avrotojava.py +2156 -0
- avrotize/avrotojs.py +250 -0
- avrotize/avrotojsons.py +481 -0
- avrotize/avrotojstruct.py +345 -0
- avrotize/avrotokusto.py +364 -0
- avrotize/avrotomd/README.md.jinja +49 -0
- avrotize/avrotomd.py +137 -0
- avrotize/avrotools.py +168 -0
- avrotize/avrotoparquet.py +208 -0
- avrotize/avrotoproto.py +359 -0
- avrotize/avrotopython/dataclass_core.jinja +241 -0
- avrotize/avrotopython/enum_core.jinja +87 -0
- avrotize/avrotopython/pyproject_toml.jinja +18 -0
- avrotize/avrotopython/test_class.jinja +97 -0
- avrotize/avrotopython/test_enum.jinja +23 -0
- avrotize/avrotopython.py +626 -0
- avrotize/avrotorust/dataclass_enum.rs.jinja +74 -0
- avrotize/avrotorust/dataclass_struct.rs.jinja +204 -0
- avrotize/avrotorust/dataclass_union.rs.jinja +105 -0
- avrotize/avrotorust.py +435 -0
- avrotize/avrotots/class_core.ts.jinja +140 -0
- avrotize/avrotots/class_test.ts.jinja +77 -0
- avrotize/avrotots/enum_core.ts.jinja +46 -0
- avrotize/avrotots/gitignore.jinja +34 -0
- avrotize/avrotots/index.ts.jinja +0 -0
- avrotize/avrotots/package.json.jinja +23 -0
- avrotize/avrotots/tsconfig.json.jinja +21 -0
- avrotize/avrotots.py +687 -0
- avrotize/avrotoxsd.py +344 -0
- avrotize/cddltostructure.py +1841 -0
- avrotize/commands.json +3496 -0
- avrotize/common.py +834 -0
- avrotize/constants.py +87 -0
- avrotize/csvtoavro.py +132 -0
- avrotize/datapackagetoavro.py +76 -0
- avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
- avrotize/dependencies/cs/net90/dependencies.csproj +29 -0
- avrotize/dependencies/go/go121/go.mod +6 -0
- avrotize/dependencies/java/jdk21/pom.xml +91 -0
- avrotize/dependencies/python/py312/requirements.txt +13 -0
- avrotize/dependencies/rust/stable/Cargo.toml +17 -0
- avrotize/dependencies/typescript/node22/package.json +16 -0
- avrotize/dependency_resolver.py +348 -0
- avrotize/dependency_version.py +432 -0
- avrotize/generic/generic.avsc +57 -0
- avrotize/jsonstoavro.py +2167 -0
- avrotize/jsonstostructure.py +2864 -0
- avrotize/jstructtoavro.py +878 -0
- avrotize/kstructtoavro.py +93 -0
- avrotize/kustotoavro.py +455 -0
- avrotize/openapitostructure.py +717 -0
- avrotize/parquettoavro.py +157 -0
- avrotize/proto2parser.py +498 -0
- avrotize/proto3parser.py +403 -0
- avrotize/prototoavro.py +382 -0
- avrotize/prototypes/any.avsc +19 -0
- avrotize/prototypes/api.avsc +106 -0
- avrotize/prototypes/duration.avsc +20 -0
- avrotize/prototypes/field_mask.avsc +18 -0
- avrotize/prototypes/struct.avsc +60 -0
- avrotize/prototypes/timestamp.avsc +20 -0
- avrotize/prototypes/type.avsc +253 -0
- avrotize/prototypes/wrappers.avsc +117 -0
- avrotize/structuretocddl.py +597 -0
- avrotize/structuretocpp/CMakeLists.txt.jinja +76 -0
- avrotize/structuretocpp/build.bat.jinja +3 -0
- avrotize/structuretocpp/build.sh.jinja +3 -0
- avrotize/structuretocpp/dataclass_body.jinja +50 -0
- avrotize/structuretocpp/vcpkg.json.jinja +11 -0
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsharp/class_test.cs.jinja +180 -0
- avrotize/structuretocsharp/dataclass_core.jinja +156 -0
- avrotize/structuretocsharp/enum_test.cs.jinja +36 -0
- avrotize/structuretocsharp/json_structure_converters.cs.jinja +399 -0
- avrotize/structuretocsharp/program.cs.jinja +49 -0
- avrotize/structuretocsharp/project.csproj.jinja +17 -0
- avrotize/structuretocsharp/project.sln.jinja +34 -0
- avrotize/structuretocsharp/testproject.csproj.jinja +18 -0
- avrotize/structuretocsharp/tuple_converter.cs.jinja +121 -0
- avrotize/structuretocsharp.py +2295 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo/go_enum.jinja +12 -0
- avrotize/structuretogo/go_helpers.jinja +26 -0
- avrotize/structuretogo/go_interface.jinja +18 -0
- avrotize/structuretogo/go_struct.jinja +187 -0
- avrotize/structuretogo/go_test.jinja +70 -0
- avrotize/structuretogo.py +729 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava/choice_core.jinja +34 -0
- avrotize/structuretojava/class_core.jinja +23 -0
- avrotize/structuretojava/enum_core.jinja +18 -0
- avrotize/structuretojava/equals_hashcode.jinja +30 -0
- avrotize/structuretojava/pom.xml.jinja +26 -0
- avrotize/structuretojava/tuple_core.jinja +49 -0
- avrotize/structuretojava.py +938 -0
- avrotize/structuretojs/class_core.js.jinja +33 -0
- avrotize/structuretojs/enum_core.js.jinja +10 -0
- avrotize/structuretojs/package.json.jinja +12 -0
- avrotize/structuretojs/test_class.js.jinja +84 -0
- avrotize/structuretojs/test_enum.js.jinja +58 -0
- avrotize/structuretojs/test_runner.js.jinja +45 -0
- avrotize/structuretojs.py +657 -0
- avrotize/structuretojsons.py +498 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd/README.md.jinja +204 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretopython/dataclass_core.jinja +363 -0
- avrotize/structuretopython/enum_core.jinja +45 -0
- avrotize/structuretopython/map_alias.jinja +21 -0
- avrotize/structuretopython/pyproject_toml.jinja +23 -0
- avrotize/structuretopython/test_class.jinja +103 -0
- avrotize/structuretopython/test_enum.jinja +34 -0
- avrotize/structuretopython.py +799 -0
- avrotize/structuretorust/dataclass_enum.rs.jinja +63 -0
- avrotize/structuretorust/dataclass_struct.rs.jinja +121 -0
- avrotize/structuretorust/dataclass_union.rs.jinja +81 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretots/class_core.ts.jinja +78 -0
- avrotize/structuretots/enum_core.ts.jinja +6 -0
- avrotize/structuretots/gitignore.jinja +8 -0
- avrotize/structuretots/index.ts.jinja +1 -0
- avrotize/structuretots/package.json.jinja +39 -0
- avrotize/structuretots/test_class.ts.jinja +35 -0
- avrotize/structuretots/tsconfig.json.jinja +21 -0
- avrotize/structuretots.py +740 -0
- avrotize/structuretoxsd.py +679 -0
- avrotize/xsdtoavro.py +413 -0
- avrotize-2.21.1.dist-info/METADATA +1319 -0
- avrotize-2.21.1.dist-info/RECORD +171 -0
- avrotize-2.21.1.dist-info/WHEEL +4 -0
- avrotize-2.21.1.dist-info/entry_points.txt +3 -0
- avrotize-2.21.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,639 @@
|
|
|
1
|
+
"""Converts a JSON Structure schema to a Kusto table schema."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Any, List, Optional, Dict, Union
|
|
6
|
+
from avrotize.common import build_flat_type_dict, inline_avro_references, strip_first_doc
|
|
7
|
+
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class StructureToKusto:
|
|
11
|
+
"""Converts a JSON Structure schema to a Kusto table schema."""
|
|
12
|
+
|
|
13
|
+
def __init__(self):
|
|
14
|
+
"""Initializes a new instance of the StructureToKusto class."""
|
|
15
|
+
self.schema_registry: Dict[str, Dict] = {}
|
|
16
|
+
self.processed_types: set = set() # Track processed types to avoid duplicates
|
|
17
|
+
|
|
18
|
+
def resolve_ref(self, ref: str, context_schema: Optional[Dict] = None, schema_doc: Optional[Dict] = None) -> Optional[Dict]:
    """Return the schema a reference points at, or None if it cannot be found.

    A reference starting with ``#/`` is treated as a path into
    ``context_schema`` (``schema_doc`` is used when the context is missing or
    empty) and is followed one ``/``-separated segment at a time. Any other
    reference is looked up verbatim in ``self.schema_registry``.
    """
    if ref.startswith('#/'):
        node = context_schema or schema_doc
        for segment in ref[2:].split('/'):
            # Stop as soon as the path leaves the dict structure.
            if isinstance(node, dict) and segment in node:
                node = node[segment]
            else:
                return None
        return node

    # Not a document-internal fragment: only registered `$id`s can match.
    return self.schema_registry.get(ref)
|
|
37
|
+
|
|
38
|
+
def register_schema_ids(self, schema: Dict, base_uri: str = '') -> None:
    """Record every sub-schema that carries an ``$id`` in ``self.schema_registry``.

    The walk descends through ``definitions``, ``properties``, ``items``,
    ``values`` and ``additionalProperties``. An ``$id`` that does not start
    with ``http://``, ``https://`` or ``urn:`` is joined onto ``base_uri``
    when one is known; the resulting id then becomes the base for everything
    nested below that schema. Non-dict input is ignored.
    """
    if not isinstance(schema, dict):
        return

    if '$id' in schema:
        identifier = schema['$id']
        if base_uri and not identifier.startswith(('http://', 'https://', 'urn:')):
            from urllib.parse import urljoin
            identifier = urljoin(base_uri, identifier)
        self.schema_registry[identifier] = schema
        # Children resolve their relative ids against this schema's id.
        base_uri = identifier

    # Keyword -> {name: schema} containers.
    for container in ('definitions', 'properties'):
        if container in schema:
            for child in schema[container].values():
                if isinstance(child, dict):
                    self.register_schema_ids(child, base_uri)

    # Keywords that hold a single nested schema.
    for keyword in ('items', 'values', 'additionalProperties'):
        child = schema.get(keyword)
        if isinstance(child, dict):
            self.register_schema_ids(child, base_uri)
|
|
69
|
+
|
|
70
|
+
def flatten_inheritance(self, schema: Dict, schema_doc: Dict) -> Dict:
    """Return ``schema`` with everything inherited through ``$extends`` merged in.

    A schema without ``$extends`` is returned unchanged (same object).
    Otherwise a shallow copy is returned in which:

    * ``properties`` holds the properties of every resolvable ancestor,
      root-most first, with more derived definitions overriding inherited ones;
    * ``required`` is the union of all ``required`` lists along the chain,
      in first-seen order (root-most ancestor first);
    * ``description`` notes the name of the immediate base type;
    * ``$extends`` is removed.

    If the immediate base cannot be resolved to a non-empty dict, the copy is
    returned as-is with ``$extends`` still present. The input schema is never
    mutated.

    Args:
        schema: The object schema to flatten.
        schema_doc: The document against which ``$extends`` references are
            resolved (passed to ``resolve_ref``).
    """
    if '$extends' not in schema:
        return schema

    flattened = schema.copy()

    # Follow the $extends chain iteratively. Tracking visited schemas by
    # identity stops a circular chain (A extends B extends A) from looping
    # forever; an unresolvable or non-dict base simply ends the chain.
    ancestors: List[Dict] = []
    visited = {id(schema)}
    current = schema
    while '$extends' in current:
        parent = self.resolve_ref(current['$extends'], schema_doc, schema_doc)
        if not parent or not isinstance(parent, dict) or id(parent) in visited:
            break
        visited.add(id(parent))
        ancestors.append(parent)
        current = parent

    if not ancestors:
        return flattened

    # Merge root-most ancestor first so that derived types win on conflicts.
    merged_props: Dict[str, Any] = {}
    merged_required: List[Any] = []
    for level in list(reversed(ancestors)) + [schema]:
        merged_props.update(level.get('properties', {}))
        for entry in level.get('required') or []:
            # List membership keeps the order stable and also copes with
            # unhashable entries, unlike a set-based union.
            if entry not in merged_required:
                merged_required.append(entry)

    flattened['properties'] = merged_props
    if merged_required:
        flattened['required'] = merged_required

    # Record where the inherited members came from.
    base_name = ancestors[0].get('name', 'base type')
    orig_desc = flattened.get('description', '')
    if orig_desc:
        flattened['description'] = f"{orig_desc} (flattened from {base_name})"
    else:
        flattened['description'] = f"Flattened from {base_name}"

    # The inheritance is now materialized, so the marker is dropped.
    flattened.pop('$extends', None)

    return flattened
|
|
118
|
+
|
|
119
|
+
def is_concrete_type(self, schema: Dict) -> bool:
    """Return True unless the schema sets a truthy ``abstract`` flag."""
    is_abstract = schema.get('abstract', False)
    return not is_abstract
|
|
122
|
+
|
|
123
|
+
def find_all_object_types(self, schema: Dict, schema_doc: Dict) -> List[Dict]:
    """Collect every concrete object type reachable from ``schema``.

    ``schema`` may be a single schema document (dict) or a list of them.
    For a dict, the ``$root`` target (if any) or the document itself (if it is
    an object) is visited first, followed by everything under ``definitions``.
    Entries in ``definitions`` that are not objects themselves are treated as
    namespaces and searched one level deep.

    Abstract types are skipped, and each returned type has its ``$extends``
    chain flattened via ``flatten_inheritance``. Each schema dict is visited
    at most once, so a root object that also owns ``definitions`` -- or a
    ``$root`` that points into ``definitions`` -- is not reported twice.

    Args:
        schema: Schema document, or list of schema documents, to scan.
        schema_doc: Document used to resolve ``$extends`` references.

    Returns:
        The flattened concrete object schemas, in discovery order.
    """
    object_types: List[Dict] = []
    # id() of every schema dict already visited. The dicts all belong to the
    # caller's schema tree and stay alive for the duration of this call, so
    # identity is a reliable key here.
    visited = set()

    def process_schema(s) -> None:
        if not isinstance(s, dict) or id(s) in visited:
            return
        visited.add(id(s))

        # Only concrete object types become results.
        if s.get('type') == 'object' and self.is_concrete_type(s):
            object_types.append(self.flatten_inheritance(s, schema_doc))

        if 'definitions' not in s:
            return
        for def_schema in s['definitions'].values():
            if not isinstance(def_schema, dict):
                continue
            if def_schema.get('type') == 'object':
                process_schema(def_schema)
            else:
                # Not an object itself: look at its direct dict children.
                for nested_val in def_schema.values():
                    if isinstance(nested_val, dict):
                        process_schema(nested_val)

    if isinstance(schema, dict):
        if '$root' in schema:
            root_schema = self.resolve_ref(schema['$root'], schema, schema)
            if root_schema:
                process_schema(root_schema)
        elif schema.get('type') == 'object':
            process_schema(schema)

        # Definitions are always scanned; the visited set makes this a no-op
        # when the document itself was already processed above.
        if 'definitions' in schema:
            process_schema(schema)
    elif isinstance(schema, list):
        for entry in schema:
            if isinstance(entry, dict):
                process_schema(entry)

    return object_types
|
|
175
|
+
|
|
176
|
+
def convert_record_to_kusto(self, recordschema: dict, schema_doc: dict, emit_cloudevents_columns: bool, emit_cloudevents_dispatch_table: bool) -> List[str]:
    """Build the Kusto script fragments for one JSON Structure object schema.

    The returned list contains, in order: the ``.create-merge table``
    statement, an optional table docstring, optional column docstrings, the
    flat JSON ingestion mapping and -- depending on the flags -- the
    CloudEvents structured mapping, a ``<table>Latest`` materialized view and
    an update policy fed from ``_cloudevents_dispatch``.

    Properties that declare ``const`` do not become columns. They are left
    out of the table, of both ingestion mappings and of the update-policy
    projection, and are only mentioned in a ``-- Const field`` line.

    Args:
        recordschema: Object schema; ``name`` is used as the table name
            (default ``UnnamedTable``) and ``properties`` as the columns.
        schema_doc: Document handed to ``convert_structure_type_to_kusto_type``
            for reference resolution.
        emit_cloudevents_columns: Add the ``___type``/``___source``/``___id``/
            ``___time``/``___subject`` columns plus related mappings and view.
        emit_cloudevents_dispatch_table: Add the update policy that projects
            rows from ``_cloudevents_dispatch`` into this table.

    Returns:
        Script fragments; the caller joins them with newlines.
    """
    table_name = recordschema.get("name", "UnnamedTable")
    properties = recordschema.get("properties", {})

    def is_const(prop_schema) -> bool:
        """True for property schemas that declare a ``const`` value."""
        return isinstance(prop_schema, dict) and 'const' in prop_schema

    # Every column-shaped output (table, mappings, update policy) is driven
    # from this list so they cannot disagree about which columns exist.
    column_props = [(name, prop) for name, prop in properties.items() if not is_const(prop)]

    # Mapping entries for the CloudEvents context attributes; shared by the
    # flat and the structured ingestion mapping.
    cloudevents_mapping = [
        " {\"column\": \"___type\", \"path\": \"$.type\"},",
        " {\"column\": \"___source\", \"path\": \"$.source\"},",
        " {\"column\": \"___id\", \"path\": \"$.id\"},",
        " {\"column\": \"___time\", \"path\": \"$.time\"},",
        " {\"column\": \"___subject\", \"path\": \"$.subject\"},",
    ]

    kusto = []

    # --- table definition -------------------------------------------------
    kusto.append(f".create-merge table [{table_name}] (")
    columns = []
    for column_name, prop_schema in column_props:
        column_type = self.convert_structure_type_to_kusto_type(prop_schema, schema_doc)
        columns.append(f" [{column_name}]: {column_type}")
    if emit_cloudevents_columns:
        columns.extend([
            " [___type]: string",
            " [___source]: string",
            " [___id]: string",
            " [___time]: datetime",
            " [___subject]: string",
        ])
    kusto.append(",\n".join(columns))
    kusto.append(");")
    kusto.append("")

    # --- table docstring --------------------------------------------------
    if "description" in recordschema or "doc" in recordschema:
        doc_data = recordschema.get("description", recordschema.get("doc", ""))
        doc_data = (doc_data[:997] + "...") if len(doc_data) > 1000 else doc_data

        notes = []
        if '$extends' in recordschema:
            notes.append("Note: Properties from base types have been flattened into this table.")
        if recordschema.get('abstract', False):
            notes.append("Warning: Abstract type - should not be instantiated directly.")
        if notes:
            doc_data = doc_data + " " + " ".join(notes)

        # Double-encoded: the docstring is a JSON document inside a string literal.
        doc_string = json.dumps(json.dumps({"description": doc_data}))
        kusto.append(f".alter table [{table_name}] docstring {doc_string};")
        kusto.append("")

    # --- column docstrings ------------------------------------------------
    doc_string_statement = []
    for column_name, prop_schema in properties.items():
        if is_const(prop_schema):
            const_value = prop_schema['const']
            # The note is inserted just before the most recently emitted
            # statement line (skipping one trailing blank entry).
            # NOTE(review): the "--" prefix is kept as-is; confirm it is the
            # comment syntax expected by whatever executes this script.
            kusto.insert(len(kusto) - (2 if kusto and kusto[-1] == "" else 1),
                         f"-- Const field '{column_name}' with value: {json.dumps(const_value)}")
            continue

        # Shorthand (non-dict) property schemas carry no documentation.
        if not isinstance(prop_schema, dict):
            continue

        if "description" in prop_schema or "doc" in prop_schema:
            doc_data = prop_schema.get("description", prop_schema.get("doc", ""))
            if len(doc_data) > 900:
                doc_data = (doc_data[:897] + "...")
            doc_content = {"description": doc_data}
            # Complex types also get their schema inlined, size permitting.
            if 'type' in prop_schema and prop_schema['type'] in ['object', 'array', 'map', 'set', 'choice', 'tuple']:
                if (len(json.dumps(prop_schema)) + len(doc_data)) > 900:
                    doc_content["schema"] = '{ "doc": "Schema too large to inline. Please refer to the JSON Structure schema for more details." }'
                else:
                    doc_content["schema"] = prop_schema
            doc = json.dumps(json.dumps(doc_content))
            doc_string_statement.append(f" [{column_name}]: {doc}")
    if doc_string_statement and emit_cloudevents_columns:
        doc_string_statement.extend([
            " [___type] : 'Event type'",
            " [___source]: 'Context origin/source of the event'",
            " [___id]: 'Event identifier'",
            " [___time]: 'Event generation time'",
            " [___subject]: 'Context subject of the event'"
        ])
    if doc_string_statement:
        kusto.append(f".alter table [{table_name}] column-docstrings (")
        kusto.append(",\n".join(doc_string_statement))
        kusto.append(");")
        kusto.append("")

    # --- flat JSON ingestion mapping --------------------------------------
    kusto.append(
        f".create-or-alter table [{table_name}] ingestion json mapping \"{table_name}_json_flat\"")
    kusto.append("```\n[")
    if emit_cloudevents_columns:
        kusto.extend(cloudevents_mapping)
    for column_name, _ in column_props:
        kusto.append(
            f" {{\"column\": \"{column_name}\", \"path\": \"$.{column_name}\"}},")
    kusto.append("]\n```\n\n")

    if emit_cloudevents_columns:
        # --- CloudEvents structured-mode mapping (payload under $.data) ---
        kusto.append(
            f".create-or-alter table [{table_name}] ingestion json mapping \"{table_name}_json_ce_structured\"")
        kusto.append("```\n[")
        kusto.extend(cloudevents_mapping)
        for column_name, _ in column_props:
            kusto.append(
                f" {{\"column\": \"{column_name}\", \"path\": \"$.data.{column_name}\"}},")
        kusto.append("]\n```\n\n")

        # --- materialized view with the latest row per type/source/subject
        kusto.append(
            f".drop materialized-view {table_name}Latest ifexists;")
        kusto.append("")
        kusto.append(
            f".create materialized-view with (backfill=true) {table_name}Latest on table {table_name} {{")
        kusto.append(
            f" {table_name} | summarize arg_max(___time, *) by ___type, ___source, ___subject")
        kusto.append("}")
        kusto.append("")

    if emit_cloudevents_dispatch_table:
        namespace = recordschema.get("namespace", "")
        event_type = namespace + "." + table_name if namespace else table_name

        query = f"_cloudevents_dispatch | where (specversion == '1.0' and type == '{event_type}') | " + \
            "project"
        # Only real columns are projected: const properties have no column in
        # the target table, so projecting them would make the policy output
        # disagree with the table definition above.
        for column_name, prop_schema in column_props:
            column_type = self.convert_structure_type_to_kusto_type(prop_schema, schema_doc)
            query += f"['{column_name}'] = to{column_type}(data.['{column_name}']),"
        query += "___type = type,___source = source,___id = ['id'],___time = ['time'],___subject = subject"

        # Update policy: rows arriving in the dispatch table are filtered by
        # event type and projected into this table.
        kusto.append(f".alter table [{table_name}] policy update")
        kusto.append("```")
        kusto.append("[{")
        kusto.append(" \"IsEnabled\": true,")
        kusto.append(" \"Source\": \"_cloudevents_dispatch\",")
        kusto.append(
            f" \"Query\": \"{query}\",")
        kusto.append(" \"IsTransactional\": false,")
        kusto.append(" \"PropagateIngestionProperties\": true,")
        kusto.append("}]")
        kusto.append("```\n")

    return kusto
|
|
355
|
+
|
|
356
|
+
def convert_structure_to_kusto_script(self, structure_schema_path, structure_record_type, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False) -> str:
    """Converts a JSON Structure schema file to a Kusto script.

    Reads and parses the schema file, registers all ``$id``-bearing schemas,
    selects the object type(s) to convert, and concatenates the per-type
    output of ``convert_record_to_kusto``.

    Type selection:

    * list document: the entry whose ``name`` equals ``structure_record_type``,
      or -- when no type is requested -- all concrete object types, falling
      back to the first entry with ``type == "object"``;
    * dict document: the ``$root`` target if ``$root`` is present, else the
      document itself if it is an object, else all concrete object types
      (no type requested) or the requested type looked up in ``definitions``.

    Args:
        structure_schema_path: Path of the JSON Structure schema file.
        structure_record_type: Name of the type to convert; falsy to convert
            every concrete object type found.
        emit_cloudevents_columns: Forwarded to ``convert_record_to_kusto``.
        emit_cloudevents_dispatch_table: Also emit the ``_cloudevents_dispatch``
            table and its mapping; implies ``emit_cloudevents_columns``.

    Returns:
        The script text, ending in exactly one newline (or "" if nothing was
        generated).

    Note:
        Prints a message and calls ``sys.exit(1)`` when no path is given, when
        a requested type is missing from a list document, or when no type to
        convert could be found.
    """
    # The dispatch update policy writes the CloudEvents columns, so they
    # must exist whenever the dispatch table is requested.
    if emit_cloudevents_dispatch_table:
        emit_cloudevents_columns = True
    schema_file = structure_schema_path
    if not schema_file:
        print("Please specify the JSON Structure schema file")
        sys.exit(1)
    with open(schema_file, "r", encoding="utf-8") as f:
        schema_json = f.read()

    # Parse the schema as a JSON object
    schema = json.loads(schema_json)

    # Register schema IDs for $ref resolution
    if isinstance(schema, dict):
        self.register_schema_ids(schema)

    # Handle root-level array of schemas
    if isinstance(schema, list):
        for s in schema:
            if isinstance(s, dict):
                self.register_schema_ids(s)

    # Find the record(s) to convert
    record_schemas = []
    schema_doc = None

    if isinstance(schema, list):
        # For a list document, fragment references are resolved against the
        # first entry only.
        schema_doc = schema[0] if schema else {}
        if structure_record_type:
            record_schema = next(
                (x for x in schema if isinstance(x, dict) and x.get("name") == structure_record_type), None)
            if record_schema is None:
                print(
                    f"No record type {structure_record_type} found in the JSON Structure schema")
                sys.exit(1)
            # Flatten inheritance if present
            record_schemas = [self.flatten_inheritance(record_schema, schema_doc)]
        else:
            # Find all concrete object types
            all_types = self.find_all_object_types(schema, schema_doc)
            if all_types:
                record_schemas = all_types
            else:
                # Fallback to first object type
                record_schema = next(
                    (x for x in schema if isinstance(x, dict) and x.get("type") == "object"), None)
                if record_schema:
                    record_schemas = [self.flatten_inheritance(record_schema, schema_doc)]
    elif isinstance(schema, dict):
        schema_doc = schema
        # NOTE(review): the `$root` and root-object branches below do not
        # consult structure_record_type, so a requested type name is ignored
        # for such documents -- confirm this is intended.
        # Check for $root reference
        if '$root' in schema:
            root_ref = schema['$root']
            record_schema = self.resolve_ref(root_ref, schema, schema)
            if record_schema:
                # Flatten inheritance
                record_schemas = [self.flatten_inheritance(record_schema, schema_doc)]
        elif 'type' in schema and schema['type'] == 'object':
            # Flatten inheritance
            record_schemas = [self.flatten_inheritance(schema, schema_doc)]
        elif not structure_record_type:
            # Find all concrete object types in definitions
            all_types = self.find_all_object_types(schema, schema_doc)
            if all_types:
                record_schemas = all_types
            else:
                # Look for object types in definitions (old fallback logic)
                # structure_record_type is falsy in this branch, so only the
                # "collect every concrete type" arm below can fire.
                if 'definitions' in schema:
                    defs = schema['definitions']
                    for def_key, def_val in defs.items():
                        if isinstance(def_val, dict):
                            # Navigate nested definitions
                            for nested_key, nested_val in def_val.items():
                                if isinstance(nested_val, dict) and nested_val.get('type') == 'object':
                                    if structure_record_type and nested_val.get('name') == structure_record_type:
                                        record_schemas = [self.flatten_inheritance(nested_val, schema_doc)]
                                        break
                                    elif not structure_record_type and self.is_concrete_type(nested_val):
                                        record_schemas.append(self.flatten_inheritance(nested_val, schema_doc))
                            if record_schemas and structure_record_type:
                                break
        else:
            # Look for specific record type in definitions
            # NOTE(review): only the dict-valued children of each definitions
            # entry are compared by `name`; an entry that is itself the
            # requested type does not appear to be matched -- verify.
            if 'definitions' in schema:
                defs = schema['definitions']
                for def_key, def_val in defs.items():
                    if isinstance(def_val, dict):
                        for nested_key, nested_val in def_val.items():
                            if isinstance(nested_val, dict) and nested_val.get('name') == structure_record_type:
                                record_schemas = [self.flatten_inheritance(nested_val, schema_doc)]
                                break
                        if record_schemas:
                            break

    if not record_schemas:
        print("Expected a JSON Structure schema with a root object type or a $root reference")
        sys.exit(1)

    kusto_script = []

    # Shared landing table for CloudEvents plus its JSON ingestion mapping;
    # emitted once, ahead of the per-type tables.
    if emit_cloudevents_dispatch_table:
        kusto_script.append(
            ".create-merge table [_cloudevents_dispatch] (")
        kusto_script.append(" [specversion]: string,")
        kusto_script.append(" [type]: string,")
        kusto_script.append(" [source]: string,")
        kusto_script.append(" [id]: string,")
        kusto_script.append(" [time]: datetime,")
        kusto_script.append(" [subject]: string,")
        kusto_script.append(" [datacontenttype]: string,")
        kusto_script.append(" [dataschema]: string,")
        kusto_script.append(" [data]: dynamic")
        kusto_script.append(");\n\n")
        kusto_script.append(
            ".create-or-alter table [_cloudevents_dispatch] ingestion json mapping \"_cloudevents_dispatch_json\"")
        kusto_script.append("```\n[")
        kusto_script.append(
            " {\"column\": \"specversion\", \"path\": \"$.specversion\"},")
        kusto_script.append(
            " {\"column\": \"type\", \"path\": \"$.type\"},")
        kusto_script.append(
            " {\"column\": \"source\", \"path\": \"$.source\"},")
        kusto_script.append(" {\"column\": \"id\", \"path\": \"$.id\"},")
        kusto_script.append(
            " {\"column\": \"time\", \"path\": \"$.time\"},")
        kusto_script.append(
            " {\"column\": \"subject\", \"path\": \"$.subject\"},")
        kusto_script.append(
            " {\"column\": \"datacontenttype\", \"path\": \"$.datacontenttype\"},")
        kusto_script.append(
            " {\"column\": \"dataschema\", \"path\": \"$.dataschema\"},")
        kusto_script.append(
            " {\"column\": \"data\", \"path\": \"$.data\"}")
        kusto_script.append("]\n```\n\n")

    # Convert each record schema to Kusto
    for record_schema in record_schemas:
        # Anything that is not an object schema is silently skipped.
        if not isinstance(record_schema, dict) or "type" not in record_schema or record_schema["type"] != "object":
            continue

        # Skip abstract types that somehow made it through
        if not self.is_concrete_type(record_schema):
            continue

        kusto_script.extend(self.convert_record_to_kusto(
            record_schema, schema_doc, emit_cloudevents_columns, emit_cloudevents_dispatch_table))

    # Join and clean up extra blank lines at the end
    result = "\n".join(kusto_script)
    # Strip all trailing whitespace, then terminate with a single newline
    return result.rstrip() + "\n" if result else ""
|
|
509
|
+
|
|
510
|
+
def convert_structure_to_kusto_file(self, structure_schema_path, structure_record_type, kusto_file_path, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False):
    """Generate the Kusto script for a JSON Structure schema and save it to a file.

    Args:
        structure_schema_path: Forwarded to ``convert_structure_to_kusto_script``.
        structure_record_type: Forwarded to ``convert_structure_to_kusto_script``.
        kusto_file_path: Path of the file to write; it is opened in write mode
            with UTF-8 encoding.
        emit_cloudevents_columns: Forwarded to ``convert_structure_to_kusto_script``.
        emit_cloudevents_dispatch_table: Forwarded to ``convert_structure_to_kusto_script``.
    """
    kusto_text = self.convert_structure_to_kusto_script(
        structure_schema_path,
        structure_record_type,
        emit_cloudevents_columns,
        emit_cloudevents_dispatch_table,
    )
    # The script is produced before the file is opened, as in the call order above.
    with open(kusto_file_path, "w", encoding="utf-8") as output:
        output.write(kusto_text)
|
|
516
|
+
|
|
517
|
+
def convert_structure_type_to_kusto_type(self, structure_type: Union[str, dict, list], schema_doc: Optional[Dict] = None) -> str:
    """Converts a JSON Structure type to a Kusto type.

    Args:
        structure_type: A type name, a schema object, or a list of either
            (a type union).
        schema_doc: Document handed to ``resolve_ref`` when a ``$ref`` is
            encountered.

    Returns:
        The Kusto type name. ``"dynamic"`` is returned whenever no single
        scalar type applies (unions of several types, complex types,
        unresolved references, schema objects without a ``type``).
    """
    if isinstance(structure_type, list):
        # Type union: 'null' members are dropped before picking a type.
        non_null_types = [t for t in structure_type if t != 'null']
        if len(non_null_types) == 1:
            return self.convert_structure_type_to_kusto_type(non_null_types[0], schema_doc)
        # Either nothing but 'null' or several candidate types.
        return "dynamic"

    if isinstance(structure_type, dict):
        # Handle $ref
        if '$ref' in structure_type:
            ref_schema = self.resolve_ref(structure_type['$ref'], schema_doc, schema_doc)
            if ref_schema:
                return self.convert_structure_type_to_kusto_type(ref_schema, schema_doc)
            return "dynamic"

        # Enums map to string in Kusto
        if 'enum' in structure_type:
            return "string"

        if 'type' not in structure_type:
            return "dynamic"

        struct_type = structure_type['type']

        # The 'type' value may itself be a union (list) or a nested schema /
        # $ref object (dict). Those are unhashable and would make the
        # primitive-type dictionary lookup raise TypeError, so route them
        # through the union / $ref handling above instead.
        if isinstance(struct_type, (list, dict)):
            return self.convert_structure_type_to_kusto_type(struct_type, schema_doc)

        # Complex types have no scalar Kusto equivalent
        if struct_type in ['object', 'array', 'set', 'map', 'choice', 'tuple']:
            return "dynamic"
        return self.map_primitive_type(struct_type)

    if isinstance(structure_type, str):
        return self.map_primitive_type(structure_type)

    return "dynamic"
|
|
557
|
+
|
|
558
|
+
def map_primitive_type(self, type_value: str) -> str:
    """Maps a JSON Structure primitive type to a Kusto scalar type.

    Type names without an entry in the table fall back to ``'dynamic'``.
    """
    # Each Kusto scalar type is listed once with the source type names
    # that are stored as that type.
    names_by_kusto_type = (
        ('dynamic', ('null', 'binary', 'any')),
        ('bool', ('boolean',)),
        ('string', ('string', 'uri', 'jsonpointer')),
        ('int', ('integer', 'int8', 'uint8', 'int16', 'uint16', 'int32')),
        # uint32 can exceed int range, so it joins the 64-bit types
        ('long', ('uint32', 'int64', 'uint64')),
        # Use decimal for very large integers
        ('decimal', ('int128', 'uint128', 'decimal')),
        ('real', ('number', 'float8', 'float', 'double', 'binary32', 'binary64')),
        ('datetime', ('date', 'datetime', 'timestamp')),
        ('timespan', ('time', 'duration')),
        ('guid', ('uuid',)),
    )

    kusto_type_for = {}
    for kusto_type, source_names in names_by_kusto_type:
        kusto_type_for.update(dict.fromkeys(source_names, kusto_type))

    return kusto_type_for.get(type_value, 'dynamic')
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def convert_structure_to_kusto_file(structure_schema_path, structure_record_type, kusto_file_path, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False):
    """Write the Kusto script for a JSON Structure schema to a file.

    Module-level convenience wrapper: creates a ``StructureToKusto`` instance
    and forwards every argument, unchanged and in the same order, to its
    ``convert_structure_to_kusto_file`` method.
    """
    StructureToKusto().convert_structure_to_kusto_file(
        structure_schema_path,
        structure_record_type,
        kusto_file_path,
        emit_cloudevents_columns,
        emit_cloudevents_dispatch_table,
    )
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def convert_structure_to_kusto_db(structure_schema_path, structure_record_type, kusto_uri, kusto_database, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False, token_provider=None):
    """Generate the Kusto script for a JSON Structure schema and run it on a database.

    The script is split on blank lines and each non-empty piece is sent to
    ``kusto_database`` as a management command. If a command raises, the
    exception is printed and the process exits with status 1.

    Args:
        structure_schema_path: Forwarded to ``convert_structure_to_kusto_script``.
        structure_record_type: Forwarded to ``convert_structure_to_kusto_script``.
        kusto_uri: URI used to build the Kusto connection string.
        kusto_database: Database the management commands are executed against.
        emit_cloudevents_columns: Forwarded to ``convert_structure_to_kusto_script``.
        emit_cloudevents_dispatch_table: Forwarded to ``convert_structure_to_kusto_script``.
        token_provider: When truthy, used for token-provider authentication;
            otherwise Azure CLI authentication is used.
    """
    converter = StructureToKusto()
    kusto_commands = converter.convert_structure_to_kusto_script(
        structure_schema_path,
        structure_record_type,
        emit_cloudevents_columns,
        emit_cloudevents_dispatch_table,
    )

    if token_provider:
        connection = KustoConnectionStringBuilder.with_token_provider(kusto_uri, token_provider)
    else:
        connection = KustoConnectionStringBuilder.with_az_cli_authentication(kusto_uri)
    client = KustoClient(connection)

    for command in kusto_commands.split("\n\n"):
        # Skip pieces that are empty or whitespace-only.
        if not command.strip():
            continue
        try:
            client.execute_mgmt(kusto_database, command)
        except Exception as error:
            print(error)
            sys.exit(1)
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def convert_structure_to_kusto(structure_schema_path, structure_record_type, kusto_file_path, kusto_uri, kusto_database, emit_cloudevents_columns=False, emit_cloudevents_dispatch_table=False, token_provider=None):
    """Convert a JSON Structure schema to Kusto, targeting a database or a file.

    When either ``kusto_uri`` or ``kusto_database`` is truthy the work is
    delegated to ``convert_structure_to_kusto_db`` (``kusto_file_path`` is
    not used in that case); when both are falsy it is delegated to
    ``convert_structure_to_kusto_file``.
    """
    if kusto_uri or kusto_database:
        convert_structure_to_kusto_db(
            structure_schema_path,
            structure_record_type,
            kusto_uri,
            kusto_database,
            emit_cloudevents_columns,
            emit_cloudevents_dispatch_table,
            token_provider,
        )
        return

    convert_structure_to_kusto_file(
        structure_schema_path,
        structure_record_type,
        kusto_file_path,
        emit_cloudevents_columns,
        emit_cloudevents_dispatch_table,
    )
|