datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +260 -0
- datacontract/breaking/breaking.py +242 -12
- datacontract/breaking/breaking_rules.py +37 -1
- datacontract/catalog/catalog.py +80 -0
- datacontract/cli.py +387 -117
- datacontract/data_contract.py +216 -353
- datacontract/engines/data_contract_checks.py +1041 -0
- datacontract/engines/data_contract_test.py +113 -0
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
- datacontract/engines/soda/check_soda_execute.py +100 -56
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/engines/soda/connections/duckdb_connection.py +241 -0
- datacontract/engines/soda/connections/kafka.py +206 -113
- datacontract/engines/soda/connections/snowflake.py +8 -5
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/engines/soda/connections/trino.py +26 -0
- datacontract/export/avro_converter.py +72 -8
- datacontract/export/avro_idl_converter.py +31 -25
- datacontract/export/bigquery_converter.py +130 -0
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/data_caterer_converter.py +161 -0
- datacontract/export/dbml_converter.py +148 -0
- datacontract/export/dbt_converter.py +141 -54
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +100 -0
- datacontract/export/exporter_factory.py +216 -0
- datacontract/export/go_converter.py +105 -0
- datacontract/export/great_expectations_converter.py +257 -36
- datacontract/export/html_exporter.py +86 -0
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/jsonschema_converter.py +71 -16
- datacontract/export/markdown_converter.py +337 -0
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +375 -0
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +168 -68
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +13 -6
- datacontract/export/sodacl_converter.py +36 -188
- datacontract/export/spark_converter.py +245 -0
- datacontract/export/sql_converter.py +37 -3
- datacontract/export/sql_type_converter.py +269 -8
- datacontract/export/sqlalchemy_converter.py +170 -0
- datacontract/export/terraform_converter.py +7 -2
- datacontract/imports/avro_importer.py +246 -26
- datacontract/imports/bigquery_importer.py +221 -0
- datacontract/imports/csv_importer.py +143 -0
- datacontract/imports/dbml_importer.py +112 -0
- datacontract/imports/dbt_importer.py +240 -0
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/glue_importer.py +288 -0
- datacontract/imports/iceberg_importer.py +172 -0
- datacontract/imports/importer.py +51 -0
- datacontract/imports/importer_factory.py +128 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/jsonschema_importer.py +146 -0
- datacontract/imports/odcs_importer.py +60 -0
- datacontract/imports/odcs_v3_importer.py +516 -0
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +262 -0
- datacontract/imports/sql_importer.py +274 -35
- datacontract/imports/unity_importer.py +219 -0
- datacontract/init/init_template.py +20 -0
- datacontract/integration/datamesh_manager.py +86 -0
- datacontract/lint/resolve.py +271 -49
- datacontract/lint/resources.py +21 -0
- datacontract/lint/schema.py +53 -17
- datacontract/lint/urls.py +32 -12
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/exceptions.py +4 -1
- datacontract/model/odcs.py +24 -0
- datacontract/model/run.py +49 -29
- datacontract/output/__init__.py +0 -0
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- datacontract/py.typed +0 -0
- datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +139 -294
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +236 -0
- datacontract/templates/partials/datacontract_information.html +86 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +51 -0
- datacontract/templates/partials/definition.html +25 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +144 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/partials/server.html +211 -0
- datacontract/templates/style/output.css +491 -72
- datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/engines/soda/connections/duckdb.py +0 -76
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/export/html_export.py +0 -66
- datacontract/export/odcs_converter.py +0 -102
- datacontract/init/download_datacontract_file.py +0 -17
- datacontract/integration/publish_datamesh_manager.py +0 -33
- datacontract/integration/publish_opentelemetry.py +0 -107
- datacontract/lint/lint.py +0 -141
- datacontract/lint/linters/description_linter.py +0 -34
- datacontract/lint/linters/example_model_linter.py +0 -91
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -38
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -99
- datacontract/model/data_contract_specification.py +0 -141
- datacontract/web.py +0 -14
- datacontract_cli-0.10.0.dist-info/METADATA +0 -951
- datacontract_cli-0.10.0.dist-info/RECORD +0 -66
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- /datacontract/{lint/linters → export}/__init__.py +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/export/sql_type_converter.py:

```diff
@@ -1,19 +1,37 @@
+from datacontract.export.bigquery_converter import map_type_to_bigquery
 from datacontract.model.data_contract_specification import Field
 
 
 def convert_to_sql_type(field: Field, server_type: str) -> str:
+    if field.config and "physicalType" in field.config:
+        return field.config["physicalType"]
+
     if server_type == "snowflake":
         return convert_to_snowflake(field)
-    if server_type == "postgres":
+    elif server_type == "postgres":
         return convert_type_to_postgres(field)
-    if server_type == "databricks":
+    elif server_type == "dataframe":
+        return convert_to_dataframe(field)
+    elif server_type == "databricks":
         return convert_to_databricks(field)
+    elif server_type == "local" or server_type == "s3":
+        return convert_to_duckdb(field)
+    elif server_type == "sqlserver":
+        return convert_type_to_sqlserver(field)
+    elif server_type == "bigquery":
+        return convert_type_to_bigquery(field)
+    elif server_type == "trino":
+        return convert_type_to_trino(field)
+
     return field.type
 
 
 # snowflake data types:
 # https://docs.snowflake.com/en/sql-reference/data-types.html
-def convert_to_snowflake(field) -> None | str:
+def convert_to_snowflake(field: Field) -> None | str:
+    if field.config and "snowflakeType" in field.config:
+        return field.config["snowflakeType"]
+
     type = field.type
     # currently optimized for snowflake
     # LEARNING: data contract has no direct support for CHAR,CHARACTER
```
```diff
@@ -54,6 +72,9 @@ def convert_to_snowflake(field) -> None | str:
 # https://www.postgresql.org/docs/current/datatype.html
 # Using the name whenever possible
 def convert_type_to_postgres(field: Field) -> None | str:
+    if field.config and "postgresType" in field.config:
+        return field.config["postgresType"]
+
     type = field.type
     if type is None:
         return None
```
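A pattern shared by these hunks: each converter now consults `field.config` for an explicit dialect override (`physicalType` generically; `snowflakeType` and `postgresType` per dialect) before falling back to the built-in mapping. A minimal sketch of that behavior, assuming the pydantic `Field` model accepts these attributes as keyword arguments:

```python
from datacontract.export.sql_type_converter import convert_to_sql_type
from datacontract.model.data_contract_specification import Field

# Default mapping: the dialect-specific converter decides the physical type.
amount = Field(type="decimal", precision=10, scale=2)
print(convert_to_sql_type(amount, "postgres"))

# Override: "physicalType" in config short-circuits all dialect logic
# and is returned verbatim.
amount_override = Field(type="decimal", config={"physicalType": "MONEY"})
print(convert_to_sql_type(amount_override, "postgres"))  # -> MONEY
```

Because the override is returned verbatim, it can carry dialect-specific syntax that the generic type system cannot express.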
```diff
@@ -93,10 +114,62 @@ def convert_type_to_postgres(field: Field) -> None | str:
     return None
 
 
+# dataframe data types:
+# https://spark.apache.org/docs/latest/sql-ref-datatypes.html
+def convert_to_dataframe(field: Field) -> None | str:
+    if field.config and "dataframeType" in field.config:
+        return field.config["dataframeType"]
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "STRING"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP_NTZ"
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "STRING"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
+    if type.lower() in ["float"]:
+        return "FLOAT"
+    if type.lower() in ["double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int"]:
+        return "INT"
+    if type.lower() in ["long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_dataframe(nested_field)
+            nested_fields.append(f"{nested_field_name}:{nested_field_type}")
+        return f"STRUCT<{','.join(nested_fields)}>"
+    if type.lower() in ["bytes"]:
+        return "BINARY"
+    if type.lower() in ["array"]:
+        item_type = convert_to_dataframe(field.items)
+        return f"ARRAY<{item_type}>"
+    return None
+
+
 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
-def convert_to_databricks(field) -> None | str:
+def convert_to_databricks(field: Field) -> None | str:
     type = field.type
+    if (
+        field.config
+        and "databricksType" in field.config
+        and type.lower() not in ["array", "object", "record", "struct"]
+    ):
+        return field.config["databricksType"]
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
```
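The new `convert_to_dataframe` builds Spark-style DDL type strings, recursing through `field.fields` for structs and `field.items` for arrays. A short illustration (field names invented for the example):

```python
from datacontract.export.sql_type_converter import convert_to_dataframe
from datacontract.model.data_contract_specification import Field

# Nested struct: each sub-field is converted recursively.
address = Field(
    type="struct",
    fields={
        "street": Field(type="string"),
        "zip": Field(type="int"),
    },
)
print(convert_to_dataframe(address))  # -> STRUCT<street:STRING,zip:INT>

# Array: the item type is converted and wrapped.
tags = Field(type="array", items=Field(type="string"))
print(convert_to_dataframe(tags))  # -> ARRAY<STRING>
```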
```diff
@@ -110,8 +183,9 @@ def convert_to_databricks(field) -> None | str:
     if type.lower() in ["time"]:
         return "STRING"
     if type.lower() in ["number", "decimal", "numeric"]:
-        # precision and scale not supported by data contract
-        return "DECIMAL"
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
```
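This hunk swaps the previously hard-coded Databricks decimal mapping for one that respects the contract's `precision` and `scale`, defaulting to `DECIMAL(38,0)` when they are absent. For instance:

```python
from datacontract.export.sql_type_converter import convert_to_databricks
from datacontract.model.data_contract_specification import Field

print(convert_to_databricks(Field(type="decimal", precision=12, scale=4)))  # DECIMAL(12,4)
print(convert_to_databricks(Field(type="decimal")))                         # DECIMAL(38,0)
```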
```diff
@@ -123,9 +197,196 @@ def convert_to_databricks(field) -> None | str:
     if type.lower() in ["boolean"]:
         return "BOOLEAN"
     if type.lower() in ["object", "record", "struct"]:
-        return "STRUCT"
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_databricks(nested_field)
+            nested_fields.append(f"{nested_field_name}:{nested_field_type}")
+        return f"STRUCT<{','.join(nested_fields)}>"
     if type.lower() in ["bytes"]:
         return "BINARY"
     if type.lower() in ["array"]:
-        return "ARRAY"
+        item_type = convert_to_databricks(field.items)
+        return f"ARRAY<{item_type}>"
+    if type.lower() in ["variant"]:
+        return "VARIANT"
     return None
+
+
+def convert_to_duckdb(field: Field) -> None | str:
+    """
+    Convert a data contract field to the corresponding DuckDB SQL type.
+
+    Parameters:
+        field (Field): The data contract field to convert.
+
+    Returns:
+        str: The corresponding DuckDB SQL type.
+    """
+    # Check
+    if field is None or field.type is None:
+        return None
+
+    # Get
+    type_lower = field.type.lower()
+
+    # Prepare
+    type_mapping = {
+        "varchar": "VARCHAR",
+        "string": "VARCHAR",
+        "text": "VARCHAR",
+        "binary": "BLOB",
+        "bytes": "BLOB",
+        "blob": "BLOB",
+        "boolean": "BOOLEAN",
+        "float": "FLOAT",
+        "double": "DOUBLE",
+        "int": "INTEGER",
+        "int32": "INTEGER",
+        "integer": "INTEGER",
+        "int64": "BIGINT",
+        "long": "BIGINT",
+        "bigint": "BIGINT",
+        "date": "DATE",
+        "time": "TIME",
+        "timestamp": "TIMESTAMP WITH TIME ZONE",
+        "timestamp_tz": "TIMESTAMP WITH TIME ZONE",
+        "timestamp_ntz": "TIMESTAMP",
+    }
+
+    # Convert simple mappings
+    if type_lower in type_mapping:
+        return type_mapping[type_lower]
+
+    # convert decimal numbers with precision and scale
+    if type_lower == "decimal" or type_lower == "number" or type_lower == "numeric":
+        return f"DECIMAL({field.precision},{field.scale})"
+
+    # Check list and map
+    if type_lower == "list" or type_lower == "array":
+        item_type = convert_to_duckdb(field.items)
+        return f"{item_type}[]"
+    if type_lower == "map":
+        key_type = convert_to_duckdb(field.keys)
+        value_type = convert_to_duckdb(field.values)
+        return f"MAP({key_type}, {value_type})"
+    if type_lower == "struct" or type_lower == "object" or type_lower == "record":
+        structure_field = "STRUCT("
+        field_strings = []
+        for fieldKey, fieldValue in field.fields.items():
+            field_strings.append(f"{fieldKey} {convert_to_duckdb(fieldValue)}")
+        structure_field += ", ".join(field_strings)
+        structure_field += ")"
+        return structure_field
+
+    # Return none
+    return None
+
+
+def convert_type_to_sqlserver(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent sqlserver types"""
+    field_type = field.type
+    if not field_type:
+        return None
+
+    # If provided sql-server config type, prefer it over default mapping
+    if sqlserver_type := get_type_config(field, "sqlserverType"):
+        return sqlserver_type
+
+    field_type = field_type.lower()
+    if field_type in ["string", "varchar", "text"]:
+        if field.format == "uuid":
+            return "uniqueidentifier"
+        return "varchar"
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return "datetimeoffset"
+    if field_type in ["timestamp_ntz"]:
+        if field.format == "datetime":
+            return "datetime"
+        return "datetime2"
+    if field_type in ["date"]:
+        return "date"
+    if field_type in ["time"]:
+        return "time"
+    if field_type in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        if field_type == "number":
+            return "numeric"
+        return field_type
+    if field_type in ["float"]:
+        return "float"
+    if field_type in ["double"]:
+        return "double precision"
+    if field_type in ["integer", "int", "bigint"]:
+        return field_type
+    if field_type in ["long"]:
+        return "bigint"
+    if field_type in ["boolean"]:
+        return "bit"
+    if field_type in ["object", "record", "struct"]:
+        return "jsonb"
+    if field_type in ["bytes"]:
+        return "binary"
+    if field_type in ["array"]:
+        raise NotImplementedError("SQLServer does not support array types.")
+    return None
+
+
+def convert_type_to_bigquery(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent bigquery types"""
+
+    # BigQuery exporter cannot be used for complex types, as the exporter has different syntax than SodaCL
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["array"]:
+        item_type = convert_type_to_bigquery(field.items)
+        return f"ARRAY<{item_type}>"
+
+    if field_type.lower() in ["object", "record", "struct"]:
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_type_to_bigquery(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
+
+    return map_type_to_bigquery(field)
+
+
+def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
+    """Retrieve type configuration if provided in datacontract."""
+    if not field.config:
+        return None
+    return field.config.get(config_attr, None)
+
+
+def convert_type_to_trino(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent trino types"""
+    if field.config and "trinoType" in field.config:
+        return field.config["trinoType"]
+
+    field_type = field.type.lower()
+    if field_type in ["string", "text", "varchar"]:
+        return "varchar"
+    # tinyint, smallint not supported by data contract
+    if field_type in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "decimal"
+    if field_type in ["int", "integer"]:
+        return "integer"
+    if field_type in ["long", "bigint"]:
+        return "bigint"
+    if field_type in ["float"]:
+        return "real"
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return "timestamp(3) with time zone"
+    if field_type in ["timestamp_ntz"]:
+        return "timestamp(3)"
+    if field_type in ["bytes"]:
+        return "varbinary"
+    if field_type in ["object", "record", "struct"]:
+        return "json"
```
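Note that `convert_to_duckdb` emits DuckDB's own composite syntax rather than the Spark-style `ARRAY<...>`: lists become `TYPE[]`, maps become `MAP(K, V)`, and structs become `STRUCT(name TYPE, ...)`. A sketch, assuming the `Field` model exposes `items`, `keys`, `values`, and `fields` as read by the code above:

```python
from datacontract.export.sql_type_converter import convert_to_duckdb
from datacontract.model.data_contract_specification import Field

scores = Field(type="array", items=Field(type="int"))
print(convert_to_duckdb(scores))  # -> INTEGER[]

labels = Field(type="map", keys=Field(type="string"), values=Field(type="double"))
print(convert_to_duckdb(labels))  # -> MAP(VARCHAR, DOUBLE)

address = Field(
    type="struct",
    fields={"street": Field(type="string"), "zip": Field(type="int")},
)
print(convert_to_duckdb(address))  # -> STRUCT(street VARCHAR, zip INTEGER)
```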
datacontract/export/sqlalchemy_converter.py (new file):

```diff
@@ -0,0 +1,170 @@
+import ast
+import typing
+
+import datacontract.model.data_contract_specification as spec
+from datacontract.export.exporter import Exporter, _determine_sql_server_type
+
+
+class SQLAlchemyExporter(Exporter):
+    def export(
+        self, data_contract: spec.DataContractSpecification, model, server, sql_server_type, export_args
+    ) -> dict:
+        sql_server_type = _determine_sql_server_type(data_contract, sql_server_type, server)
+        return to_sqlalchemy_model_str(data_contract, sql_server_type, server)
+
+
+DECLARATIVE_BASE = "Base"
+
+
+def to_sqlalchemy_model_str(contract: spec.DataContractSpecification, sql_server_type: str = "", server=None) -> str:
+    server_obj = contract.servers.get(server)
+    classdefs = [
+        generate_model_class(model_name, model, server_obj, sql_server_type)
+        for (model_name, model) in contract.models.items()
+    ]
+    documentation = (
+        [ast.Expr(ast.Constant(contract.info.description))] if (contract.info and contract.info.description) else []
+    )
+
+    declarative_base = ast.ClassDef(
+        name=DECLARATIVE_BASE,
+        bases=[ast.Name(id="DeclarativeBase", ctx=ast.Load())],
+        body=[ast.Pass()],
+        keywords=[],
+        decorator_list=[],
+    )
+
+    databricks_timestamp = ast.ImportFrom(
+        module="databricks.sqlalchemy", names=[ast.alias("TIMESTAMP"), ast.alias("TIMESTAMP_NTZ")]
+    )
+    timestamp = ast.ImportFrom(module="sqlalchemy", names=[ast.alias(name="TIMESTAMP")])
+    result = ast.Module(
+        body=[
+            ast.ImportFrom(module="sqlalchemy.orm", names=[ast.alias(name="DeclarativeBase")]),
+            ast.ImportFrom(
+                module="sqlalchemy",
+                names=[
+                    ast.alias("Column"),
+                    ast.alias("Date"),
+                    ast.alias("Integer"),
+                    ast.alias("Numeric"),
+                    ast.alias("String"),
+                    ast.alias("Text"),
+                    ast.alias("VARCHAR"),
+                    ast.alias("BigInteger"),
+                    ast.alias("Float"),
+                    ast.alias("Double"),
+                    ast.alias("Boolean"),
+                    ast.alias("Date"),
+                    ast.alias("ARRAY"),
+                    ast.alias("LargeBinary"),
+                ],
+            ),
+            databricks_timestamp if sql_server_type == "databricks" else timestamp,
+            *documentation,
+            declarative_base,
+            *classdefs,
+        ],
+        type_ignores=[],
+    )
+    return ast.unparse(result)
+
+
+def Call(name, *args, **kwargs) -> ast.Call:
+    return ast.Call(
+        ast.Name(name),
+        args=[v for v in args],
+        keywords=[ast.keyword(arg=f"{k}", value=ast.Constant(v)) for (k, v) in kwargs.items()],
+    )
+
+
+def Column(predicate, **kwargs) -> ast.Call:
+    return Call("Column", predicate, **kwargs)
+
+
+def sqlalchemy_primitive(field: spec.Field):
+    sqlalchemy_name = {
+        "string": Call("String", ast.Constant(field.maxLength)),
+        "text": Call("Text", ast.Constant(field.maxLength)),
+        "varchar": Call("VARCHAR", ast.Constant(field.maxLength)),
+        "number": Call("Numeric", ast.Constant(field.precision), ast.Constant(field.scale)),
+        "decimal": Call("Numeric", ast.Constant(field.precision), ast.Constant(field.scale)),
+        "numeric": Call("Numeric", ast.Constant(field.precision), ast.Constant(field.scale)),
+        "int": ast.Name("Integer"),
+        "integer": ast.Name("Integer"),
+        "long": ast.Name("BigInteger"),
+        "bigint": ast.Name("BigInteger"),
+        "float": ast.Name("Float"),
+        "double": ast.Name("Double"),
+        "boolean": ast.Name("Boolean"),
+        "timestamp": ast.Name("TIMESTAMP"),
+        "timestamp_tz": Call("TIMESTAMP", ast.Constant(True)),
+        "timestamp_ntz": ast.Name("TIMESTAMP_NTZ"),
+        "date": ast.Name("Date"),
+        "bytes": Call("LargeBinary", ast.Constant(field.maxLength)),
+    }
+    return sqlalchemy_name.get(field.type)
+
+
+def constant_field_value(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:
+    new_type = sqlalchemy_primitive(field)
+    match field.type:
+        case "array":
+            new_type = Call("ARRAY", sqlalchemy_primitive(field.items))
+    if new_type is None:
+        raise RuntimeError(f"Unsupported field type {field.type}.")
+
+    return Column(
+        new_type, nullable=not field.required, comment=field.description, primary_key=field.primaryKey or field.primary
+    ), None
+
+
+def column_assignment(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:
+    return constant_field_value(field_name, field)
+
+
+def is_simple_field(field: spec.Field) -> bool:
+    return field.type not in set(["object", "record", "struct"])
+
+
+def field_definitions(fields: dict[str, spec.Field]) -> tuple[list[ast.Expr], list[ast.ClassDef]]:
+    annotations: list[ast.Expr] = []
+    classes: list[typing.Any] = []
+    for field_name, field in fields.items():
+        (ann, new_class) = column_assignment(field_name, field)
+        annotations.append(ast.Assign(targets=[ast.Name(id=field_name, ctx=ast.Store())], value=ann, lineno=0))
+    return (annotations, classes)
+
+
+def generate_model_class(
+    name: str, model_definition: spec.Model, server=None, sql_server_type: str = ""
+) -> ast.ClassDef:
+    (field_assignments, nested_classes) = field_definitions(model_definition.fields)
+    documentation = [ast.Expr(ast.Constant(model_definition.description))] if model_definition.description else []
+
+    schema = None if server is None else server.schema_
+    table_name = ast.Constant(name)
+    if sql_server_type == "databricks":
+        table_name = ast.Constant(name.lower())
+
+    result = ast.ClassDef(
+        name=name.capitalize(),
+        bases=[ast.Name(id=DECLARATIVE_BASE, ctx=ast.Load())],
+        body=[
+            *documentation,
+            ast.Assign(targets=[ast.Name("__tablename__")], value=table_name, lineno=0),
+            ast.Assign(
+                targets=[ast.Name("__table_args__")],
+                value=ast.Dict(
+                    keys=[ast.Constant("comment"), ast.Constant("schema")],
+                    values=[ast.Constant(model_definition.description), ast.Constant(schema)],
+                ),
+                lineno=0,
+            ),
+            *nested_classes,
+            *field_assignments,
+        ],
+        keywords=[],
+        decorator_list=[],
+    )
+    return result
```
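Worth calling out: this exporter generates code by assembling a Python `ast` tree and serializing it with `ast.unparse`, rather than concatenating strings or rendering templates. The technique in isolation (stdlib only, names invented for the example):

```python
import ast

# Build `class User(Base): __tablename__ = 'user'` as an AST, then unparse it.
cls = ast.ClassDef(
    name="User",
    bases=[ast.Name(id="Base", ctx=ast.Load())],
    body=[ast.Assign(targets=[ast.Name("__tablename__")], value=ast.Constant("user"), lineno=0)],
    keywords=[],
    decorator_list=[],
)
module = ast.Module(body=[cls], type_ignores=[])
print(ast.unparse(ast.fix_missing_locations(module)))
# class User(Base):
#     __tablename__ = 'user'
```

Unparsing guarantees syntactically valid output and spares the exporter from hand-managing quoting and indentation.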
datacontract/export/terraform_converter.py:

```diff
@@ -1,7 +1,12 @@
 import re
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Server
-
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
+
+
+class TerraformExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_terraform(data_contract)
 
 
 def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:
```
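`TerraformExporter` follows the same adapter shape as the other new exporters (`exporter.py` and `exporter_factory.py` in the file list above): a class with a uniform `export(...)` method that delegates to the existing `to_terraform` function. Through the package's Python API this would presumably be driven as in the sketch below; the exact `DataContract.export` signature is assumed, not taken from this diff:

```python
from datacontract.data_contract import DataContract

# Export a contract to Terraform; the exporter factory resolves the
# "terraform" format name to TerraformExporter. (Signature assumed.)
data_contract = DataContract(data_contract_file="datacontract.yaml")
print(data_contract.export(export_format="terraform"))
```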