datacontract-cli 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/{web.py → api.py} +55 -3
- datacontract/breaking/breaking.py +1 -1
- datacontract/breaking/breaking_rules.py +1 -1
- datacontract/cli.py +32 -10
- datacontract/data_contract.py +14 -100
- datacontract/engines/data_contract_checks.py +735 -0
- datacontract/engines/data_contract_test.py +51 -0
- datacontract/engines/soda/check_soda_execute.py +36 -30
- datacontract/engines/soda/connections/kafka.py +8 -3
- datacontract/export/avro_converter.py +2 -0
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/exporter.py +1 -2
- datacontract/export/exporter_factory.py +4 -12
- datacontract/export/sodacl_converter.py +22 -294
- datacontract/export/sql_type_converter.py +7 -2
- datacontract/imports/odcs_importer.py +6 -3
- datacontract/imports/odcs_v3_importer.py +2 -0
- datacontract/imports/sql_importer.py +229 -29
- datacontract/lint/urls.py +4 -4
- datacontract/model/data_contract_specification.py +130 -129
- datacontract/model/exceptions.py +4 -1
- datacontract/model/run.py +25 -18
- datacontract/templates/datacontract.html +16 -2
- datacontract/templates/partials/definition.html +3 -95
- datacontract/templates/partials/model_field.html +13 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/style/output.css +151 -152
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/METADATA +238 -184
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/RECORD +34 -34
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/export/odcs_v2_exporter.py +0 -124
- datacontract/imports/odcs_v2_importer.py +0 -177
- datacontract/lint/linters/example_model_linter.py +0 -91
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/top_level.txt +0 -0
|
@@ -1,44 +1,76 @@
|
|
|
1
|
-
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import sqlglot
|
|
5
|
+
from sqlglot.dialects.dialect import Dialects
|
|
2
6
|
|
|
3
7
|
from datacontract.imports.importer import Importer
|
|
4
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
8
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
|
|
9
|
+
from datacontract.model.exceptions import DataContractException
|
|
10
|
+
from datacontract.model.run import ResultEnum
|
|
5
11
|
|
|
6
12
|
|
|
7
13
|
class SqlImporter(Importer):
|
|
8
14
|
def import_source(
|
|
9
15
|
self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
|
|
10
16
|
) -> DataContractSpecification:
|
|
11
|
-
return import_sql(data_contract_specification, self.import_format, source)
|
|
17
|
+
return import_sql(data_contract_specification, self.import_format, source, import_args)
|
|
18
|
+
|
|
12
19
|
|
|
20
|
+
def import_sql(
|
|
21
|
+
data_contract_specification: DataContractSpecification, format: str, source: str, import_args: dict = None
|
|
22
|
+
) -> DataContractSpecification:
|
|
23
|
+
sql = read_file(source)
|
|
13
24
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
25
|
+
dialect = to_dialect(import_args)
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
parsed = sqlglot.parse_one(sql=sql, read=dialect)
|
|
29
|
+
except Exception as e:
|
|
30
|
+
logging.error(f"Error parsing SQL: {str(e)}")
|
|
31
|
+
raise DataContractException(
|
|
32
|
+
type="import",
|
|
33
|
+
name=f"Reading source from {source}",
|
|
34
|
+
reason=f"Error parsing SQL: {str(e)}",
|
|
35
|
+
engine="datacontract",
|
|
36
|
+
result=ResultEnum.error,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
server_type: str | None = to_server_type(source, dialect)
|
|
40
|
+
if server_type is not None:
|
|
41
|
+
data_contract_specification.servers[server_type] = Server(type=server_type)
|
|
42
|
+
|
|
43
|
+
tables = parsed.find_all(sqlglot.expressions.Table)
|
|
17
44
|
|
|
18
45
|
for table in tables:
|
|
19
46
|
if data_contract_specification.models is None:
|
|
20
47
|
data_contract_specification.models = {}
|
|
21
48
|
|
|
22
|
-
table_name = table
|
|
49
|
+
table_name = table.this.name
|
|
23
50
|
|
|
24
51
|
fields = {}
|
|
25
|
-
for column in
|
|
52
|
+
for column in parsed.find_all(sqlglot.exp.ColumnDef):
|
|
53
|
+
if column.parent.this.name != table_name:
|
|
54
|
+
continue
|
|
55
|
+
|
|
26
56
|
field = Field()
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
57
|
+
col_name = column.this.name
|
|
58
|
+
col_type = to_col_type(column, dialect)
|
|
59
|
+
field.type = map_type_from_sql(col_type)
|
|
60
|
+
col_description = get_description(column)
|
|
61
|
+
field.description = col_description
|
|
62
|
+
field.maxLength = get_max_length(column)
|
|
63
|
+
precision, scale = get_precision_scale(column)
|
|
64
|
+
field.precision = precision
|
|
65
|
+
field.scale = scale
|
|
66
|
+
field.primaryKey = get_primary_key(column)
|
|
67
|
+
field.required = column.find(sqlglot.exp.NotNullColumnConstraint) is not None or None
|
|
68
|
+
physical_type_key = to_physical_type_key(dialect)
|
|
69
|
+
field.config = {
|
|
70
|
+
physical_type_key: col_type,
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
fields[col_name] = field
|
|
42
74
|
|
|
43
75
|
data_contract_specification.models[table_name] = Model(
|
|
44
76
|
type="table",
|
|
@@ -48,6 +80,124 @@ def import_sql(data_contract_specification: DataContractSpecification, format: s
|
|
|
48
80
|
return data_contract_specification
|
|
49
81
|
|
|
50
82
|
|
|
83
|
+
def get_primary_key(column) -> bool | None:
|
|
84
|
+
if column.find(sqlglot.exp.PrimaryKeyColumnConstraint) is not None:
|
|
85
|
+
return True
|
|
86
|
+
if column.find(sqlglot.exp.PrimaryKey) is not None:
|
|
87
|
+
return True
|
|
88
|
+
return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def to_dialect(import_args: dict) -> Dialects | None:
|
|
92
|
+
if import_args is None:
|
|
93
|
+
return None
|
|
94
|
+
if "dialect" not in import_args:
|
|
95
|
+
return None
|
|
96
|
+
dialect = import_args.get("dialect")
|
|
97
|
+
if dialect is None:
|
|
98
|
+
return None
|
|
99
|
+
if dialect == "sqlserver":
|
|
100
|
+
return Dialects.TSQL
|
|
101
|
+
if dialect.upper() in Dialects.__members__:
|
|
102
|
+
return Dialects[dialect.upper()]
|
|
103
|
+
if dialect == "sqlserver":
|
|
104
|
+
return Dialects.TSQL
|
|
105
|
+
return None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def to_physical_type_key(dialect: Dialects | None) -> str:
|
|
109
|
+
dialect_map = {
|
|
110
|
+
Dialects.TSQL: "sqlserverType",
|
|
111
|
+
Dialects.POSTGRES: "postgresType",
|
|
112
|
+
Dialects.BIGQUERY: "bigqueryType",
|
|
113
|
+
Dialects.SNOWFLAKE: "snowflakeType",
|
|
114
|
+
Dialects.REDSHIFT: "redshiftType",
|
|
115
|
+
Dialects.ORACLE: "oracleType",
|
|
116
|
+
Dialects.MYSQL: "mysqlType",
|
|
117
|
+
Dialects.DATABRICKS: "databricksType",
|
|
118
|
+
}
|
|
119
|
+
return dialect_map.get(dialect, "physicalType")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def to_server_type(source, dialect: Dialects | None) -> str | None:
|
|
123
|
+
if dialect is None:
|
|
124
|
+
return None
|
|
125
|
+
dialect_map = {
|
|
126
|
+
Dialects.TSQL: "sqlserver",
|
|
127
|
+
Dialects.POSTGRES: "postgres",
|
|
128
|
+
Dialects.BIGQUERY: "bigquery",
|
|
129
|
+
Dialects.SNOWFLAKE: "snowflake",
|
|
130
|
+
Dialects.REDSHIFT: "redshift",
|
|
131
|
+
Dialects.ORACLE: "oracle",
|
|
132
|
+
Dialects.MYSQL: "mysql",
|
|
133
|
+
Dialects.DATABRICKS: "databricks",
|
|
134
|
+
}
|
|
135
|
+
return dialect_map.get(dialect, None)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def to_col_type(column, dialect):
|
|
139
|
+
col_type_kind = column.args["kind"]
|
|
140
|
+
if col_type_kind is None:
|
|
141
|
+
return None
|
|
142
|
+
|
|
143
|
+
return col_type_kind.sql(dialect)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def to_col_type_normalized(column):
|
|
147
|
+
col_type = column.args["kind"].this.name
|
|
148
|
+
if col_type is None:
|
|
149
|
+
return None
|
|
150
|
+
return col_type.lower()
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def get_description(column: sqlglot.expressions.ColumnDef) -> str | None:
|
|
154
|
+
if column.comments is None:
|
|
155
|
+
return None
|
|
156
|
+
return " ".join(comment.strip() for comment in column.comments)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def get_max_length(column: sqlglot.expressions.ColumnDef) -> int | None:
|
|
160
|
+
col_type = to_col_type_normalized(column)
|
|
161
|
+
if col_type is None:
|
|
162
|
+
return None
|
|
163
|
+
if col_type not in ["varchar", "char", "nvarchar", "nchar"]:
|
|
164
|
+
return None
|
|
165
|
+
col_params = list(column.args["kind"].find_all(sqlglot.expressions.DataTypeParam))
|
|
166
|
+
max_length_str = None
|
|
167
|
+
if len(col_params) == 0:
|
|
168
|
+
return None
|
|
169
|
+
if len(col_params) == 1:
|
|
170
|
+
max_length_str = col_params[0].name
|
|
171
|
+
if len(col_params) == 2:
|
|
172
|
+
max_length_str = col_params[1].name
|
|
173
|
+
if max_length_str is not None:
|
|
174
|
+
return int(max_length_str) if max_length_str.isdigit() else None
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def get_precision_scale(column):
|
|
178
|
+
col_type = to_col_type_normalized(column)
|
|
179
|
+
if col_type is None:
|
|
180
|
+
return None, None
|
|
181
|
+
if col_type not in ["decimal", "numeric", "float", "number"]:
|
|
182
|
+
return None, None
|
|
183
|
+
col_params = list(column.args["kind"].find_all(sqlglot.expressions.DataTypeParam))
|
|
184
|
+
if len(col_params) == 0:
|
|
185
|
+
return None, None
|
|
186
|
+
if len(col_params) == 1:
|
|
187
|
+
if not col_params[0].name.isdigit():
|
|
188
|
+
return None, None
|
|
189
|
+
precision = int(col_params[0].name)
|
|
190
|
+
scale = 0
|
|
191
|
+
return precision, scale
|
|
192
|
+
if len(col_params) == 2:
|
|
193
|
+
if not col_params[0].name.isdigit() or not col_params[1].name.isdigit():
|
|
194
|
+
return None, None
|
|
195
|
+
precision = int(col_params[0].name)
|
|
196
|
+
scale = int(col_params[1].name)
|
|
197
|
+
return precision, scale
|
|
198
|
+
return None, None
|
|
199
|
+
|
|
200
|
+
|
|
51
201
|
def map_type_from_sql(sql_type: str):
|
|
52
202
|
if sql_type is None:
|
|
53
203
|
return None
|
|
@@ -55,25 +205,55 @@ def map_type_from_sql(sql_type: str):
|
|
|
55
205
|
sql_type_normed = sql_type.lower().strip()
|
|
56
206
|
|
|
57
207
|
if sql_type_normed.startswith("varchar"):
|
|
58
|
-
return "
|
|
208
|
+
return "string"
|
|
209
|
+
elif sql_type_normed.startswith("char"):
|
|
210
|
+
return "string"
|
|
59
211
|
elif sql_type_normed.startswith("string"):
|
|
60
212
|
return "string"
|
|
213
|
+
elif sql_type_normed.startswith("nchar"):
|
|
214
|
+
return "string"
|
|
61
215
|
elif sql_type_normed.startswith("text"):
|
|
62
|
-
return "
|
|
216
|
+
return "string"
|
|
217
|
+
elif sql_type_normed.startswith("nvarchar"):
|
|
218
|
+
return "string"
|
|
219
|
+
elif sql_type_normed.startswith("ntext"):
|
|
220
|
+
return "string"
|
|
63
221
|
elif sql_type_normed.startswith("int"):
|
|
64
|
-
return "
|
|
222
|
+
return "int"
|
|
223
|
+
elif sql_type_normed.startswith("bigint"):
|
|
224
|
+
return "long"
|
|
225
|
+
elif sql_type_normed.startswith("tinyint"):
|
|
226
|
+
return "int"
|
|
227
|
+
elif sql_type_normed.startswith("smallint"):
|
|
228
|
+
return "int"
|
|
65
229
|
elif sql_type_normed.startswith("float"):
|
|
66
230
|
return "float"
|
|
67
231
|
elif sql_type_normed.startswith("decimal"):
|
|
68
232
|
return "decimal"
|
|
69
233
|
elif sql_type_normed.startswith("numeric"):
|
|
70
|
-
return "
|
|
234
|
+
return "decimal"
|
|
71
235
|
elif sql_type_normed.startswith("bool"):
|
|
72
236
|
return "boolean"
|
|
73
|
-
elif sql_type_normed.startswith("
|
|
74
|
-
return "
|
|
237
|
+
elif sql_type_normed.startswith("bit"):
|
|
238
|
+
return "boolean"
|
|
239
|
+
elif sql_type_normed.startswith("binary"):
|
|
240
|
+
return "bytes"
|
|
241
|
+
elif sql_type_normed.startswith("varbinary"):
|
|
242
|
+
return "bytes"
|
|
75
243
|
elif sql_type_normed == "date":
|
|
76
244
|
return "date"
|
|
245
|
+
elif sql_type_normed == "time":
|
|
246
|
+
return "string"
|
|
247
|
+
elif sql_type_normed == "timestamp":
|
|
248
|
+
return "timestamp_ntz"
|
|
249
|
+
elif (
|
|
250
|
+
sql_type_normed == "timestamptz"
|
|
251
|
+
or sql_type_normed == "timestamp_tz"
|
|
252
|
+
or sql_type_normed == "timestamp with time zone"
|
|
253
|
+
):
|
|
254
|
+
return "timestamp_tz"
|
|
255
|
+
elif sql_type_normed == "timestampntz" or sql_type_normed == "timestamp_ntz":
|
|
256
|
+
return "timestamp_ntz"
|
|
77
257
|
elif sql_type_normed == "smalldatetime":
|
|
78
258
|
return "timestamp_ntz"
|
|
79
259
|
elif sql_type_normed == "datetime":
|
|
@@ -82,5 +262,25 @@ def map_type_from_sql(sql_type: str):
|
|
|
82
262
|
return "timestamp_ntz"
|
|
83
263
|
elif sql_type_normed == "datetimeoffset":
|
|
84
264
|
return "timestamp_tz"
|
|
265
|
+
elif sql_type_normed == "uniqueidentifier": # tsql
|
|
266
|
+
return "string"
|
|
267
|
+
elif sql_type_normed == "json":
|
|
268
|
+
return "string"
|
|
269
|
+
elif sql_type_normed == "xml": # tsql
|
|
270
|
+
return "string"
|
|
85
271
|
else:
|
|
86
272
|
return "variant"
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def read_file(path):
|
|
276
|
+
if not os.path.exists(path):
|
|
277
|
+
raise DataContractException(
|
|
278
|
+
type="import",
|
|
279
|
+
name=f"Reading source from {path}",
|
|
280
|
+
reason=f"The file '{path}' does not exist.",
|
|
281
|
+
engine="datacontract",
|
|
282
|
+
result=ResultEnum.error,
|
|
283
|
+
)
|
|
284
|
+
with open(path, "r") as file:
|
|
285
|
+
file_content = file.read()
|
|
286
|
+
return file_content
|
datacontract/lint/urls.py
CHANGED
|
@@ -33,22 +33,22 @@ def _set_api_key(headers, url):
|
|
|
33
33
|
|
|
34
34
|
if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
|
|
35
35
|
if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
|
|
36
|
-
print("Error: Data Mesh Manager API
|
|
36
|
+
print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
|
|
37
37
|
raise DataContractException(
|
|
38
38
|
type="lint",
|
|
39
39
|
name=f"Reading data contract from {url}",
|
|
40
|
-
reason="Error: Data Mesh Manager API
|
|
40
|
+
reason="Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
|
|
41
41
|
engine="datacontract",
|
|
42
42
|
result="error",
|
|
43
43
|
)
|
|
44
44
|
headers["x-api-key"] = datamesh_manager_api_key
|
|
45
45
|
elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
|
|
46
46
|
if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
|
|
47
|
-
print("Error: Data Contract Manager API
|
|
47
|
+
print("Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
|
|
48
48
|
raise DataContractException(
|
|
49
49
|
type="lint",
|
|
50
50
|
name=f"Reading data contract from {url}",
|
|
51
|
-
reason="Error: Data Contract Manager API
|
|
51
|
+
reason="Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
|
|
52
52
|
engine="datacontract",
|
|
53
53
|
result="error",
|
|
54
54
|
)
|