datacontract-cli 0.10.12__py3-none-any.whl → 0.10.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/cli.py +5 -0
- datacontract/data_contract.py +9 -1
- datacontract/engines/soda/connections/kafka.py +28 -6
- datacontract/export/avro_converter.py +8 -1
- datacontract/export/avro_idl_converter.py +1 -0
- datacontract/export/bigquery_converter.py +30 -23
- datacontract/export/data_caterer_converter.py +148 -0
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/exporter.py +5 -1
- datacontract/export/exporter_factory.py +19 -1
- datacontract/export/jsonschema_converter.py +13 -2
- datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +4 -4
- datacontract/export/odcs_v3_exporter.py +294 -0
- datacontract/export/sodacl_converter.py +82 -2
- datacontract/export/spark_converter.py +3 -1
- datacontract/export/sql_type_converter.py +56 -21
- datacontract/imports/iceberg_importer.py +162 -0
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_importer.py +25 -168
- datacontract/imports/odcs_v2_importer.py +177 -0
- datacontract/imports/odcs_v3_importer.py +309 -0
- datacontract/imports/spark_importer.py +5 -1
- datacontract/imports/unity_importer.py +105 -84
- datacontract/integration/datamesh_manager.py +1 -1
- datacontract/lint/resolve.py +24 -10
- datacontract/lint/resources.py +21 -0
- datacontract/lint/urls.py +29 -13
- datacontract/model/data_contract_specification.py +72 -8
- datacontract/model/odcs.py +11 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA +106 -52
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/RECORD +36 -29
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/WHEEL +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
from typing import Dict
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
from datacontract.export.exporter import Exporter
|
|
6
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class OdcsV3Exporter(Exporter):
    """Exporter that renders a data contract through ``to_odcs_v3_yaml``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return the ODCS v3 rendering of ``data_contract``.

        Only ``data_contract`` is used; ``model``, ``server``,
        ``sql_server_type`` and ``export_args`` are accepted but ignored.

        NOTE(review): the annotation says ``dict`` while ``to_odcs_v3_yaml`` is
        annotated as returning ``str`` -- confirm which one callers rely on.
        """
        odcs_yaml = to_odcs_v3_yaml(data_contract)
        return odcs_yaml
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
    """Render a data contract specification as an ODCS ``v3.0.0`` YAML string.

    The document always carries the header keys (``apiVersion``, ``kind``,
    ``id``, ``name``, ``version``, ``domain``, ``status``) and a ``schema``
    list with one entry per model. ``description``, ``slaProperties``,
    ``support``, ``servers`` and ``customProperties`` are only emitted when
    the specification provides data for them.

    Args:
        data_contract_spec: The specification to export. Its ``info`` and
            ``models`` attributes are read unconditionally.

    Returns:
        The YAML text produced by ``yaml.dump`` with keys kept in insertion
        order (``sort_keys=False``).
    """

    def _strip_or_none(text):
        # Terms texts are trimmed; absent texts stay ``None`` in the output.
        return None if text is None else text.strip()

    info = data_contract_spec.info
    document = {
        "apiVersion": "v3.0.0",
        "kind": "DataContract",
        "id": data_contract_spec.id,
        "name": info.title,
        "version": info.version,
        "domain": info.owner,
        "status": info.status,
    }

    terms = data_contract_spec.terms
    if terms is not None:
        document["description"] = {
            "purpose": _strip_or_none(terms.description),
            "usage": _strip_or_none(terms.usage),
            "limitations": _strip_or_none(terms.limitations),
        }

    document["schema"] = [
        to_odcs_schema(model_name, model) for model_name, model in data_contract_spec.models.items()
    ]

    service_levels = data_contract_spec.servicelevels
    if service_levels is not None:
        sla_properties = []
        if service_levels.availability is not None:
            sla_properties.append(
                {"property": "generalAvailability", "value": service_levels.availability.description}
            )
        if service_levels.retention is not None:
            sla_properties.append({"property": "retention", "value": service_levels.retention.period})
        if sla_properties:
            document["slaProperties"] = sla_properties

    contact = info.contact
    if contact is not None:
        channels = []
        if contact.email is not None:
            channels.append({"channel": "email", "url": "mailto:" + contact.email})
        if contact.url is not None:
            channels.append({"channel": "other", "url": contact.url})
        if channels:
            document["support"] = channels

    if data_contract_spec.servers:
        # (output key, server attribute) pairs, in the order they are emitted.
        server_fields = (
            ("type", "type"),
            ("environment", "environment"),
            ("account", "account"),
            ("database", "database"),
            ("schema", "schema_"),
            ("format", "format"),
            ("project", "project"),
            ("dataset", "dataset"),
            ("path", "path"),
            ("delimiter", "delimiter"),
            ("endpointUrl", "endpointUrl"),
            ("location", "location"),
            ("host", "host"),
            ("port", "port"),
            ("catalog", "catalog"),
            ("topic", "topic"),
            ("http_path", "http_path"),
            ("token", "token"),
            ("driver", "driver"),
        )
        exported_servers = []
        for server_name, server in data_contract_spec.servers.items():
            entry = {"server": server_name}
            for output_key, attribute in server_fields:
                value = getattr(server, attribute)
                if value is not None:
                    entry[output_key] = value
            if server.roles is not None:
                entry["roles"] = [{"name": role.name, "description": role.description} for role in server.roles]
            exported_servers.append(entry)

        if exported_servers:
            document["servers"] = exported_servers

    # Extra (non-modelled) attributes of ``info`` become custom properties.
    extras = info.model_extra
    custom_properties = []
    if extras is not None:
        custom_properties = [{"property": name, "value": value} for name, value in extras.items()]
    if custom_properties:
        document["customProperties"] = custom_properties

    return yaml.dump(document, indent=2, sort_keys=False, allow_unicode=True)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def to_odcs_schema(model_key, model_value: Model) -> dict:
    """Build the ODCS ``schema`` entry for one model.

    Args:
        model_key: Model name; used for both ``name`` and ``physicalName``.
        model_value: The model whose ``type``, ``description``, ``fields`` and
            ``model_extra`` are read.

    Returns:
        A dict with ``name``, ``physicalName``, ``logicalType`` (always
        ``"object"``) and ``physicalType``; ``description``, ``properties``
        and ``customProperties`` are added only when non-empty.
    """
    schema_entry = {
        "name": model_key,
        "physicalName": model_key,
        "logicalType": "object",
        "physicalType": model_value.type,
    }

    if model_value.description is not None:
        schema_entry["description"] = model_value.description

    field_properties = to_properties(model_value.fields)
    if field_properties:
        schema_entry["properties"] = field_properties

    # Extra (non-modelled) attributes of the model become custom properties.
    extras = model_value.model_extra
    if extras is not None:
        custom_properties = [{"property": name, "value": value} for name, value in extras.items()]
        if custom_properties:
            schema_entry["customProperties"] = custom_properties

    return schema_entry
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def to_properties(fields: Dict[str, Field]) -> list:
    """Convert a mapping of field name to field into a list of ODCS properties.

    Each entry is produced by ``to_property``; the order follows the
    iteration order of ``fields``.
    """
    return [to_property(name, spec) for name, spec in fields.items()]
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def to_logical_type(type: str) -> str | None:
    """Map a data contract field type to an ODCS logical type.

    The lookup is case-insensitive. ``None``, ``"null"`` and any type that is
    not listed below yield ``None``.
    """
    if type is None:
        return None

    logical_types = {
        "string": "string",
        "varchar": "string",
        "text": "string",
        "timestamp": "date",
        "timestamp_tz": "date",
        "timestamp_ntz": "date",
        "date": "date",
        "time": "string",
        "number": "number",
        "decimal": "number",
        "numeric": "number",
        "float": "number",
        "double": "number",
        "integer": "integer",
        "int": "integer",
        "long": "integer",
        "bigint": "integer",
        "boolean": "boolean",
        "object": "object",
        "record": "object",
        "struct": "object",
        "bytes": "array",
        "array": "array",
    }
    return logical_types.get(type.lower())
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def to_physical_type(type: str) -> str | None:
    """Return the physical type for a field type; the input is currently passed through unchanged."""
    # TODO: do we need to do a server mapping here?
    return type
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def to_property(field_name: str, field: Field) -> dict:
    """Build the ODCS property dict for a single field.

    Only ``name`` is always present. Every other key is added when the
    corresponding field attribute is set, and the list/dict valued keys
    (``customProperties``, ``tags``, ``logicalTypeOptions``, ``quality``) are
    omitted when they would be empty.

    Args:
        field_name: Name emitted under ``name``.
        field: The field whose attributes are read.

    Returns:
        The property dict, with keys in the order they are added below.
    """
    result = {"name": field_name}

    if field.title is not None:
        result["businessName"] = field.title
    if field.type is not None:
        result["logicalType"] = to_logical_type(field.type)
        result["physicalType"] = to_physical_type(field.type)
    if field.description is not None:
        result["description"] = field.description
    if field.required is not None:
        result["isNullable"] = not field.required
    if field.unique is not None:
        result["isUnique"] = field.unique
    if field.classification is not None:
        result["classification"] = field.classification

    # A single ``example`` takes precedence over the ``examples`` list.
    if field.example is not None:
        result["examples"] = [field.example]
    elif field.examples is not None:
        result["examples"] = field.examples

    # Extra (non-modelled) attributes first, then the ``pii`` flag.
    custom_properties = []
    if field.model_extra is not None:
        custom_properties.extend({"property": name, "value": value} for name, value in field.model_extra.items())
    if field.pii is not None:
        custom_properties.append({"property": "pii", "value": field.pii})
    if custom_properties:
        result["customProperties"] = custom_properties

    tags = [] if field.tags is None else list(field.tags)
    if tags:
        result["tags"] = tags

    option_names = (
        "minLength",
        "maxLength",
        "pattern",
        "minimum",
        "maximum",
        "exclusiveMinimum",
        "exclusiveMaximum",
    )
    type_options = {}
    for option in option_names:
        setting = getattr(field, option)
        if setting is not None:
            type_options[option] = setting
    if type_options:
        result["logicalTypeOptions"] = type_options

    if field.quality is not None:
        # dialect is not supported in v3.0.0
        optional_rule_keys = (
            "description",
            "query",
            "mustBe",
            "mustNotBe",
            "mustBeGreaterThan",
            "mustBeGreaterThanOrEqualTo",
            "mustBeLessThan",
            "mustBeLessThanOrEqualTo",
            "mustBeBetween",
            "mustNotBeBetween",
            "engine",
            "implementation",
        )
        quality_entries = []
        for rule in field.quality:
            entry = {"type": rule.type}
            for key in optional_rule_keys:
                setting = getattr(rule, key)
                if setting is not None:
                    entry[key] = setting
            quality_entries.append(entry)
        if quality_entries:
            result["quality"] = quality_entries

    # todo enum

    return result
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
from venv import logger
|
|
3
|
+
|
|
1
4
|
import yaml
|
|
2
5
|
|
|
3
|
-
from datacontract.export.sql_type_converter import convert_to_sql_type
|
|
4
|
-
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
5
6
|
from datacontract.export.exporter import Exporter
|
|
7
|
+
from datacontract.export.sql_type_converter import convert_to_sql_type
|
|
8
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Quality
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
class SodaExporter(Exporter):
|
|
@@ -58,9 +61,14 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
|
|
|
58
61
|
checks.append(check_field_regex(field_name, field.pattern, quote_field_name))
|
|
59
62
|
if field.enum is not None and len(field.enum) > 0:
|
|
60
63
|
checks.append(check_field_enum(field_name, field.enum, quote_field_name))
|
|
64
|
+
if field.quality is not None and len(field.quality) > 0:
|
|
65
|
+
checks.append(check_quality_list(model_key, field_name, field.quality))
|
|
61
66
|
# TODO references: str = None
|
|
62
67
|
# TODO format
|
|
63
68
|
|
|
69
|
+
if model_value.quality is not None and len(model_value.quality) > 0:
|
|
70
|
+
checks.append(check_quality_list(model_key, None, model_value.quality))
|
|
71
|
+
|
|
64
72
|
checks_for_model_key = f"checks for {model_key}"
|
|
65
73
|
|
|
66
74
|
if quote_field_name:
|
|
@@ -181,6 +189,78 @@ def check_field_regex(field_name, pattern, quote_field_name: bool = False):
|
|
|
181
189
|
}
|
|
182
190
|
|
|
183
191
|
|
|
192
|
+
def check_quality_list(model_name, field_name, quality_list: List[Quality]):
    """Build SodaCL user-defined metric checks from SQL quality rules.

    Only rules whose ``type`` is ``"sql"`` are considered. A rule without a
    query or without a valid threshold is logged as a warning and skipped.

    Args:
        model_name: Name of the model the rules belong to.
        field_name: Name of the field the rules belong to, or ``None`` for
            model-level rules.
        quality_list: The quality rules to convert.

    Returns:
        A dict mapping ``"<metric_name> <threshold>"`` to
        ``{"<metric_name> query": <query>}`` for every emitted rule. The dict
        is empty when no rule qualifies.
    """
    checks = {}

    # Numbers the emitted checks; skipped rules do not consume a number.
    count = 0
    for quality in quality_list:
        if quality.type != "sql":
            continue

        # Field-level rules must carry the field name: the counter restarts on
        # every call, so without it rules on different fields of the same
        # model would share one metric name. Model-level rules have no field
        # name to include.
        if field_name is None:
            metric_name = f"{model_name}_quality_sql_{count}"
        else:
            metric_name = f"{model_name}_{field_name}_quality_sql_{count}"

        threshold = to_sodacl_threshold(quality)
        query = prepare_query(quality, model_name, field_name)
        if query is None:
            logger.warning(f"Quality check {metric_name} has no query")
            continue
        if threshold is None:
            logger.warning(f"Quality check {metric_name} has no valid threshold")
            continue

        checks[f"{metric_name} {threshold}"] = {f"{metric_name} query": query}
        count += 1

    return checks
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def prepare_query(quality: Quality, model_name: str, field_name: str = None) -> str | None:
    """Fill the placeholders of a quality rule's SQL query.

    ``{model}`` and ``{table}`` are replaced by ``model_name``. ``{field}``
    and ``{column}`` are replaced by ``field_name`` only when one is given.

    Returns:
        The substituted query, or ``None`` when the rule's query is ``None``
        or an empty string.
    """
    template = quality.query
    if template is None or template == "":
        return None

    substitutions = [("{model}", model_name), ("{table}", model_name)]
    if field_name is not None:
        substitutions += [("{field}", field_name), ("{column}", field_name)]

    for placeholder, replacement in substitutions:
        template = template.replace(placeholder, replacement)
    return template
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def to_sodacl_threshold(quality: Quality) -> str | None:
    """Translate a quality rule's bound into a SodaCL threshold expression.

    The first bound that is set wins, checked in this order: ``mustBe``,
    ``mustNotBe``, ``mustBeGreaterThan``, ``mustBeGreaterThanOrEqualTo``,
    ``mustBeLessThan``, ``mustBeLessThanOrEqualTo``, ``mustBeBetween``,
    ``mustNotBeBetween``.

    Returns:
        The threshold text (for example ``"= 0"`` or ``"between 1 and 10"``),
        or ``None`` when no bound is set or a between-bound does not have
        exactly two elements (a warning is logged in that case).
    """
    comparisons = (
        ("mustBe", "="),
        ("mustNotBe", "!="),
        ("mustBeGreaterThan", ">"),
        ("mustBeGreaterThanOrEqualTo", ">="),
        ("mustBeLessThan", "<"),
        ("mustBeLessThanOrEqualTo", "<="),
    )
    for attribute, operator in comparisons:
        bound = getattr(quality, attribute)
        if bound is not None:
            return f"{operator} {bound}"

    inclusive = quality.mustBeBetween
    if inclusive is not None:
        if len(inclusive) == 2:
            return f"between {inclusive[0]} and {inclusive[1]}"
        logger.warning(
            f"Quality check has invalid mustBeBetween, must have exactly 2 integers in an array: {quality.mustBeBetween}"
        )
        return None

    exclusive = quality.mustNotBeBetween
    if exclusive is not None:
        if len(exclusive) == 2:
            return f"not between {exclusive[0]} and {exclusive[1]}"
        logger.warning(
            f"Quality check has invalid mustNotBeBetween, must have exactly 2 integers in an array: {quality.mustNotBeBetween}"
        )
        return None

    return None
|
|
262
|
+
|
|
263
|
+
|
|
184
264
|
def add_quality_checks(sodacl, data_contract_spec):
|
|
185
265
|
if data_contract_spec.quality is None:
|
|
186
266
|
return
|
|
@@ -128,7 +128,9 @@ def to_data_type(field: Field) -> types.DataType:
|
|
|
128
128
|
if field_type in ["string", "varchar", "text"]:
|
|
129
129
|
return types.StringType()
|
|
130
130
|
if field_type in ["number", "decimal", "numeric"]:
|
|
131
|
-
|
|
131
|
+
precision = field.precision if field.precision is not None else 38
|
|
132
|
+
scale = field.scale if field.scale is not None else 0
|
|
133
|
+
return types.DecimalType(precision=precision, scale=scale)
|
|
132
134
|
if field_type in ["integer", "int"]:
|
|
133
135
|
return types.IntegerType()
|
|
134
136
|
if field_type == "long":
|
|
@@ -7,6 +7,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
|
|
|
7
7
|
return convert_to_snowflake(field)
|
|
8
8
|
elif server_type == "postgres":
|
|
9
9
|
return convert_type_to_postgres(field)
|
|
10
|
+
elif server_type == "dataframe":
|
|
11
|
+
return convert_to_dataframe(field)
|
|
10
12
|
elif server_type == "databricks":
|
|
11
13
|
return convert_to_databricks(field)
|
|
12
14
|
elif server_type == "local" or server_type == "s3":
|
|
@@ -108,6 +110,46 @@ def convert_type_to_postgres(field: Field) -> None | str:
|
|
|
108
110
|
return None
|
|
109
111
|
|
|
110
112
|
|
|
113
|
+
# dataframe data types:
|
|
114
|
+
# https://spark.apache.org/docs/latest/sql-ref-datatypes.html
|
|
115
|
+
def convert_to_dataframe(field: Field) -> None | str:
    """Return the dataframe SQL type name for a field.

    A ``dataframeType`` entry in ``field.config`` is returned as-is when
    present. Otherwise ``field.type`` is looked up case-insensitively;
    a missing or unlisted type yields ``None``.
    """
    if field.config and "dataframeType" in field.config:
        return field.config["dataframeType"]

    declared_type = field.type
    if declared_type is None:
        return None

    dataframe_types = {
        "string": "STRING",
        "varchar": "STRING",
        "text": "STRING",
        "timestamp": "TIMESTAMP",
        "timestamp_tz": "TIMESTAMP",
        "timestamp_ntz": "TIMESTAMP_NTZ",
        "date": "DATE",
        "time": "STRING",
        # precision and scale not supported by data contract
        "number": "DECIMAL",
        "decimal": "DECIMAL",
        "numeric": "DECIMAL",
        "float": "FLOAT",
        "double": "DOUBLE",
        "integer": "INT",
        "int": "INT",
        "long": "BIGINT",
        "bigint": "BIGINT",
        "boolean": "BOOLEAN",
        "object": "STRUCT",
        "record": "STRUCT",
        "struct": "STRUCT",
        "bytes": "BINARY",
        "array": "ARRAY",
    }
    return dataframe_types.get(declared_type.lower())
|
|
151
|
+
|
|
152
|
+
|
|
111
153
|
# databricks data types:
|
|
112
154
|
# https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
|
|
113
155
|
def convert_to_databricks(field: Field) -> None | str:
|
|
@@ -186,7 +228,7 @@ def convert_to_duckdb(field: Field) -> None | str:
|
|
|
186
228
|
"time": "TIME",
|
|
187
229
|
"timestamp": "TIMESTAMP WITH TIME ZONE",
|
|
188
230
|
"timestamp_tz": "TIMESTAMP WITH TIME ZONE",
|
|
189
|
-
"timestamp_ntz": "
|
|
231
|
+
"timestamp_ntz": "TIMESTAMP",
|
|
190
232
|
}
|
|
191
233
|
|
|
192
234
|
# Convert simple mappings
|
|
@@ -269,16 +311,7 @@ def convert_type_to_sqlserver(field: Field) -> None | str:
|
|
|
269
311
|
|
|
270
312
|
def convert_type_to_bigquery(field: Field) -> None | str:
|
|
271
313
|
"""Convert from supported datacontract types to equivalent bigquery types"""
|
|
272
|
-
|
|
273
|
-
if not field_type:
|
|
274
|
-
return None
|
|
275
|
-
|
|
276
|
-
# If provided sql-server config type, prefer it over default mapping
|
|
277
|
-
if bigquery_type := get_type_config(field, "bigqueryType"):
|
|
278
|
-
return bigquery_type
|
|
279
|
-
|
|
280
|
-
field_type = field_type.lower()
|
|
281
|
-
return map_type_to_bigquery(field_type, field.title)
|
|
314
|
+
return map_type_to_bigquery(field)
|
|
282
315
|
|
|
283
316
|
|
|
284
317
|
def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
|
|
@@ -290,25 +323,27 @@ def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
|
|
|
290
323
|
|
|
291
324
|
def convert_type_to_trino(field: Field) -> None | str:
|
|
292
325
|
"""Convert from supported datacontract types to equivalent trino types"""
|
|
293
|
-
|
|
326
|
+
if field.config and "trinoType" in field.config:
|
|
327
|
+
return field.config["trinoType"]
|
|
294
328
|
|
|
295
|
-
|
|
329
|
+
field_type = field.type.lower()
|
|
330
|
+
if field_type in ["string", "text", "varchar"]:
|
|
296
331
|
return "varchar"
|
|
297
332
|
# tinyint, smallint not supported by data contract
|
|
298
|
-
if field_type
|
|
333
|
+
if field_type in ["number", "decimal", "numeric"]:
|
|
299
334
|
# precision and scale not supported by data contract
|
|
300
335
|
return "decimal"
|
|
301
|
-
if field_type
|
|
336
|
+
if field_type in ["int", "integer"]:
|
|
302
337
|
return "integer"
|
|
303
|
-
if field_type
|
|
338
|
+
if field_type in ["long", "bigint"]:
|
|
304
339
|
return "bigint"
|
|
305
|
-
if field_type
|
|
340
|
+
if field_type in ["float"]:
|
|
306
341
|
return "real"
|
|
307
|
-
if field_type
|
|
342
|
+
if field_type in ["timestamp", "timestamp_tz"]:
|
|
308
343
|
return "timestamp(3) with time zone"
|
|
309
|
-
if field_type
|
|
344
|
+
if field_type in ["timestamp_ntz"]:
|
|
310
345
|
return "timestamp(3)"
|
|
311
|
-
if field_type
|
|
346
|
+
if field_type in ["bytes"]:
|
|
312
347
|
return "varbinary"
|
|
313
|
-
if field_type
|
|
348
|
+
if field_type in ["object", "record", "struct"]:
|
|
314
349
|
return "json"
|