datacontract-cli 0.10.13__py3-none-any.whl → 0.10.14__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of datacontract-cli has been flagged as possibly problematic.
- datacontract/cli.py +5 -0
- datacontract/data_contract.py +9 -1
- datacontract/engines/soda/connections/kafka.py +26 -5
- datacontract/export/avro_converter.py +8 -1
- datacontract/export/avro_idl_converter.py +1 -0
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/exporter.py +4 -1
- datacontract/export/exporter_factory.py +13 -1
- datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +4 -4
- datacontract/export/odcs_v3_exporter.py +294 -0
- datacontract/export/sodacl_converter.py +82 -2
- datacontract/export/spark_converter.py +3 -1
- datacontract/export/sql_type_converter.py +55 -11
- datacontract/imports/iceberg_importer.py +162 -0
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_importer.py +25 -168
- datacontract/imports/odcs_v2_importer.py +177 -0
- datacontract/imports/odcs_v3_importer.py +309 -0
- datacontract/integration/datamesh_manager.py +1 -1
- datacontract/lint/resolve.py +14 -9
- datacontract/lint/resources.py +21 -0
- datacontract/lint/urls.py +4 -2
- datacontract/model/data_contract_specification.py +72 -8
- datacontract/model/odcs.py +11 -0
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA +89 -51
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/RECORD +31 -25
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/WHEEL +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
@@ -244,6 +244,10 @@ def import_(
             help="List of table names to import from the DBML file (repeat for multiple table names, leave empty for all tables in the file)."
         ),
     ] = None,
+    iceberg_table: Annotated[
+        Optional[str],
+        typer.Option(help="Table name to assign to the model created from the Iceberg schema."),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.

@@ -259,6 +263,7 @@ def import_(
         dbt_model=dbt_model,
         dbml_schema=dbml_schema,
         dbml_table=dbml_table,
+        iceberg_table=iceberg_table,
     )
     console.print(result.to_yaml())
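With Typer's convention of deriving flags from parameter names, the new `iceberg_table` parameter surfaces on the CLI as `--iceberg-table`. A plausible invocation (hypothetical: the format value registered for the new Iceberg importer is not shown in this hunk) would be `datacontract import --format iceberg --source /path/to/iceberg-schema.json --iceberg-table orders`.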
datacontract/data_contract.py
CHANGED
@@ -199,7 +199,15 @@ class DataContract:

         except DataContractException as e:
             run.checks.append(
-                Check(
+                Check(
+                    type=e.type,
+                    name=e.name,
+                    result=e.result,
+                    reason=e.reason,
+                    model=e.model,
+                    engine=e.engine,
+                    details="",
+                )
             )
             run.log_error(str(e))
         except Exception as e:
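The recorded Check is now populated field by field from the DataContractException (type, name, result, reason, model, engine), so failed runs carry the full failure context in the run log.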
datacontract/engines/soda/connections/kafka.py
CHANGED

@@ -112,17 +112,38 @@ def get_auth_options():
     """Retrieve Kafka authentication options from environment variables."""
     kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
     kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")
+    kafka_sasl_mechanism = os.getenv("DATACONTRACT_KAFKA_SASL_MECHANISM", "PLAIN").upper()

-
+    # Skip authentication if credentials are not provided
+    if not kafka_sasl_username or not kafka_sasl_password:
         return {}

-
-
-        "
-        "kafka.sasl.jaas.config": (
+    # SASL mechanisms supported by Kafka
+    jaas_config = {
+        "PLAIN": (
             f"org.apache.kafka.common.security.plain.PlainLoginModule required "
             f'username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
         ),
+        "SCRAM-SHA-256": (
+            f"org.apache.kafka.common.security.scram.ScramLoginModule required "
+            f'username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
+        ),
+        "SCRAM-SHA-512": (
+            f"org.apache.kafka.common.security.scram.ScramLoginModule required "
+            f'username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
+        ),
+        # Add more mechanisms as needed
+    }
+
+    # Validate SASL mechanism
+    if kafka_sasl_mechanism not in jaas_config:
+        raise ValueError(f"Unsupported SASL mechanism: {kafka_sasl_mechanism}")
+
+    # Return config
+    return {
+        "kafka.sasl.mechanism": kafka_sasl_mechanism,
+        "kafka.security.protocol": "SASL_SSL",
+        "kafka.sasl.jaas.config": jaas_config[kafka_sasl_mechanism],
     }

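A quick illustration (not from the package) of how the new mechanism selection behaves; the environment variable names are the ones read by get_auth_options() above, while the credential values are invented:

import os

# Invented credentials; the variable names match those read by get_auth_options().
os.environ["DATACONTRACT_KAFKA_SASL_USERNAME"] = "svc-datacontract"
os.environ["DATACONTRACT_KAFKA_SASL_PASSWORD"] = "secret"
os.environ["DATACONTRACT_KAFKA_SASL_MECHANISM"] = "scram-sha-512"  # upper-cased internally; defaults to PLAIN

# Per the diff above, get_auth_options() now returns:
# {
#     "kafka.sasl.mechanism": "SCRAM-SHA-512",
#     "kafka.security.protocol": "SASL_SSL",
#     "kafka.sasl.jaas.config": 'org.apache.kafka.common.security.scram.ScramLoginModule required '
#                               'username="svc-datacontract" password="secret";',
# }
# An unsupported mechanism (e.g. "GSSAPI") now raises ValueError instead of silently
# producing a broken configuration.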
datacontract/export/avro_converter.py
CHANGED

@@ -81,9 +81,16 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
         return "null"
     if field.type in ["string", "varchar", "text"]:
         return "string"
-    elif field.type in ["number", "
+    elif field.type in ["number", "numeric"]:
         # https://avro.apache.org/docs/1.11.1/specification/#decimal
         return "bytes"
+    elif field.type in ["decimal"]:
+        typeVal = {"type": "bytes", "logicalType": "decimal"}
+        if field.scale is not None:
+            typeVal["scale"] = field.scale
+        if field.precision is not None:
+            typeVal["precision"] = field.precision
+        return typeVal
     elif field.type in ["float", "double"]:
         return "double"
     elif field.type in ["integer", "int"]:
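To make the new decimal branch concrete, a small sketch (FieldStub below is a stand-in that only mimics the type/precision/scale attributes of the real Field model):

from dataclasses import dataclass


@dataclass
class FieldStub:  # stand-in for datacontract's Field model, illustration only
    type: str = "decimal"
    precision: int | None = 10
    scale: int | None = 2


# Per the new branch, to_avro_type(FieldStub(), "price") yields an Avro logical decimal:
#   {"type": "bytes", "logicalType": "decimal", "scale": 2, "precision": 10}
# while plain "number"/"numeric" fields continue to map to bare "bytes".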
datacontract/export/exporter.py
CHANGED
@@ -10,7 +10,7 @@ class Exporter(ABC):
         self.export_format = export_format

     @abstractmethod
-    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict | str:
         pass


@@ -22,6 +22,8 @@ class ExportFormat(str, Enum):
     dbt_sources = "dbt-sources"
     dbt_staging_sql = "dbt-staging-sql"
     odcs = "odcs"
+    odcs_v2 = "odcs_v2"
+    odcs_v3 = "odcs_v3"
     rdf = "rdf"
     avro = "avro"
     protobuf = "protobuf"

@@ -37,6 +39,7 @@ class ExportFormat(str, Enum):
     spark = "spark"
     sqlalchemy = "sqlalchemy"
     data_caterer = "data-caterer"
+    dcs = "dcs"

     @classmethod
     def get_supported_formats(cls):
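The widened `dict | str` return type accommodates exporters that render directly to a string: the new ODCS v3 exporter below, for example, returns a YAML document rather than a dict.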
datacontract/export/exporter_factory.py
CHANGED

@@ -99,7 +99,15 @@ exporter_factory.register_lazy_exporter(
 )

 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.
+    name=ExportFormat.odcs_v2, module_path="datacontract.export.odcs_v2_exporter", class_name="OdcsV2Exporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.odcs_v3, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.odcs, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
 )

 exporter_factory.register_lazy_exporter(

@@ -155,3 +163,7 @@ exporter_factory.register_lazy_exporter(
     module_path="datacontract.export.sqlalchemy_converter",
     class_name="SQLAlchemyExporter",
 )
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.dcs, module_path="datacontract.export.dcs_exporter", class_name="DcsExporter"
+)
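Note the behavior change buried here: the existing `odcs` format name now resolves to the v3 exporter, so an existing `datacontract export --format odcs` invocation switches from ODCS v2.3.0 to v3.0.0 output with this release, while `--format odcs_v2` keeps the previous output reachable.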
datacontract/export/{odcs_converter.py → odcs_v2_exporter.py}
RENAMED

@@ -6,12 +6,12 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 from datacontract.export.exporter import Exporter


-class
+class OdcsV2Exporter(Exporter):
     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
-        return
+        return to_odcs_v2_yaml(data_contract)


-def
+def to_odcs_v2_yaml(data_contract_spec: DataContractSpecification):
     odcs = {
         "kind": "DataContract",
         "apiVersion": "2.3.0",

@@ -25,7 +25,7 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
     if data_contract_spec.info.contact is not None:
         if data_contract_spec.info.contact.email is not None:
             odcs["productDl"] = data_contract_spec.info.contact.email
-        if data_contract_spec.info.contact.
+        if data_contract_spec.info.contact.url is not None:
             odcs["productFeedbackUrl"] = data_contract_spec.info.contact.url

     if data_contract_spec.terms is not None:
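The renamed module is the old ODCS converter under an explicit version name: it still emits `apiVersion: 2.3.0` documents, now selected via the `odcs_v2` export format.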
datacontract/export/odcs_v3_exporter.py
ADDED

@@ -0,0 +1,294 @@
+from typing import Dict
+
+import yaml
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+
+
+class OdcsV3Exporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_odcs_v3_yaml(data_contract)
+
+
+def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
+    odcs = {
+        "apiVersion": "v3.0.0",
+        "kind": "DataContract",
+        "id": data_contract_spec.id,
+        "name": data_contract_spec.info.title,
+        "version": data_contract_spec.info.version,
+        "domain": data_contract_spec.info.owner,
+        "status": data_contract_spec.info.status,
+    }
+
+    if data_contract_spec.terms is not None:
+        odcs["description"] = {
+            "purpose": data_contract_spec.terms.description.strip()
+            if data_contract_spec.terms.description is not None
+            else None,
+            "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
+            "limitations": data_contract_spec.terms.limitations.strip()
+            if data_contract_spec.terms.limitations is not None
+            else None,
+        }
+
+    odcs["schema"] = []
+    for model_key, model_value in data_contract_spec.models.items():
+        odcs_schema = to_odcs_schema(model_key, model_value)
+        odcs["schema"].append(odcs_schema)
+
+    if data_contract_spec.servicelevels is not None:
+        slas = []
+        if data_contract_spec.servicelevels.availability is not None:
+            slas.append(
+                {
+                    "property": "generalAvailability",
+                    "value": data_contract_spec.servicelevels.availability.description,
+                }
+            )
+        if data_contract_spec.servicelevels.retention is not None:
+            slas.append({"property": "retention", "value": data_contract_spec.servicelevels.retention.period})
+
+        if len(slas) > 0:
+            odcs["slaProperties"] = slas
+
+    if data_contract_spec.info.contact is not None:
+        support = []
+        if data_contract_spec.info.contact.email is not None:
+            support.append(
+                {
+                    "channel": "email",
+                    "url": "mailto:" + data_contract_spec.info.contact.email,
+                }
+            )
+        if data_contract_spec.info.contact.url is not None:
+            support.append(
+                {
+                    "channel": "other",
+                    "url": data_contract_spec.info.contact.url,
+                }
+            )
+        if len(support) > 0:
+            odcs["support"] = support
+
+    if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
+        servers = []
+
+        for server_key, server_value in data_contract_spec.servers.items():
+            server_dict = {}
+            server_dict["server"] = server_key
+            if server_value.type is not None:
+                server_dict["type"] = server_value.type
+            if server_value.environment is not None:
+                server_dict["environment"] = server_value.environment
+            if server_value.account is not None:
+                server_dict["account"] = server_value.account
+            if server_value.database is not None:
+                server_dict["database"] = server_value.database
+            if server_value.schema_ is not None:
+                server_dict["schema"] = server_value.schema_
+            if server_value.format is not None:
+                server_dict["format"] = server_value.format
+            if server_value.project is not None:
+                server_dict["project"] = server_value.project
+            if server_value.dataset is not None:
+                server_dict["dataset"] = server_value.dataset
+            if server_value.path is not None:
+                server_dict["path"] = server_value.path
+            if server_value.delimiter is not None:
+                server_dict["delimiter"] = server_value.delimiter
+            if server_value.endpointUrl is not None:
+                server_dict["endpointUrl"] = server_value.endpointUrl
+            if server_value.location is not None:
+                server_dict["location"] = server_value.location
+            if server_value.host is not None:
+                server_dict["host"] = server_value.host
+            if server_value.port is not None:
+                server_dict["port"] = server_value.port
+            if server_value.catalog is not None:
+                server_dict["catalog"] = server_value.catalog
+            if server_value.topic is not None:
+                server_dict["topic"] = server_value.topic
+            if server_value.http_path is not None:
+                server_dict["http_path"] = server_value.http_path
+            if server_value.token is not None:
+                server_dict["token"] = server_value.token
+            if server_value.driver is not None:
+                server_dict["driver"] = server_value.driver
+            if server_value.roles is not None:
+                server_dict["roles"] = [
+                    {"name": role.name, "description": role.description} for role in server_value.roles
+                ]
+            servers.append(server_dict)
+
+        if len(servers) > 0:
+            odcs["servers"] = servers
+
+    odcs["customProperties"] = []
+    if data_contract_spec.info.model_extra is not None:
+        for key, value in data_contract_spec.info.model_extra.items():
+            odcs["customProperties"].append({"property": key, "value": value})
+    if len(odcs["customProperties"]) == 0:
+        del odcs["customProperties"]
+
+    return yaml.dump(odcs, indent=2, sort_keys=False, allow_unicode=True)
+
+
+def to_odcs_schema(model_key, model_value: Model) -> dict:
+    odcs_table = {
+        "name": model_key,
+        "physicalName": model_key,
+        "logicalType": "object",
+        "physicalType": model_value.type,
+    }
+    if model_value.description is not None:
+        odcs_table["description"] = model_value.description
+    properties = to_properties(model_value.fields)
+    if properties:
+        odcs_table["properties"] = properties
+
+    odcs_table["customProperties"] = []
+    if model_value.model_extra is not None:
+        for key, value in model_value.model_extra.items():
+            odcs_table["customProperties"].append({"property": key, "value": value})
+    if len(odcs_table["customProperties"]) == 0:
+        del odcs_table["customProperties"]
+
+    return odcs_table
+
+
+def to_properties(fields: Dict[str, Field]) -> list:
+    properties = []
+    for field_name, field in fields.items():
+        property = to_property(field_name, field)
+        properties.append(property)
+    return properties
+
+
+def to_logical_type(type: str) -> str | None:
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "string"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "date"
+    if type.lower() in ["timestamp_ntz"]:
+        return "date"
+    if type.lower() in ["date"]:
+        return "date"
+    if type.lower() in ["time"]:
+        return "string"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        return "number"
+    if type.lower() in ["float", "double"]:
+        return "number"
+    if type.lower() in ["integer", "int", "long", "bigint"]:
+        return "integer"
+    if type.lower() in ["boolean"]:
+        return "boolean"
+    if type.lower() in ["object", "record", "struct"]:
+        return "object"
+    if type.lower() in ["bytes"]:
+        return "array"
+    if type.lower() in ["array"]:
+        return "array"
+    if type.lower() in ["null"]:
+        return None
+    return None
+
+
+def to_physical_type(type: str) -> str | None:
+    # TODO: to we need to do a server mapping here?
+    return type
+
+
+def to_property(field_name: str, field: Field) -> dict:
+    property = {"name": field_name}
+    if field.title is not None:
+        property["businessName"] = field.title
+    if field.type is not None:
+        property["logicalType"] = to_logical_type(field.type)
+        property["physicalType"] = to_physical_type(field.type)
+    if field.description is not None:
+        property["description"] = field.description
+    if field.required is not None:
+        property["isNullable"] = not field.required
+    if field.unique is not None:
+        property["isUnique"] = field.unique
+    if field.classification is not None:
+        property["classification"] = field.classification
+    if field.examples is not None:
+        property["examples"] = field.examples
+    if field.example is not None:
+        property["examples"] = [field.example]
+
+    property["customProperties"] = []
+    if field.model_extra is not None:
+        for key, value in field.model_extra.items():
+            property["customProperties"].append({"property": key, "value": value})
+    if field.pii is not None:
+        property["customProperties"].append({"property": "pii", "value": field.pii})
+    if property.get("customProperties") is not None and len(property["customProperties"]) == 0:
+        del property["customProperties"]
+
+    property["tags"] = []
+    if field.tags is not None:
+        property["tags"].extend(field.tags)
+    if not property["tags"]:
+        del property["tags"]
+
+    property["logicalTypeOptions"] = {}
+    if field.minLength is not None:
+        property["logicalTypeOptions"]["minLength"] = field.minLength
+    if field.maxLength is not None:
+        property["logicalTypeOptions"]["maxLength"] = field.maxLength
+    if field.pattern is not None:
+        property["logicalTypeOptions"]["pattern"] = field.pattern
+    if field.minimum is not None:
+        property["logicalTypeOptions"]["minimum"] = field.minimum
+    if field.maximum is not None:
+        property["logicalTypeOptions"]["maximum"] = field.maximum
+    if field.exclusiveMinimum is not None:
+        property["logicalTypeOptions"]["exclusiveMinimum"] = field.exclusiveMinimum
+    if field.exclusiveMaximum is not None:
+        property["logicalTypeOptions"]["exclusiveMaximum"] = field.exclusiveMaximum
+    if property["logicalTypeOptions"] == {}:
+        del property["logicalTypeOptions"]
+
+    if field.quality is not None:
+        quality_property = []
+        for quality in field.quality:
+            quality_dict = {"type": quality.type}
+            if quality.description is not None:
+                quality_dict["description"] = quality.description
+            if quality.query is not None:
+                quality_dict["query"] = quality.query
+            # dialect is not supported in v3.0.0
+            if quality.mustBe is not None:
+                quality_dict["mustBe"] = quality.mustBe
+            if quality.mustNotBe is not None:
+                quality_dict["mustNotBe"] = quality.mustNotBe
+            if quality.mustBeGreaterThan is not None:
+                quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
+            if quality.mustBeGreaterThanOrEqualTo is not None:
+                quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
+            if quality.mustBeLessThan is not None:
+                quality_dict["mustBeLessThan"] = quality.mustBeLessThan
+            if quality.mustBeLessThanOrEqualTo is not None:
+                quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
+            if quality.mustBeBetween is not None:
+                quality_dict["mustBeBetween"] = quality.mustBeBetween
+            if quality.mustNotBeBetween is not None:
+                quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
+            if quality.engine is not None:
+                quality_dict["engine"] = quality.engine
+            if quality.implementation is not None:
+                quality_dict["implementation"] = quality.implementation
+            quality_property.append(quality_dict)
+        if len(quality_property) > 0:
+            property["quality"] = quality_property
+
+    # todo enum
+
+    return property
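A minimal usage sketch for the new exporter (all field values are invented; this assumes the spec's pydantic models accept `status` on `Info`, whether as a declared or an extra field):

from datacontract.export.odcs_v3_exporter import to_odcs_v3_yaml
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Field,
    Info,
    Model,
)

spec = DataContractSpecification(
    id="orders-contract",  # invented example values throughout
    info=Info(title="Orders", version="1.0.0", owner="sales-team", status="active"),
    models={
        "orders": Model(
            type="table",
            description="One row per order.",
            fields={"order_id": Field(type="string", required=True, unique=True)},
        )
    },
)

# Prints an ODCS v3.0.0 YAML document: apiVersion/kind/id/name/version/domain/status,
# plus a schema entry for "orders" with an "order_id" property.
print(to_odcs_v3_yaml(spec))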
datacontract/export/sodacl_converter.py
CHANGED

@@ -1,8 +1,11 @@
+from typing import List
+from venv import logger
+
 import yaml

-from datacontract.export.sql_type_converter import convert_to_sql_type
-from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.export.exporter import Exporter
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.data_contract_specification import DataContractSpecification, Quality


 class SodaExporter(Exporter):
@@ -58,9 +61,14 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
             checks.append(check_field_regex(field_name, field.pattern, quote_field_name))
         if field.enum is not None and len(field.enum) > 0:
             checks.append(check_field_enum(field_name, field.enum, quote_field_name))
+        if field.quality is not None and len(field.quality) > 0:
+            checks.append(check_quality_list(model_key, field_name, field.quality))
         # TODO references: str = None
         # TODO format

+    if model_value.quality is not None and len(model_value.quality) > 0:
+        checks.append(check_quality_list(model_key, None, model_value.quality))
+
     checks_for_model_key = f"checks for {model_key}"

     if quote_field_name:
@@ -181,6 +189,78 @@ def check_field_regex(field_name, pattern, quote_field_name: bool = False):
     }


+def check_quality_list(model_name, field_name, quality_list: List[Quality]):
+    checks = {}
+
+    count = 0
+    for quality in quality_list:
+        if quality.type == "sql":
+            if field_name is None:
+                metric_name = f"{model_name}_quality_sql_{count}"
+            else:
+                metric_name = f"{model_name}_{field_name}_quality_sql_{count}"
+            threshold = to_sodacl_threshold(quality)
+            query = prepare_query(quality, model_name, field_name)
+            if query is None:
+                logger.warning(f"Quality check {metric_name} has no query")
+                continue
+            if threshold is None:
+                logger.warning(f"Quality check {metric_name} has no valid threshold")
+                continue
+            checks[f"{metric_name} {threshold}"] = {f"{metric_name} query": query}
+            count += 1
+
+    return checks
+
+
+def prepare_query(quality: Quality, model_name: str, field_name: str = None) -> str | None:
+    if quality.query is None:
+        return None
+    if quality.query == "":
+        return None
+
+    query = quality.query
+
+    query = query.replace("{model}", model_name)
+    query = query.replace("{table}", model_name)
+
+    if field_name is not None:
+        query = query.replace("{field}", field_name)
+        query = query.replace("{column}", field_name)
+
+    return query
+
+
+def to_sodacl_threshold(quality: Quality) -> str | None:
+    if quality.mustBe is not None:
+        return f"= {quality.mustBe}"
+    if quality.mustNotBe is not None:
+        return f"!= {quality.mustNotBe}"
+    if quality.mustBeGreaterThan is not None:
+        return f"> {quality.mustBeGreaterThan}"
+    if quality.mustBeGreaterThanOrEqualTo is not None:
+        return f">= {quality.mustBeGreaterThanOrEqualTo}"
+    if quality.mustBeLessThan is not None:
+        return f"< {quality.mustBeLessThan}"
+    if quality.mustBeLessThanOrEqualTo is not None:
+        return f"<= {quality.mustBeLessThanOrEqualTo}"
+    if quality.mustBeBetween is not None:
+        if len(quality.mustBeBetween) != 2:
+            logger.warning(
+                f"Quality check has invalid mustBeBetween, must have exactly 2 integers in an array: {quality.mustBeBetween}"
+            )
+            return None
+        return f"between {quality.mustBeBetween[0]} and {quality.mustBeBetween[1]}"
+    if quality.mustNotBeBetween is not None:
+        if len(quality.mustNotBeBetween) != 2:
+            logger.warning(
+                f"Quality check has invalid mustNotBeBetween, must have exactly 2 integers in an array: {quality.mustNotBeBetween}"
+            )
+            return None
+        return f"not between {quality.mustNotBeBetween[0]} and {quality.mustNotBeBetween[1]}"
+    return None
+
+
 def add_quality_checks(sodacl, data_contract_spec):
     if data_contract_spec.quality is None:
         return
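A worked example of how these pieces combine (the query is invented): a model-level SQL quality check with a mustBeGreaterThanOrEqualTo threshold becomes a named SodaCL metric with an attached query.

from datacontract.model.data_contract_specification import Quality

quality = Quality(
    type="sql",
    query="SELECT COUNT(*) FROM {model} WHERE order_total >= 0",  # invented; {model} is substituted by prepare_query
    mustBeGreaterThanOrEqualTo=1,
)

# check_quality_list("orders", None, [quality]) yields the SodaCL fragment:
# {
#     "orders_quality_sql_0 >= 1": {
#         "orders_quality_sql_0 query": "SELECT COUNT(*) FROM orders WHERE order_total >= 0"
#     }
# }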
datacontract/export/spark_converter.py
CHANGED

@@ -128,7 +128,9 @@ def to_data_type(field: Field) -> types.DataType:
     if field_type in ["string", "varchar", "text"]:
         return types.StringType()
     if field_type in ["number", "decimal", "numeric"]:
-
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return types.DecimalType(precision=precision, scale=scale)
     if field_type in ["integer", "int"]:
         return types.IntegerType()
     if field_type == "long":