datacontract-cli 0.9.7__py3-none-any.whl → 0.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datacontract/breaking/breaking.py +48 -57
- datacontract/cli.py +100 -80
- datacontract/data_contract.py +178 -128
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +58 -36
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +25 -8
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +9 -11
- datacontract/export/avro_idl_converter.py +65 -42
- datacontract/export/csv_type_converter.py +36 -0
- datacontract/export/dbt_converter.py +43 -32
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/html_export.py +46 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +5 -7
- datacontract/export/protobuf_converter.py +12 -10
- datacontract/export/pydantic_converter.py +131 -0
- datacontract/export/rdf_converter.py +34 -11
- datacontract/export/sodacl_converter.py +118 -21
- datacontract/export/sql_converter.py +30 -8
- datacontract/export/sql_type_converter.py +44 -4
- datacontract/export/terraform_converter.py +4 -3
- datacontract/imports/avro_importer.py +65 -18
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +6 -12
- datacontract/integration/publish_opentelemetry.py +30 -16
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +26 -31
- datacontract/lint/linters/description_linter.py +12 -21
- datacontract/lint/linters/example_model_linter.py +28 -29
- datacontract/lint/linters/field_pattern_linter.py +8 -8
- datacontract/lint/linters/field_reference_linter.py +11 -10
- datacontract/lint/linters/notice_period_linter.py +18 -22
- datacontract/lint/linters/quality_schema_linter.py +16 -20
- datacontract/lint/linters/valid_constraints_linter.py +42 -37
- datacontract/lint/resolve.py +50 -14
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +2 -1
- datacontract/model/data_contract_specification.py +8 -7
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +3 -2
- datacontract/web.py +3 -7
- datacontract_cli-0.9.9.dist-info/METADATA +951 -0
- datacontract_cli-0.9.9.dist-info/RECORD +64 -0
- datacontract/lint/linters/primary_field_linter.py +0 -30
- datacontract_cli-0.9.7.dist-info/METADATA +0 -603
- datacontract_cli-0.9.7.dist-info/RECORD +0 -61
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/top_level.txt +0 -0
--- a/datacontract/export/avro_idl_converter.py
+++ b/datacontract/export/avro_idl_converter.py
@@ -1,12 +1,14 @@
-from datacontract.model.data_contract_specification import DataContractSpecification, Field
-from datacontract.lint.resolve import inline_definitions_into_data_contract
+import typing
 from dataclasses import dataclass
 from enum import Enum
-import typing
 from io import StringIO
 
+from datacontract.lint.resolve import inline_definitions_into_data_contract
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Field
 from datacontract.model.exceptions import DataContractException
 
+
 def to_avro_idl(contract: DataContractSpecification) -> str:
     """Serialize the provided data contract specification into an Avro IDL string.
 
@@ -18,6 +20,7 @@ def to_avro_idl(contract: DataContractSpecification) -> str:
     to_avro_idl_stream(contract, stream)
     return stream.getvalue()
 
+
 def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
     """Serialize the provided data contract specification into Avro IDL."""
     ir = _contract_to_avro_idl_ir(contract)
@@ -28,6 +31,7 @@ def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextI
         _write_model_type(model_type, stream)
     stream.write("}\n")
 
+
 class AvroPrimitiveType(Enum):
     int = "int"
     long = "long"
@@ -38,49 +42,71 @@ class AvroPrimitiveType(Enum):
     null = "null"
     bytes = "bytes"
 
+
 class AvroLogicalType(Enum):
     decimal = "decimal"
     date = "date"
     time_ms = "time_ms"
     timestamp_ms = "timestamp_ms"
 
+
 @dataclass
 class AvroField:
     name: str
     required: bool
     description: typing.Optional[str]
 
+
 @dataclass
 class AvroPrimitiveField(AvroField):
     type: typing.Union[AvroPrimitiveType, AvroLogicalType]
 
+
 @dataclass
 class AvroComplexField(AvroField):
     subfields: list[AvroField]
 
+
 @dataclass
 class AvroArrayField(AvroField):
     type: AvroField
 
+
 @dataclass
 class AvroModelType:
     name: str
     description: typing.Optional[str]
     fields: list[AvroField]
 
+
 @dataclass
 class AvroIDLProtocol:
     name: typing.Optional[str]
     description: typing.Optional[str]
     model_types: list[AvroModelType]
 
-… (7 removed lines: the previous avro_primitive_types definition; text not preserved in this view)
+
+avro_primitive_types = set(
+    [
+        "string",
+        "text",
+        "varchar",
+        "float",
+        "double",
+        "int",
+        "integer",
+        "long",
+        "bigint",
+        "boolean",
+        "timestamp_ntz",
+        "timestamp",
+        "timestamp_tz",
+        "date",
+        "bytes",
+        "null",
+    ]
+)
+
 
 def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimitiveField:
     result = AvroPrimitiveField(field_name, field.required, field.description, AvroPrimitiveType.string)
@@ -114,10 +140,11 @@ def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimit
             model=field,
             reason="Unknown field type {field.type}",
             result="failed",
-            message="Avro IDL type conversion failed."
+            message="Avro IDL type conversion failed.",
         )
     return result
 
+
 def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
     if field.type in avro_primitive_types:
         return _to_avro_primitive_logical_type(field_name, field)
@@ -125,17 +152,14 @@ def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
     match field.type:
         case "array":
             return AvroArrayField(
-                field_name,
-                field.required,
-                field.description,
-                _to_avro_idl_type(field_name, field.items)
+                field_name, field.required, field.description, _to_avro_idl_type(field_name, field.items)
             )
         case "object" | "record" | "struct":
             return AvroComplexField(
                 field_name,
                 field.required,
                 field.description,
-                [_to_avro_idl_type(field_name, field) for (field_name, field) in field.fields.items()]
+                [_to_avro_idl_type(field_name, field) for (field_name, field) in field.fields.items()],
             )
         case _:
             raise DataContractException(
@@ -144,56 +168,55 @@ def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
                 model=type,
                 reason="Unknown Data Contract field type",
                 result="failed",
-                message="Avro IDL type conversion failed."
+                message="Avro IDL type conversion failed.",
             )
 
 
 def _generate_field_types(contract: DataContractSpecification) -> list[AvroField]:
     result = []
-    for …
-        for …
+    for _, model in contract.models.items():
+        for field_name, field in model.fields.items():
             result.append(_to_avro_idl_type(field_name, field))
     return result
 
+
 def generate_model_types(contract: DataContractSpecification) -> list[AvroModelType]:
     result = []
-    for …
-        result.append(…
-            name=model_name,
-            …
-            fields=_generate_field_types(contract)
-        ))
+    for model_name, model in contract.models.items():
+        result.append(
+            AvroModelType(name=model_name, description=model.description, fields=_generate_field_types(contract))
+        )
     return result
 
+
 def _model_name_to_identifier(model_name: str):
-    return "".join([word.title() for word in …
+    return "".join([word.title() for word in model_name.split()])
 
-def _contract_to_avro_idl_ir(contract: DataContractSpecification) -> AvroIDLProtocol:
 
+def _contract_to_avro_idl_ir(contract: DataContractSpecification) -> AvroIDLProtocol:
     """Convert models into an intermediate representation for later serialization into Avro IDL.
 
-    …
-    …
+    Each model is converted to a record containing a field for each model field.
+    """
     inlined_contract = contract.model_copy()
     inline_definitions_into_data_contract(inlined_contract)
-    protocol_name = …
-    … (5 removed lines not preserved in this view)
-    return AvroIDLProtocol(name=protocol_name,
-                           description=description,
-                           model_types=generate_model_types(inlined_contract))
+    protocol_name = _model_name_to_identifier(contract.info.title) if contract.info and contract.info.title else None
+    description = contract.info.description if contract.info and contract.info.description else None
+    return AvroIDLProtocol(
+        name=protocol_name, description=description, model_types=generate_model_types(inlined_contract)
+    )
+
 
 def _write_indent(indent: int, stream: typing.TextIO):
     stream.write(" " * indent)
 
+
 def _write_field_description(field: AvroField, indent: int, stream: typing.TextIO):
     if field.description:
         _write_indent(indent, stream)
         stream.write(f"/** {field.description} */\n")
 
+
 def _write_field_type_definition(field: AvroField, indent: int, stream: typing.TextIO) -> str:
     # Write any extra information (such as record type definition) and return
     # the name of the generated type. Writes descriptions only for record
@@ -215,7 +238,7 @@ def _write_field_type_definition(field: AvroField, indent: int, stream: typing.T
             for subfield in subfields:
                 subfield_types.append(_write_field_type_definition(subfield, indent + 1, stream))
             # Reference all defined record types.
-            for …
+            for field, subfield_type in zip(field.subfields, subfield_types):
                 _write_field_description(field, indent + 1, stream)
                 _write_indent(indent + 1, stream)
                 stream.write(f"{subfield_type} {field.name};\n")
@@ -234,15 +257,15 @@ def _write_field_type_definition(field: AvroField, indent: int, stream: typing.T
         case _:
             raise RuntimeError("Unknown Avro field type {field}")
 
-
-def _write_field(field: AvroField, indent,
-                 stream: typing.TextIO):
+
+def _write_field(field: AvroField, indent, stream: typing.TextIO):
     # Start of recursion.
     typename = _write_field_type_definition(field, indent, stream)
     _write_field_description(field, indent, stream)
     _write_indent(indent, stream)
     stream.write(f"{typename} {field.name};\n")
 
+
 def _write_model_type(model: AvroModelType, stream: typing.TextIO):
     # Called once for each model
     if model.description:
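As a quick orientation for the rewritten converter, a minimal usage sketch. Only to_avro_idl and DataContractSpecification come from the package; the contract content, model, and field names are invented for illustration, and the sketch assumes the pydantic model accepts nested dicts with defaults for omitted sections:

from datacontract.export.avro_idl_converter import to_avro_idl
from datacontract.model.data_contract_specification import DataContractSpecification

# Hypothetical minimal contract; model and field names are illustrative only.
contract = DataContractSpecification.model_validate(
    {
        "info": {"title": "orders latest", "description": "Order events"},
        "models": {
            "orders": {
                "fields": {
                    "order_id": {"type": "string", "required": True},
                }
            }
        },
    }
)

# info.title "orders latest" is turned into the protocol identifier
# "OrdersLatest" by _model_name_to_identifier; each model becomes a record.
print(to_avro_idl(contract))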
--- /dev/null
+++ b/datacontract/export/csv_type_converter.py
@@ -0,0 +1,36 @@
+# https://duckdb.org/docs/data/csv/overview.html
+# ['SQLNULL', 'BOOLEAN', 'BIGINT', 'DOUBLE', 'TIME', 'DATE', 'TIMESTAMP', 'VARCHAR']
+def convert_to_duckdb_csv_type(field) -> None | str:
+    type = field.type
+    if type is None:
+        return "VARCHAR"
+    if type.lower() in ["string", "varchar", "text"]:
+        return "VARCHAR"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "TIME"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "VARCHAR"
+    if type.lower() in ["float", "double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int", "long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if type.lower() in ["bytes"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if type.lower() in ["array"]:
+        return "VARCHAR"
+    if type.lower() in ["null"]:
+        return "SQLNULL"
+    return "VARCHAR"
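A small sketch of the new DuckDB CSV type mapping in use. The SimpleNamespace stand-in is hypothetical; the converter only reads the field's .type attribute:

from types import SimpleNamespace

from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type

# Stand-in for a data contract Field; only .type is read by the converter.
assert convert_to_duckdb_csv_type(SimpleNamespace(type="bigint")) == "BIGINT"

# Decimals carry no precision/scale in the contract, so they fall back to VARCHAR.
assert convert_to_duckdb_csv_type(SimpleNamespace(type="decimal")) == "VARCHAR"

# A missing type also defaults to VARCHAR.
assert convert_to_duckdb_csv_type(SimpleNamespace(type=None)) == "VARCHAR"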
--- a/datacontract/export/dbt_converter.py
+++ b/datacontract/export/dbt_converter.py
@@ -7,9 +7,6 @@ from datacontract.model.data_contract_specification import \
     DataContractSpecification, Model, Field
 
 
-
-
-
 def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
     dbt = {
         "version": 2,
@@ -23,7 +20,7 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
 
 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
     if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-        print(…)
+        print("Export to dbt-staging-sql currently only works with exactly one model in the data contract.")
         return ""
 
     id = data_contract_spec.id
@@ -39,15 +36,10 @@ def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name
 
 
 def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: str = None):
-    source = {
-        "name": data_contract_spec.id,
-        "tables": []
-    }
+    source = {"name": data_contract_spec.id, "tables": []}
     dbt = {
         "version": 2,
-        "sources": [
-            source
-        ],
+        "sources": [source],
     }
     if data_contract_spec.info.owner is not None:
         source["meta"] = {"owner": data_contract_spec.info.owner}
@@ -82,20 +74,14 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
         "name": model_key,
     }
     model_type = _to_dbt_model_type(model_value.type)
-    dbt_model["config"] = {
-        "meta": {
-            "data_contract": data_contract_spec.id
-        }
-    }
+    dbt_model["config"] = {"meta": {"data_contract": data_contract_spec.id}}
     dbt_model["config"]["materialized"] = model_type
 
     if data_contract_spec.info.owner is not None:
         dbt_model["config"]["meta"]["owner"] = data_contract_spec.info.owner
 
     if _supports_constraints(model_type):
-        dbt_model["config"]["contract"] = {
-            "enforced": True
-        }
+        dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
         dbt_model["description"] = model_value.description
     columns = _to_columns(model_value.fields, _supports_constraints(model_type), True)
@@ -138,8 +124,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         column["data_type"] = dbt_type
     else:
         column.setdefault("tests", []).append(
-            {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {
-                "column_type": dbt_type}})
+            {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
+        )
     if field.description is not None:
         column["description"] = field.description
     if field.required:
@@ -161,7 +147,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
     if field.maxLength is not None:
         length_test["max_value"] = field.maxLength
     column.setdefault("tests", []).append(
-        {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test})
+        {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
+    )
     if field.pii is not None:
         column.setdefault("meta", {})["pii"] = field.pii
     if field.classification is not None:
@@ -171,15 +158,26 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
     if field.pattern is not None:
         # Beware, the data contract pattern is a regex, not a like pattern
         column.setdefault("tests", []).append(
-            {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}})
-    if field.minimum is not None or field.maximum is not None and field.exclusiveMinimum is None and field.exclusiveMaximum is None:
+            {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
+        )
+    if (
+        field.minimum is not None
+        or field.maximum is not None
+        and field.exclusiveMinimum is None
+        and field.exclusiveMaximum is None
+    ):
         range_test = {}
         if field.minimum is not None:
             range_test["min_value"] = field.minimum
         if field.maximum is not None:
             range_test["max_value"] = field.maximum
         column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
-    elif field.exclusiveMinimum is not None or field.exclusiveMaximum is not None and field.minimum is None and field.maximum is None:
+    elif (
+        field.exclusiveMinimum is not None
+        or field.exclusiveMaximum is not None
+        and field.minimum is None
+        and field.maximum is None
+    ):
         range_test = {}
         if field.exclusiveMinimum is not None:
             range_test["min_value"] = field.exclusiveMinimum
@@ -190,17 +188,30 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
     else:
         if field.minimum is not None:
             column.setdefault("tests", []).append(
-                {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}})
+                {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
+            )
         if field.maximum is not None:
             column.setdefault("tests", []).append(
-                {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}})
+                {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
+            )
         if field.exclusiveMinimum is not None:
-            column.setdefault("tests", []).append(
-                …
+            column.setdefault("tests", []).append(
+                {
+                    "dbt_expectations.expect_column_values_to_be_between": {
+                        "min_value": field.exclusiveMinimum,
+                        "strictly": True,
+                    }
+                }
+            )
         if field.exclusiveMaximum is not None:
-            column.setdefault("tests", []).append(
-                …
+            column.setdefault("tests", []).append(
+                {
+                    "dbt_expectations.expect_column_values_to_be_between": {
+                        "max_value": field.exclusiveMaximum,
+                        "strictly": True,
+                    }
+                }
+            )
 
     # TODO: all constraints
     return column
-
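The dbt hunks above are mostly black-style rewrapping, but they also make the shape of the emitted dbt_expectations tests easier to see. A sketch of the two range-test variants appended by _to_column, with constraint values invented for illustration:

# Inclusive bounds (field.minimum / field.maximum) share one test entry.
range_test = {"dbt_expectations.expect_column_values_to_be_between": {"min_value": 0, "max_value": 100}}

# Exclusive bounds add "strictly": True, so the boundary value itself fails.
exclusive_min_test = {
    "dbt_expectations.expect_column_values_to_be_between": {
        "min_value": 0,
        "strictly": True,
    }
}
print(range_test)
print(exclusive_min_test)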
--- /dev/null
+++ b/datacontract/export/great_expectations_converter.py
@@ -0,0 +1,141 @@
+import json
+from typing import Dict, List, Any
+
+import yaml
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Field, Quality
+
+
+def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
+    """
+    Convert each model in the contract to a Great Expectation suite
+    @param data_contract_spec: data contract to export to great expectations
+    @param model_key: model to great expectations to
+    @return: a dictionary of great expectation suites
+    """
+    expectations = []
+    model_value = data_contract_spec.models.get(model_key)
+    quality_checks = get_quality_checks(data_contract_spec.quality)
+    expectations.extend(model_to_expectations(model_value.fields))
+    expectations.extend(checks_to_expectations(quality_checks, model_key))
+    model_expectation_suite = to_suite(model_key, data_contract_spec.info.version, expectations)
+
+    return model_expectation_suite
+
+
+def to_suite(
+    model_key: str,
+    contract_version: str,
+    expectations: List[Dict[str, Any]],
+) -> str:
+    return json.dumps(
+        {
+            "data_asset_type": "null",
+            "expectation_suite_name": "user-defined.{model_key}.{contract_version}".format(
+                model_key=model_key, contract_version=contract_version
+            ),
+            "expectations": expectations,
+            "meta": {},
+        },
+        indent=2,
+    )
+
+
+def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
+    """
+    Convert the model information to expectations
+    @param fields: model field
+    @return: list of expectations
+    """
+    expectations = []
+    add_column_order_exp(fields, expectations)
+    for field_name, field in fields.items():
+        add_field_expectations(field_name, field, expectations)
+    return expectations
+
+
+def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    if field.type is not None:
+        expectations.append(to_column_types_exp(field_name, field.type))
+    if field.unique:
+        expectations.append(to_column_unique_exp(field_name))
+    if field.maxLength is not None or field.minLength is not None:
+        expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
+    if field.minimum is not None or field.maximum is not None:
+        expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
+
+    # TODO: all constraints
+    return expectations
+
+
+def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
+    expectations.append(
+        {
+            "expectation_type": "expect_table_columns_to_match_ordered_list",
+            "kwargs": {"column_list": list(fields.keys())},
+            "meta": {},
+        }
+    )
+
+
+def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
+    return {
+        "expectation_type": "expect_column_values_to_be_of_type",
+        "kwargs": {"column": field_name, "type_": field_type},
+        "meta": {},
+    }
+
+
+def to_column_unique_exp(field_name) -> Dict[str, Any]:
+    return {"expectation_type": "expect_column_values_to_be_unique", "kwargs": {"column": field_name}, "meta": {}}
+
+
+def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
+    return {
+        "expectation_type": "expect_column_value_lengths_to_be_between",
+        "kwargs": {"column": field_name, "min_value": min_length, "max_value": max_length},
+        "meta": {},
+    }
+
+
+def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
+    return {
+        "expectation_type": "expect_column_values_to_be_between",
+        "kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
+        "meta": {},
+    }
+
+
+def get_quality_checks(quality: Quality) -> Dict[str, Any]:
+    if quality is None:
+        return {}
+    if quality.type is None:
+        return {}
+    if quality.type.lower() != "great-expectations":
+        return {}
+    if isinstance(quality.specification, str):
+        quality_specification = yaml.safe_load(quality.specification)
+    else:
+        quality_specification = quality.specification
+    return quality_specification
+
+
+def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
+    """
+    Get the quality definition for each model to the model expectation list
+    @param quality_checks: dictionary of quality checks by model
+    @param model_key: id of the model
+    @return: the list of expectations for that model
+    """
+    if quality_checks is None or model_key not in quality_checks:
+        return []
+
+    model_quality_checks = quality_checks[model_key]
+
+    if model_quality_checks is None:
+        return []
+
+    if isinstance(model_quality_checks, str):
+        expectation_list = json.loads(model_quality_checks)
+        return expectation_list
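To make the new exporter's output concrete: for a hypothetical model orders with a single string field order_id at contract version 1.0.0, the suite produced by to_great_expectations would serialize roughly as follows (all names and values invented for illustration; the entries mirror to_suite, add_column_order_exp, and to_column_types_exp above):

import json

suite = {
    "data_asset_type": "null",
    "expectation_suite_name": "user-defined.orders.1.0.0",
    "expectations": [
        {
            "expectation_type": "expect_table_columns_to_match_ordered_list",
            "kwargs": {"column_list": ["order_id"]},
            "meta": {},
        },
        {
            "expectation_type": "expect_column_values_to_be_of_type",
            "kwargs": {"column": "order_id", "type_": "string"},
            "meta": {},
        },
    ],
    "meta": {},
}
print(json.dumps(suite, indent=2))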
--- /dev/null
+++ b/datacontract/export/html_export.py
@@ -0,0 +1,46 @@
+import yaml
+from jinja2 import Environment, PackageLoader, select_autoescape
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification
+
+
+def to_html(data_contract_spec: DataContractSpecification) -> str:
+    # Load templates from templates folder
+    package_loader = PackageLoader("datacontract", "templates")
+    env = Environment(
+        loader=package_loader,
+        autoescape=select_autoescape(
+            enabled_extensions=("html", "xml"),
+            default_for_string=True,
+        ),
+    )
+
+    # Load the required template
+    template = env.get_template("datacontract.html")
+
+    if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
+        quality_specification = data_contract_spec.quality.specification
+    elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
+        if data_contract_spec.quality.type == "great-expectations":
+            quality_specification = yaml.dump(
+                data_contract_spec.quality.specification, sort_keys=False, default_style="|"
+            )
+        else:
+            quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
+    else:
+        quality_specification = None
+
+    style_content, _, _ = package_loader.get_source(env, "style/output.css")
+
+    datacontract_yaml = data_contract_spec.to_yaml()
+
+    # Render the template with necessary data
+    html_string = template.render(
+        datacontract=data_contract_spec,
+        quality_specification=quality_specification,
+        style=style_content,
+        datacontract_yaml=datacontract_yaml,
+    )
+
+    return html_string
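A usage sketch for the new HTML export. The minimal contract and the output file name are assumptions for illustration; rendering relies on the datacontract.html template and style/output.css shipped inside the package:

from datacontract.export.html_export import to_html
from datacontract.model.data_contract_specification import DataContractSpecification

# Hypothetical minimal contract, as in the Avro IDL sketch above.
spec = DataContractSpecification.model_validate(
    {"info": {"title": "orders latest", "description": "Order events"}}
)

# Renders the packaged Jinja template with the contract, its YAML form,
# and the inlined stylesheet.
with open("datacontract.html", "w") as f:
    f.write(to_html(spec))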
--- a/datacontract/export/jsonschema_converter.py
+++ b/datacontract/export/jsonschema_converter.py
@@ -12,16 +12,18 @@ def to_jsonschemas(data_contract_spec: DataContractSpecification):
         jsonschmemas[model_key] = jsonschema
     return jsonschmemas
 
+
 def to_jsonschema_json(model_key, model_value: Model) -> str:
     jsonschema = to_jsonschema(model_key, model_value)
     return json.dumps(jsonschema, indent=2)
 
+
 def to_jsonschema(model_key, model_value: Model) -> dict:
     return {
         "$schema": "http://json-schema.org/draft-07/schema#",
         "type": "object",
         "properties": to_properties(model_value.fields),
-        "required": to_required(model_value.fields)
+        "required": to_required(model_value.fields),
     }
 
 
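The trailing-comma change above is cosmetic; for reference, to_jsonschema returns a dict of this shape for a hypothetical model with one required string field (the property body is an assumption, since to_properties is outside this hunk):

# Illustrative return value of to_jsonschema; the "properties" body is an
# assumption, as to_properties is not shown in this diff.
schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {"order_id": {"type": "string"}},
    "required": ["order_id"],
}
print(schema)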
--- a/datacontract/export/odcs_converter.py
+++ b/datacontract/export/odcs_converter.py
@@ -27,10 +27,12 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
     odcs["description"] = {
         "purpose": None,
         "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
-        "limitations": data_contract_spec.terms.limitations.strip() if data_contract_spec.terms.limitations is not None else None,
+        "limitations": data_contract_spec.terms.limitations.strip()
+        if data_contract_spec.terms.limitations is not None
+        else None,
     }
 
-    odcs["type"] = "tables"
+    odcs["type"] = "tables"  # required, TODO read from models.type?
     odcs["dataset"] = []
 
     for model_key, model_value in data_contract_spec.models.items():
@@ -62,9 +64,7 @@ def to_columns(fields: Dict[str, Field]) -> list:
 
 
 def to_column(field_name: str, field: Field) -> dict:
-    column = {
-        "column": field_name
-    }
+    column = {"column": field_name}
     if field.type is not None:
         column["logicalType"] = field.type
         column["physicalType"] = field.type
@@ -100,5 +100,3 @@ def to_column(field_name: str, field: Field) -> dict:
 
     # todo enum
     return column
-
-