datacontract-cli 0.9.8__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/cli.py +2 -0
- datacontract/data_contract.py +27 -27
- datacontract/engines/soda/check_soda_execute.py +17 -6
- datacontract/engines/soda/connections/duckdb.py +21 -4
- datacontract/export/avro_converter.py +6 -4
- datacontract/export/csv_type_converter.py +36 -0
- datacontract/export/great_expectations_converter.py +1 -1
- datacontract/export/html_export.py +66 -0
- datacontract/export/pydantic_converter.py +51 -60
- datacontract/export/sodacl_converter.py +104 -7
- datacontract/export/sql_converter.py +12 -1
- datacontract/imports/avro_importer.py +37 -12
- datacontract/integration/publish_datamesh_manager.py +2 -3
- datacontract/lint/resolve.py +45 -6
- datacontract/model/run.py +2 -1
- datacontract/templates/datacontract.html +502 -0
- datacontract/templates/style/output.css +1332 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/METADATA +314 -105
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/RECORD +23 -20
- datacontract/lint/linters/primary_field_linter.py +0 -28
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.8.dist-info → datacontract_cli-0.10.0.dist-info}/top_level.txt +0 -0
datacontract/export/sodacl_converter.py
CHANGED

@@ -5,7 +5,9 @@ from datacontract.model.data_contract_specification import \
     DataContractSpecification


-def to_sodacl_yaml(
+def to_sodacl_yaml(
+    data_contract_spec: DataContractSpecification, server_type: str = None, check_types: bool = True
+) -> str:
     try:
         sodacl = {}
         for model_key, model_value in data_contract_spec.models.items():
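For orientation, a minimal usage sketch of the widened signature (the contract path is hypothetical; server_type and check_types are the parameters added above):

from datacontract.export.sodacl_converter import to_sodacl_yaml
from datacontract.lint.resolve import resolve_data_contract_from_location

# Resolve a local contract (hypothetical path), then render its SodaCL checks.
spec = resolve_data_contract_from_location("datacontract.yaml", inline_definitions=True)
print(to_sodacl_yaml(spec, server_type="snowflake", check_types=True))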
@@ -33,6 +35,26 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
             checks.append(check_field_required(field_name, quote_field_name))
         if field.unique:
             checks.append(check_field_unique(field_name, quote_field_name))
+        if field.minLength is not None:
+            checks.append(check_field_min_length(field_name, field.minLength))
+        if field.maxLength is not None:
+            checks.append(check_field_max_length(field_name, field.maxLength))
+        if field.minimum is not None:
+            checks.append(check_field_minimum(field_name, field.minimum))
+        if field.maximum is not None:
+            checks.append(check_field_maximum(field_name, field.maximum))
+        if field.exclusiveMinimum is not None:
+            checks.append(check_field_minimum(field_name, field.exclusiveMinimum))
+            checks.append(check_field_not_equal(field_name, field.exclusiveMinimum))
+        if field.exclusiveMaximum is not None:
+            checks.append(check_field_maximum(field_name, field.exclusiveMaximum))
+            checks.append(check_field_not_equal(field_name, field.exclusiveMaximum))
+        if field.pattern is not None:
+            checks.append(check_field_regex(field_name, field.pattern))
+        if field.enum is not None and len(field.enum) > 0:
+            checks.append(check_field_enum(field_name, field.enum))
+        # TODO references: str = None
+        # TODO format

     return f"checks for {model_key}", checks

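The constraint attributes consumed above live on the field model; as a sketch, a field definition that would trigger every one of the new checks (values invented, assuming these attributes are declared on Field, as the attribute access in the hunk implies):

from datacontract.model.data_contract_specification import Field

# Each constraint below maps to one generated SodaCL check in to_checks().
field = Field(
    type="string",
    required=True,
    unique=True,
    minLength=3,
    maxLength=10,
    pattern="^[a-z]+$",
    enum=["red", "green", "blue"],
)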
@@ -59,18 +81,93 @@ def check_field_type(field_name: str, type: str):

 def check_field_required(field_name: str, quote_field_name: bool = False):
     if quote_field_name:
-        field_name = f"
+        field_name = f'"{field_name}"'

-    return {
-        f"missing_count({field_name}) = 0": {"name": f"Check that required field {field_name} has no null values"}}
+    return {f"missing_count({field_name}) = 0": {"name": f"Check that required field {field_name} has no null values"}}


 def check_field_unique(field_name, quote_field_name: bool = False):
     if quote_field_name:
-        field_name = f"
+        field_name = f'"{field_name}"'
+    return {
+        f"duplicate_count({field_name}) = 0": {"name": f"Check that unique field {field_name} has no duplicate values"}
+    }
+
+
+def check_field_min_length(field_name, min_length, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a min length of {min_length}",
+            "valid min length": min_length,
+        }
+    }
+
+
+def check_field_max_length(field_name, max_length, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
     return {
-        f"
-        "name": f"Check that
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a max length of {max_length}",
+            "valid max length": max_length,
+        }
+    }
+
+
+def check_field_minimum(field_name, minimum, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a minimum of {minimum}",
+            "valid min": minimum,
+        }
+    }
+
+
+def check_field_maximum(field_name, maximum, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a maximum of {maximum}",
+            "valid max": maximum,
+        }
+    }
+
+
+def check_field_not_equal(field_name, value, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} is not equal to {value}",
+            "invalid values": [value],
+        }
+    }
+
+
+def check_field_enum(field_name, enum, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} only contains enum values {enum}",
+            "valid values": enum,
+        }
+    }
+
+
+def check_field_regex(field_name, pattern, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} matches regex pattern {pattern}",
+            "valid regex": pattern,
+        }
     }

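To make the emitted structure concrete: each helper returns a one-entry dict keyed by a SodaCL metric expression, which the converter serializes into the checks file. A sketch for the min-length case (field name invented; requires PyYAML):

import yaml

# Equivalent of check_field_min_length("order_id", 3), rendered as SodaCL YAML.
check = {
    "invalid_count(order_id) = 0": {
        "name": "Check that field order_id has a min length of 3",
        "valid min length": 3,
    }
}
print(yaml.dump(check, sort_keys=False))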
datacontract/export/sql_converter.py
CHANGED

@@ -85,9 +85,20 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
             result += " not null"
         if field.primary:
             result += " primary key"
+        if server_type == "databricks" and field.description is not None:
+            result += f' COMMENT "{_escape(field.description)}"'
         if current_field_index < fields:
             result += ","
         result += "\n"
         current_field_index += 1
-    result += ")
+    result += ")"
+    if server_type == "databricks" and model.description is not None:
+        result += f' COMMENT "{_escape(model.description)}"'
+    result += ";\n"
     return result
+
+
+def _escape(text: str | None) -> str | None:
+    if text is None:
+        return None
+    return text.replace('"', '\\"')
datacontract/imports/avro_importer.py
CHANGED

@@ -26,7 +26,6 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
     fields = import_record_fields(avro_schema.fields)

     data_contract_specification.models[avro_schema.name] = Model(
-        type="table",
         fields=fields,
     )

@@ -42,10 +41,11 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
 def import_record_fields(record_fields):
     imported_fields = {}
     for field in record_fields:
-
         imported_fields[field.name] = Field()
         imported_fields[field.name].required = True
         imported_fields[field.name].description = field.doc
+        for prop in field.other_props:
+            imported_fields[field.name].__setattr__(prop, field.other_props[prop])

         if field.type.type == "record":
             imported_fields[field.name].type = "object"

@@ -53,12 +53,36 @@ def import_record_fields(record_fields):
             imported_fields[field.name].fields = import_record_fields(field.type.fields)
         elif field.type.type == "union":
             imported_fields[field.name].required = False
-
-
+            type = import_type_of_optional_field(field)
+            imported_fields[field.name].type = type
+            if type == "record":
+                imported_fields[field.name].fields = import_record_fields(get_record_from_union_field(field).fields)
+        elif field.type.type == "array":
+            imported_fields[field.name].type = "array"
+            imported_fields[field.name].items = import_avro_array_items(field.type)
+        else: # primitive type
             imported_fields[field.name].type = map_type_from_avro(field.type.type)
+
     return imported_fields


+def import_avro_array_items(array_schema):
+    items = Field()
+    for prop in array_schema.other_props:
+        items.__setattr__(prop, array_schema.other_props[prop])
+
+    if array_schema.items.type == "record":
+        items.type = "object"
+        items.fields = import_record_fields(array_schema.items.fields)
+    elif array_schema.items.type == "array":
+        items.type = "array"
+        items.items = import_avro_array_items(array_schema.items)
+    else: # primitive type
+        items.type = map_type_from_avro(array_schema.items.type)
+
+    return items
+
+
 def import_type_of_optional_field(field):
     for field_type in field.type.schemas:
         if field_type.type != "null":

@@ -72,6 +96,13 @@ def import_type_of_optional_field(field):
     )


+def get_record_from_union_field(field):
+    for field_type in field.type.schemas:
+        if field_type.type == "record":
+            return field_type
+    return None
+
+
 def map_type_from_avro(avro_type_str: str):
     # TODO: ambiguous mapping in the export
     if avro_type_str == "null":

@@ -88,14 +119,8 @@ def map_type_from_avro(avro_type_str: str):
         return "long"
     elif avro_type_str == "boolean":
         return "boolean"
-    elif avro_type_str == "
-
-            type="schema",
-            result="failed",
-            name="Map avro type to data contract type",
-            reason="Array type not supported",
-            engine="datacontract",
-        )
+    elif avro_type_str == "record":
+        return "record"
     else:
         raise DataContractException(
             type="schema",
datacontract/integration/publish_datamesh_manager.py
CHANGED

@@ -1,4 +1,3 @@
-import logging
 import os

 import requests

@@ -29,6 +28,6 @@ def publish_datamesh_manager(run: Run, publish_url: str):
         if response.status_code != 200:
             run.log_error(f"Error publishing test results to Data Mesh Manager: {response.text}")
             return
-
+        run.log_info(f"Published test results to {url}")
     except Exception as e:
-
+        run.log_error(f"Failed publishing test results. Error: {str(e)}")
datacontract/lint/resolve.py
CHANGED

@@ -1,4 +1,5 @@
 import logging
+import os

 import fastjsonschema
 import yaml

@@ -8,7 +9,7 @@ from datacontract.lint.files import read_file
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
 from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Definition
+    DataContractSpecification, Definition, Quality
 from datacontract.model.exceptions import DataContractException

@@ -36,13 +37,13 @@ def resolve_data_contract(


 def resolve_data_contract_from_location(
-    location, schema_location: str = None, inline_definitions: bool = False
+    location, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = True
 ) -> DataContractSpecification:
     if location.startswith("http://") or location.startswith("https://"):
         data_contract_str = fetch_resource(location)
     else:
         data_contract_str = read_file(location)
-    return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions)
+    return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, include_quality)


 def inline_definitions_into_data_contract(spec: DataContractSpecification):

@@ -52,7 +53,7 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
             if not field.ref and not field.ref_obj:
                 continue

-            definition = resolve_ref(field.ref, spec.definitions)
+            definition = resolve_definition_ref(field.ref, spec.definitions)
             field.ref_obj = definition

             for field_name in field.model_fields.keys():

@@ -60,7 +61,7 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
                 setattr(field, field_name, getattr(definition, field_name))


-def resolve_ref(ref, definitions) -> Definition:
+def resolve_definition_ref(ref, definitions) -> Definition:
     if ref.startswith("http://") or ref.startswith("https://"):
         definition_str = fetch_resource(ref)
         definition_dict = to_yaml(definition_str)

@@ -79,8 +80,44 @@ def resolve_ref(ref, definitions) -> Definition:
     )


+def resolve_quality_ref(quality: Quality):
+    """
+    Return the content of a ref file path
+    @param quality data contract quality specification
+    """
+    if isinstance(quality.specification, dict):
+        specification = quality.specification
+        if quality.type == "great-expectations":
+            for model, model_quality in specification.items():
+                specification[model] = get_quality_ref_file(model_quality)
+        else:
+            if "$ref" in specification:
+                quality.specification = get_quality_ref_file(specification)
+
+
+def get_quality_ref_file(quality_spec: str | object) -> str | object:
+    """
+    Get the file associated with a quality reference
+    @param quality_spec quality specification
+    @returns: the content of the quality file
+    """
+    if isinstance(quality_spec, dict) and "$ref" in quality_spec:
+        ref = quality_spec["$ref"]
+        if not os.path.exists(ref):
+            raise DataContractException(
+                type="export",
+                result="failed",
+                name="Check that data contract quality is valid",
+                reason=f"Cannot resolve reference {ref}",
+                engine="datacontract",
+            )
+        with open(ref, "r") as file:
+            quality_spec = file.read()
+    return quality_spec
+
+
 def resolve_data_contract_from_str(
-    data_contract_str, schema_location: str = None, inline_definitions: bool = False
+    data_contract_str, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = False
 ) -> DataContractSpecification:
     data_contract_yaml_dict = to_yaml(data_contract_str)
     validate(data_contract_yaml_dict, schema_location)

@@ -89,6 +126,8 @@ def resolve_data_contract_from_str(

     if inline_definitions:
         inline_definitions_into_data_contract(spec)
+    if spec.quality and include_quality:
+        resolve_quality_ref(spec.quality)

     return spec

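To see the new quality resolution end to end, a sketch that inlines a local $ref (path invented; assumes the Quality model accepts type and specification, per the import above):

from datacontract.lint.resolve import resolve_quality_ref
from datacontract.model.data_contract_specification import Quality

# A quality block pointing at a local SodaCL file (hypothetical path).
quality = Quality(type="SodaCL", specification={"$ref": "checks/orders.sodacl.yaml"})
resolve_quality_ref(quality)
# quality.specification now holds the referenced file's text; a missing path
# raises DataContractException per get_quality_ref_file above.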
datacontract/model/run.py
CHANGED

@@ -15,6 +15,7 @@ class Check(BaseModel):
     model: Optional[str] = None
     field: Optional[str] = None
     details: Optional[str] = None
+    diagnostics: Optional[dict] = None


 class Log(BaseModel):

@@ -69,7 +70,7 @@ class Run(BaseModel):
         self.logs.append(Log(level="ERROR", message=message, timestamp=datetime.now(timezone.utc)))

     def pretty(self):
-        return self.model_dump_json()
+        return self.model_dump_json(indent=2)

     @staticmethod
     def create_run():