datacontract-cli 0.10.23__py3-none-any.whl → 0.10.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +12 -5
- datacontract/catalog/catalog.py +5 -3
- datacontract/cli.py +119 -13
- datacontract/data_contract.py +145 -67
- datacontract/engines/data_contract_checks.py +366 -60
- datacontract/engines/data_contract_test.py +50 -4
- datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +27 -3
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/duckdb_connection.py +65 -6
- datacontract/engines/soda/connections/kafka.py +4 -2
- datacontract/engines/soda/connections/oracle.py +50 -0
- datacontract/export/avro_converter.py +20 -3
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/dbt_converter.py +36 -7
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +3 -0
- datacontract/export/exporter_factory.py +17 -1
- datacontract/export/great_expectations_converter.py +55 -5
- datacontract/export/{html_export.py → html_exporter.py} +31 -20
- datacontract/export/markdown_converter.py +134 -5
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +193 -149
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/export/rdf_converter.py +2 -2
- datacontract/export/sodacl_converter.py +9 -1
- datacontract/export/spark_converter.py +31 -4
- datacontract/export/sql_converter.py +6 -2
- datacontract/export/sql_type_converter.py +124 -8
- datacontract/imports/avro_importer.py +63 -12
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/excel_importer.py +1112 -0
- datacontract/imports/importer.py +16 -3
- datacontract/imports/importer_factory.py +17 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/odcs_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +367 -151
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +117 -13
- datacontract/imports/sql_importer.py +32 -16
- datacontract/imports/unity_importer.py +84 -38
- datacontract/init/init_template.py +1 -1
- datacontract/integration/entropy_data.py +126 -0
- datacontract/lint/resolve.py +112 -23
- datacontract/lint/schema.py +24 -15
- datacontract/lint/urls.py +17 -3
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/odcs.py +13 -0
- datacontract/model/run.py +3 -0
- datacontract/output/junit_test_results.py +3 -3
- datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/schemas/odcs-3.1.0.schema.json +2809 -0
- datacontract/templates/datacontract.html +54 -3
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +5 -2
- datacontract/templates/partials/server.html +2 -0
- datacontract/templates/style/output.css +319 -145
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/METADATA +711 -433
- datacontract_cli-0.10.40.dist-info/RECORD +121 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info/licenses}/LICENSE +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/integration/datamesh_manager.py +0 -72
- datacontract/lint/lint.py +0 -142
- datacontract/lint/linters/description_linter.py +0 -35
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -48
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -100
- datacontract/model/data_contract_specification.py +0 -327
- datacontract_cli-0.10.23.dist-info/RECORD +0 -113
- /datacontract/{lint/linters → output}/__init__.py +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/top_level.txt +0 -0
```diff
--- a/datacontract/export/dbt_converter.py
+++ b/datacontract/export/dbt_converter.py
@@ -27,7 +27,7 @@ class DbtStageExporter(Exporter):
         )
 
 
-def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None):
+def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None) -> str:
     dbt = {
         "version": 2,
         "models": [],
@@ -102,8 +102,11 @@ def _to_dbt_model(
         "name": model_key,
     }
     model_type = _to_dbt_model_type(model_value.type)
+
     dbt_model["config"] = {"meta": {"data_contract": data_contract_spec.id}}
-
+
+    if model_type:
+        dbt_model["config"]["materialized"] = model_type
 
     if data_contract_spec.info.owner is not None:
         dbt_model["config"]["meta"]["owner"] = data_contract_spec.info.owner
@@ -112,9 +115,28 @@ def _to_dbt_model(
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
         dbt_model["description"] = model_value.description.strip().replace("\n", " ")
-
+
+    # Handle model-level primaryKey (before columns for better YAML ordering)
+    primary_key_columns = []
+    if hasattr(model_value, "primaryKey") and model_value.primaryKey:
+        if isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) > 1:
+            # Multiple columns: use dbt_utils.unique_combination_of_columns
+            dbt_model["data_tests"] = [
+                {"dbt_utils.unique_combination_of_columns": {"combination_of_columns": model_value.primaryKey}}
+            ]
+        elif isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) == 1:
+            # Single column: handle at column level (pass to _to_columns)
+            primary_key_columns = model_value.primaryKey
+        elif isinstance(model_value.primaryKey, str):
+            # Single column as string: handle at column level
+            primary_key_columns = [model_value.primaryKey]
+
+    columns = _to_columns(
+        data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type, primary_key_columns
+    )
     if columns:
         dbt_model["columns"] = columns
+
     return dbt_model
 
 
@@ -123,7 +145,7 @@ def _to_dbt_model_type(model_type):
     # Allowed values: table, view, incremental, ephemeral, materialized view
     # Custom values also possible
     if model_type is None:
-        return
+        return None
     if model_type.lower() == "table":
         return "table"
     if model_type.lower() == "view":
@@ -140,10 +162,13 @@ def _to_columns(
     fields: Dict[str, Field],
     supports_constraints: bool,
     adapter_type: Optional[str],
+    primary_key_columns: Optional[list] = None,
 ) -> list:
     columns = []
+    primary_key_columns = primary_key_columns or []
     for field_name, field in fields.items():
-        column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type)
+        is_primary_key = field_name in primary_key_columns
+        column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type, is_primary_key)
         columns.append(column)
     return columns
 
@@ -161,6 +186,7 @@ def _to_column(
     field: Field,
     supports_constraints: bool,
     adapter_type: Optional[str],
+    is_primary_key: bool = False,
 ) -> dict:
     column = {"name": field_name}
     adapter_type = adapter_type or "snowflake"
@@ -175,12 +201,15 @@ def _to_column(
     )
     if field.description is not None:
         column["description"] = field.description.strip().replace("\n", " ")
-    if field.required:
+    # Handle required/not_null constraint
+    if field.required or is_primary_key:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "not_null"})
         else:
             column["data_tests"].append("not_null")
-    if field.unique:
+
+    # Handle unique constraint
+    if field.unique or is_primary_key:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "unique"})
         else:
```
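Taken together, composite primary keys surface as a model-level `dbt_utils.unique_combination_of_columns` test, while single-column keys are enforced per column as `not_null` plus `unique`. A minimal sketch of exercising this, assuming `Model` accepts `primaryKey` as an extra, undeclared field (which the `hasattr` check above suggests):

```python
from datacontract.export.dbt_converter import to_dbt_models_yaml
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Field,
    Info,
    Model,
)

# Hypothetical contract with a composite primary key on the "orders" model.
spec = DataContractSpecification(
    id="orders-contract",
    info=Info(title="Orders", version="1.0.0"),
    models={
        "orders": Model(
            type="table",
            primaryKey=["order_id", "line_no"],  # extra field, read via hasattr()
            fields={
                "order_id": Field(type="string", required=True),
                "line_no": Field(type="integer", required=True),
            },
        )
    },
)

print(to_dbt_models_yaml(spec))
# The composite key should appear as a model-level test, roughly:
#   data_tests:
#     - dbt_utils.unique_combination_of_columns:
#         combination_of_columns: [order_id, line_no]
```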
```diff
--- /dev/null
+++ b/datacontract/export/dqx_converter.py
@@ -0,0 +1,126 @@
+from typing import Any, Dict, List, Union
+
+import yaml
+
+from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Quality
+
+
+class DqxKeys:
+    CHECK = "check"
+    ARGUMENTS = "arguments"
+    SPECIFICATION = "specification"
+    COL_NAME = "column"
+    COL_NAMES = "for_each_column"
+    COLUMNS = "columns"
+    FUNCTION = "function"
+
+
+class DqxExporter(Exporter):
+    """Exporter implementation for converting data contracts to DQX YAML file."""
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model: Model,
+        server: str,
+        sql_server_type: str,
+        export_args: Dict[str, Any],
+    ) -> str:
+        """Exports a data contract to DQX format."""
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_dqx_yaml(model_value)
+
+
+def to_dqx_yaml(model_value: Model) -> str:
+    """
+    Converts the data contract's quality checks to DQX YAML format.
+
+    Args:
+        model_value (Model): The data contract to convert.
+
+    Returns:
+        str: YAML representation of the data contract's quality checks.
+    """
+    extracted_rules = extract_quality_rules(model_value)
+    return yaml.dump(extracted_rules, sort_keys=False, allow_unicode=True, default_flow_style=False)
+
+
+def process_quality_rule(rule: Quality, column_name: str) -> Dict[str, Any]:
+    """
+    Processes a single quality rule by injecting the column path into its arguments if absent.
+
+    Args:
+        rule (Quality): The quality rule to process.
+        column_name (str): The full path to the current column.
+
+    Returns:
+        dict: The processed quality rule specification.
+    """
+    rule_data = rule.model_extra
+    specification = rule_data[DqxKeys.SPECIFICATION]
+    check = specification[DqxKeys.CHECK]
+
+    if column_name:
+        arguments = check.setdefault(DqxKeys.ARGUMENTS, {})
+
+        if (
+            DqxKeys.COL_NAME not in arguments
+            and DqxKeys.COL_NAMES not in arguments
+            and DqxKeys.COLUMNS not in arguments
+        ):
+            if check[DqxKeys.FUNCTION] not in ("is_unique", "foreign_key"):
+                arguments[DqxKeys.COL_NAME] = column_name
+            else:
+                arguments[DqxKeys.COLUMNS] = [column_name]
+
+    return specification
+
+
+def extract_quality_rules(data: Union[Model, Field, Quality], column_path: str = "") -> List[Dict[str, Any]]:
+    """
+    Recursively extracts all quality rules from a data contract structure.
+
+    Args:
+        data (Union[Model, Field, Quality]): The data contract model, field, or quality rule.
+        column_path (str, optional): The current path in the schema hierarchy. Defaults to "".
+
+    Returns:
+        List[Dict[str, Any]]: A list of quality rule specifications.
+    """
+    quality_rules = []
+
+    if isinstance(data, Quality):
+        return [process_quality_rule(data, column_path)]
+
+    if isinstance(data, (Model, Field)):
+        for key, field in data.fields.items():
+            current_path = build_column_path(column_path, key)
+
+            if field.fields:
+                # Field is a struct-like object, recurse deeper
+                quality_rules.extend(extract_quality_rules(field, current_path))
+            else:
+                # Process quality rules at leaf fields
+                for rule in field.quality:
+                    quality_rules.append(process_quality_rule(rule, current_path))
+
+        # Process any quality rules attached directly to this level
+        for rule in data.quality:
+            quality_rules.append(process_quality_rule(rule, column_path))
+
+    return quality_rules
+
+
+def build_column_path(current_path: str, key: str) -> str:
+    """
+    Builds the full column path by concatenating parent path with current key.
+
+    Args:
+        current_path (str): The current path prefix.
+        key (str): The current field's key.
+
+    Returns:
+        str: The full path.
+    """
+    return f"{current_path}.{key}" if current_path else key
```
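For orientation, a hedged usage sketch of the new exporter's helpers: `Quality` keeps undeclared fields in pydantic's `model_extra`, which is where `process_quality_rule` looks for the DQX `specification` block. The check function name below is illustrative:

```python
from datacontract.export.dqx_converter import extract_quality_rules, to_dqx_yaml
from datacontract.model.data_contract_specification import Field, Model, Quality

# Illustrative rule: `specification` is not a declared Quality attribute,
# so it lands in model_extra, where process_quality_rule() reads it.
rule = Quality(
    type="custom",
    specification={"check": {"function": "is_not_null_and_not_empty"}},
)
model = Model(fields={"order_id": Field(type="string", quality=[rule])})

print(extract_quality_rules(model))
# -> [{'check': {'function': 'is_not_null_and_not_empty',
#                'arguments': {'column': 'order_id'}}}]
print(to_dqx_yaml(model))  # the same rules, serialized as YAML
```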
```diff
--- /dev/null
+++ b/datacontract/export/duckdb_type_converter.py
@@ -0,0 +1,57 @@
+from typing import Dict
+
+from datacontract.model.data_contract_specification import Field
+
+
+# https://duckdb.org/docs/data/csv/overview.html
+# ['SQLNULL', 'BOOLEAN', 'BIGINT', 'DOUBLE', 'TIME', 'DATE', 'TIMESTAMP', 'VARCHAR']
+def convert_to_duckdb_csv_type(field) -> None | str:
+    datacontract_type = field.type
+    if datacontract_type is None:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["string", "varchar", "text"]:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if datacontract_type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP"
+    if datacontract_type.lower() in ["date"]:
+        return "DATE"
+    if datacontract_type.lower() in ["time"]:
+        return "TIME"
+    if datacontract_type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "VARCHAR"
+    if datacontract_type.lower() in ["float", "double"]:
+        return "DOUBLE"
+    if datacontract_type.lower() in ["integer", "int", "long", "bigint"]:
+        return "BIGINT"
+    if datacontract_type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if datacontract_type.lower() in ["object", "record", "struct"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if datacontract_type.lower() in ["bytes"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if datacontract_type.lower() in ["array"]:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["null"]:
+        return "SQLNULL"
+    return "VARCHAR"
+
+
+def convert_to_duckdb_json_type(field: Field) -> None | str:
+    datacontract_type = field.type
+    if datacontract_type is None:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["array"]:
+        return convert_to_duckdb_json_type(field.items) + "[]"  # type: ignore
+    if datacontract_type.lower() in ["object", "record", "struct"]:
+        return convert_to_duckdb_object(field.fields)
+    return convert_to_duckdb_csv_type(field)
+
+
+def convert_to_duckdb_object(fields: Dict[str, Field]):
+    columns = [f'"{x[0]}" {convert_to_duckdb_json_type(x[1])}' for x in fields.items()]
+    return f"STRUCT({', '.join(columns)})"
```