datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +260 -0
- datacontract/breaking/breaking.py +242 -12
- datacontract/breaking/breaking_rules.py +37 -1
- datacontract/catalog/catalog.py +80 -0
- datacontract/cli.py +387 -117
- datacontract/data_contract.py +216 -353
- datacontract/engines/data_contract_checks.py +1041 -0
- datacontract/engines/data_contract_test.py +113 -0
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
- datacontract/engines/soda/check_soda_execute.py +100 -56
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/engines/soda/connections/duckdb_connection.py +241 -0
- datacontract/engines/soda/connections/kafka.py +206 -113
- datacontract/engines/soda/connections/snowflake.py +8 -5
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/engines/soda/connections/trino.py +26 -0
- datacontract/export/avro_converter.py +72 -8
- datacontract/export/avro_idl_converter.py +31 -25
- datacontract/export/bigquery_converter.py +130 -0
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/data_caterer_converter.py +161 -0
- datacontract/export/dbml_converter.py +148 -0
- datacontract/export/dbt_converter.py +141 -54
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +100 -0
- datacontract/export/exporter_factory.py +216 -0
- datacontract/export/go_converter.py +105 -0
- datacontract/export/great_expectations_converter.py +257 -36
- datacontract/export/html_exporter.py +86 -0
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/jsonschema_converter.py +71 -16
- datacontract/export/markdown_converter.py +337 -0
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +375 -0
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +168 -68
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +13 -6
- datacontract/export/sodacl_converter.py +36 -188
- datacontract/export/spark_converter.py +245 -0
- datacontract/export/sql_converter.py +37 -3
- datacontract/export/sql_type_converter.py +269 -8
- datacontract/export/sqlalchemy_converter.py +170 -0
- datacontract/export/terraform_converter.py +7 -2
- datacontract/imports/avro_importer.py +246 -26
- datacontract/imports/bigquery_importer.py +221 -0
- datacontract/imports/csv_importer.py +143 -0
- datacontract/imports/dbml_importer.py +112 -0
- datacontract/imports/dbt_importer.py +240 -0
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/glue_importer.py +288 -0
- datacontract/imports/iceberg_importer.py +172 -0
- datacontract/imports/importer.py +51 -0
- datacontract/imports/importer_factory.py +128 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/jsonschema_importer.py +146 -0
- datacontract/imports/odcs_importer.py +60 -0
- datacontract/imports/odcs_v3_importer.py +516 -0
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +262 -0
- datacontract/imports/sql_importer.py +274 -35
- datacontract/imports/unity_importer.py +219 -0
- datacontract/init/init_template.py +20 -0
- datacontract/integration/datamesh_manager.py +86 -0
- datacontract/lint/resolve.py +271 -49
- datacontract/lint/resources.py +21 -0
- datacontract/lint/schema.py +53 -17
- datacontract/lint/urls.py +32 -12
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/exceptions.py +4 -1
- datacontract/model/odcs.py +24 -0
- datacontract/model/run.py +49 -29
- datacontract/output/__init__.py +0 -0
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- datacontract/py.typed +0 -0
- datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +139 -294
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +236 -0
- datacontract/templates/partials/datacontract_information.html +86 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +51 -0
- datacontract/templates/partials/definition.html +25 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +144 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/partials/server.html +211 -0
- datacontract/templates/style/output.css +491 -72
- datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/engines/soda/connections/duckdb.py +0 -76
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/export/html_export.py +0 -66
- datacontract/export/odcs_converter.py +0 -102
- datacontract/init/download_datacontract_file.py +0 -17
- datacontract/integration/publish_datamesh_manager.py +0 -33
- datacontract/integration/publish_opentelemetry.py +0 -107
- datacontract/lint/lint.py +0 -141
- datacontract/lint/linters/description_linter.py +0 -34
- datacontract/lint/linters/example_model_linter.py +0 -91
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -38
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -99
- datacontract/model/data_contract_specification.py +0 -141
- datacontract/web.py +0 -14
- datacontract_cli-0.10.0.dist-info/METADATA +0 -951
- datacontract_cli-0.10.0.dist-info/RECORD +0 -66
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- /datacontract/{lint/linters → export}/__init__.py +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/lint/resolve.py
CHANGED
|
@@ -1,16 +1,26 @@
|
|
|
1
|
+
import importlib.resources as resources
|
|
1
2
|
import logging
|
|
2
3
|
import os
|
|
4
|
+
import warnings
|
|
5
|
+
from pathlib import Path
|
|
3
6
|
|
|
4
7
|
import fastjsonschema
|
|
5
8
|
import yaml
|
|
6
9
|
from fastjsonschema import JsonSchemaValueException
|
|
10
|
+
from open_data_contract_standard.model import OpenDataContractStandard
|
|
7
11
|
|
|
8
|
-
from datacontract.
|
|
12
|
+
from datacontract.imports.odcs_v3_importer import import_from_odcs, parse_odcs_v3_from_str
|
|
13
|
+
from datacontract.lint.resources import read_resource
|
|
9
14
|
from datacontract.lint.schema import fetch_schema
|
|
10
15
|
from datacontract.lint.urls import fetch_resource
|
|
11
|
-
from datacontract.model.data_contract_specification import
|
|
12
|
-
DataContractSpecification,
|
|
16
|
+
from datacontract.model.data_contract_specification import (
|
|
17
|
+
DataContractSpecification,
|
|
18
|
+
Definition,
|
|
19
|
+
DeprecatedQuality,
|
|
20
|
+
)
|
|
13
21
|
from datacontract.model.exceptions import DataContractException
|
|
22
|
+
from datacontract.model.odcs import is_open_data_contract_standard, is_open_data_product_standard
|
|
23
|
+
from datacontract.model.run import ResultEnum
|
|
14
24
|
|
|
15
25
|
|
|
16
26
|
def resolve_data_contract(
|
|
@@ -19,68 +29,222 @@ def resolve_data_contract(
|
|
|
19
29
|
data_contract: DataContractSpecification = None,
|
|
20
30
|
schema_location: str = None,
|
|
21
31
|
inline_definitions: bool = False,
|
|
32
|
+
inline_quality: bool = False,
|
|
22
33
|
) -> DataContractSpecification:
|
|
23
34
|
if data_contract_location is not None:
|
|
24
|
-
return resolve_data_contract_from_location(
|
|
35
|
+
return resolve_data_contract_from_location(
|
|
36
|
+
data_contract_location, schema_location, inline_definitions, inline_quality
|
|
37
|
+
)
|
|
25
38
|
elif data_contract_str is not None:
|
|
26
|
-
return
|
|
39
|
+
return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
|
|
27
40
|
elif data_contract is not None:
|
|
28
41
|
return data_contract
|
|
29
42
|
else:
|
|
30
43
|
raise DataContractException(
|
|
31
44
|
type="lint",
|
|
32
|
-
result=
|
|
45
|
+
result=ResultEnum.failed,
|
|
46
|
+
name="Check that data contract YAML is valid",
|
|
47
|
+
reason="Data contract needs to be provided",
|
|
48
|
+
engine="datacontract",
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def resolve_data_contract_v2(
|
|
53
|
+
data_contract_location: str = None,
|
|
54
|
+
data_contract_str: str = None,
|
|
55
|
+
data_contract: DataContractSpecification | OpenDataContractStandard = None,
|
|
56
|
+
schema_location: str = None,
|
|
57
|
+
inline_definitions: bool = False,
|
|
58
|
+
inline_quality: bool = False,
|
|
59
|
+
) -> DataContractSpecification | OpenDataContractStandard:
|
|
60
|
+
if data_contract_location is not None:
|
|
61
|
+
return resolve_data_contract_from_location_v2(
|
|
62
|
+
data_contract_location, schema_location, inline_definitions, inline_quality
|
|
63
|
+
)
|
|
64
|
+
elif data_contract_str is not None:
|
|
65
|
+
return _resolve_data_contract_from_str_v2(
|
|
66
|
+
data_contract_str, schema_location, inline_definitions, inline_quality
|
|
67
|
+
)
|
|
68
|
+
elif data_contract is not None:
|
|
69
|
+
return data_contract
|
|
70
|
+
else:
|
|
71
|
+
raise DataContractException(
|
|
72
|
+
type="lint",
|
|
73
|
+
result=ResultEnum.failed,
|
|
33
74
|
name="Check that data contract YAML is valid",
|
|
34
75
|
reason="Data contract needs to be provided",
|
|
35
76
|
engine="datacontract",
|
|
36
77
|
)
|
|
37
78
|
|
|
38
79
|
|
|
80
|
+
def resolve_data_contract_dict(
|
|
81
|
+
data_contract_location: str = None,
|
|
82
|
+
data_contract_str: str = None,
|
|
83
|
+
data_contract: DataContractSpecification = None,
|
|
84
|
+
) -> dict:
|
|
85
|
+
if data_contract_location is not None:
|
|
86
|
+
return _to_yaml(read_resource(data_contract_location))
|
|
87
|
+
elif data_contract_str is not None:
|
|
88
|
+
return _to_yaml(data_contract_str)
|
|
89
|
+
elif data_contract is not None:
|
|
90
|
+
return data_contract.model_dump()
|
|
91
|
+
else:
|
|
92
|
+
raise DataContractException(
|
|
93
|
+
type="lint",
|
|
94
|
+
result=ResultEnum.failed,
|
|
95
|
+
name="Check that data contract YAML is valid",
|
|
96
|
+
reason="Data contract needs to be provided",
|
|
97
|
+
engine="datacontract",
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def resolve_data_contract_from_location_v2(
|
|
102
|
+
location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
|
|
103
|
+
) -> DataContractSpecification | OpenDataContractStandard:
|
|
104
|
+
data_contract_str = read_resource(location)
|
|
105
|
+
return _resolve_data_contract_from_str_v2(data_contract_str, schema_location, inline_definitions, inline_quality)
|
|
106
|
+
|
|
107
|
+
|
|
39
108
|
def resolve_data_contract_from_location(
|
|
40
|
-
location, schema_location: str = None, inline_definitions: bool = False,
|
|
109
|
+
location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
|
|
41
110
|
) -> DataContractSpecification:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
else:
|
|
45
|
-
data_contract_str = read_file(location)
|
|
46
|
-
return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, include_quality)
|
|
111
|
+
data_contract_str = read_resource(location)
|
|
112
|
+
return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
|
|
47
113
|
|
|
48
114
|
|
|
49
115
|
def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
50
116
|
for model in spec.models.values():
|
|
51
117
|
for field in model.fields.values():
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
118
|
+
inline_definition_into_field(field, spec)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def inline_definition_into_field(field, spec):
|
|
122
|
+
# iterate recursively over arrays
|
|
123
|
+
if field.items is not None:
|
|
124
|
+
inline_definition_into_field(field.items, spec)
|
|
55
125
|
|
|
56
|
-
|
|
57
|
-
|
|
126
|
+
# iterate recursively over nested fields
|
|
127
|
+
if field.fields is not None:
|
|
128
|
+
for nested_field_name, nested_field in field.fields.items():
|
|
129
|
+
inline_definition_into_field(nested_field, spec)
|
|
58
130
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
setattr(field, field_name, getattr(definition, field_name))
|
|
131
|
+
if not field.ref:
|
|
132
|
+
return
|
|
62
133
|
|
|
134
|
+
definition = _resolve_definition_ref(field.ref, spec)
|
|
135
|
+
for field_name in field.model_fields.keys():
|
|
136
|
+
if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
|
|
137
|
+
setattr(field, field_name, getattr(definition, field_name))
|
|
138
|
+
# extras
|
|
139
|
+
for extra_field_name, extra_field_value in definition.model_extra.items():
|
|
140
|
+
if extra_field_name not in field.model_extra.keys():
|
|
141
|
+
setattr(field, extra_field_name, extra_field_value)
|
|
63
142
|
|
|
64
|
-
def resolve_definition_ref(ref, definitions) -> Definition:
|
|
65
|
-
if ref.startswith("http://") or ref.startswith("https://"):
|
|
66
|
-
definition_str = fetch_resource(ref)
|
|
67
|
-
definition_dict = to_yaml(definition_str)
|
|
68
|
-
return Definition(**definition_dict)
|
|
69
143
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
144
|
+
def _resolve_definition_ref(ref, spec) -> Definition:
|
|
145
|
+
logging.info(f"Resolving definition ref {ref}")
|
|
146
|
+
|
|
147
|
+
if "#" in ref:
|
|
148
|
+
path, definition_path = ref.split("#")
|
|
149
|
+
else:
|
|
150
|
+
path, definition_path = ref, None
|
|
151
|
+
|
|
152
|
+
if path.startswith("http://") or path.startswith("https://"):
|
|
153
|
+
logging.info(f"Resolving definition url {path}")
|
|
154
|
+
|
|
155
|
+
definition_str = fetch_resource(path)
|
|
156
|
+
definition_dict = _to_yaml(definition_str)
|
|
157
|
+
definition = Definition(**definition_dict)
|
|
158
|
+
if definition_path is not None:
|
|
159
|
+
return _find_by_path_in_definition(definition_path, definition)
|
|
160
|
+
else:
|
|
161
|
+
return definition
|
|
162
|
+
elif path.startswith("file://"):
|
|
163
|
+
logging.info(f"Resolving definition file path {path}")
|
|
164
|
+
|
|
165
|
+
path = path.replace("file://", "")
|
|
166
|
+
definition_str = _fetch_file(path)
|
|
167
|
+
definition_dict = _to_yaml(definition_str)
|
|
168
|
+
if definition_path:
|
|
169
|
+
path_parts = [part for part in definition_path.split("/") if part != ""]
|
|
170
|
+
for path_part in path_parts:
|
|
171
|
+
definition_dict = definition_dict.get(path_part, None)
|
|
172
|
+
if not definition_dict:
|
|
173
|
+
raise DataContractException(
|
|
174
|
+
type="lint",
|
|
175
|
+
result="failed",
|
|
176
|
+
name="Check that data contract YAML is valid",
|
|
177
|
+
reason=f"Cannot resolve definition {definition_path}, {path_part} not found",
|
|
178
|
+
engine="datacontract",
|
|
179
|
+
)
|
|
180
|
+
# this assumes that definitions_dict is a definitions dict, however,
|
|
181
|
+
# all we know is that it is a file!
|
|
182
|
+
definition = Definition(**definition_dict)
|
|
183
|
+
# if definition_path is not None:
|
|
184
|
+
# definition = _find_by_path_in_definition(definition_path, definition)
|
|
185
|
+
return definition
|
|
186
|
+
elif ref.startswith("#"):
|
|
187
|
+
logging.info(f"Resolving definition local path {path}")
|
|
188
|
+
|
|
189
|
+
definition_path = ref[1:]
|
|
190
|
+
|
|
191
|
+
return _find_by_path_in_spec(definition_path, spec)
|
|
73
192
|
else:
|
|
74
193
|
raise DataContractException(
|
|
75
194
|
type="lint",
|
|
76
|
-
result=
|
|
195
|
+
result=ResultEnum.failed,
|
|
77
196
|
name="Check that data contract YAML is valid",
|
|
78
197
|
reason=f"Cannot resolve reference {ref}",
|
|
79
198
|
engine="datacontract",
|
|
80
199
|
)
|
|
81
200
|
|
|
82
201
|
|
|
83
|
-
def
|
|
202
|
+
def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification):
|
|
203
|
+
path_elements = definition_path.split("/")
|
|
204
|
+
definition_key = path_elements[2]
|
|
205
|
+
if definition_key not in spec.definitions:
|
|
206
|
+
raise DataContractException(
|
|
207
|
+
type="lint",
|
|
208
|
+
result=ResultEnum.failed,
|
|
209
|
+
name="Check that data contract YAML is valid",
|
|
210
|
+
reason=f"Cannot resolve definition {definition_key}",
|
|
211
|
+
engine="datacontract",
|
|
212
|
+
)
|
|
213
|
+
definition = spec.definitions[definition_key]
|
|
214
|
+
definition = _find_subfield_in_definition(definition, path_elements[3:])
|
|
215
|
+
return definition
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _find_by_path_in_definition(definition_path: str, definition: Definition):
|
|
219
|
+
if definition_path == "" or definition_path == "/":
|
|
220
|
+
return definition
|
|
221
|
+
|
|
222
|
+
path_elements = definition_path.split("/")
|
|
223
|
+
return _find_subfield_in_definition(definition, path_elements[1:])
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _find_subfield_in_definition(definition: Definition, path_elements):
|
|
227
|
+
while len(path_elements) > 0 and path_elements[0] == "fields":
|
|
228
|
+
definition = definition.fields[path_elements[1]]
|
|
229
|
+
path_elements = path_elements[2:]
|
|
230
|
+
|
|
231
|
+
return definition
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _fetch_file(path) -> str:
|
|
235
|
+
if not os.path.exists(path):
|
|
236
|
+
raise DataContractException(
|
|
237
|
+
type="export",
|
|
238
|
+
result=ResultEnum.failed,
|
|
239
|
+
name="Check that data contract definition is valid",
|
|
240
|
+
reason=f"Cannot resolve reference {path}",
|
|
241
|
+
engine="datacontract",
|
|
242
|
+
)
|
|
243
|
+
with open(path, "r") as file:
|
|
244
|
+
return file.read()
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _resolve_quality_ref(quality: DeprecatedQuality):
|
|
84
248
|
"""
|
|
85
249
|
Return the content of a ref file path
|
|
86
250
|
@param quality data contract quality specification
|
|
@@ -89,13 +253,13 @@ def resolve_quality_ref(quality: Quality):
|
|
|
89
253
|
specification = quality.specification
|
|
90
254
|
if quality.type == "great-expectations":
|
|
91
255
|
for model, model_quality in specification.items():
|
|
92
|
-
specification[model] =
|
|
256
|
+
specification[model] = _get_quality_ref_file(model_quality)
|
|
93
257
|
else:
|
|
94
258
|
if "$ref" in specification:
|
|
95
|
-
quality.specification =
|
|
259
|
+
quality.specification = _get_quality_ref_file(specification)
|
|
96
260
|
|
|
97
261
|
|
|
98
|
-
def
|
|
262
|
+
def _get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
99
263
|
"""
|
|
100
264
|
Get the file associated with a quality reference
|
|
101
265
|
@param quality_spec quality specification
|
|
@@ -106,7 +270,7 @@ def get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
|
106
270
|
if not os.path.exists(ref):
|
|
107
271
|
raise DataContractException(
|
|
108
272
|
type="export",
|
|
109
|
-
result=
|
|
273
|
+
result=ResultEnum.failed,
|
|
110
274
|
name="Check that data contract quality is valid",
|
|
111
275
|
reason=f"Cannot resolve reference {ref}",
|
|
112
276
|
engine="datacontract",
|
|
@@ -116,26 +280,83 @@ def get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
|
116
280
|
return quality_spec
|
|
117
281
|
|
|
118
282
|
|
|
119
|
-
def
|
|
120
|
-
data_contract_str, schema_location: str = None, inline_definitions: bool = False,
|
|
283
|
+
def _resolve_data_contract_from_str_v2(
|
|
284
|
+
data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
|
|
285
|
+
) -> DataContractSpecification | OpenDataContractStandard:
|
|
286
|
+
yaml_dict = _to_yaml(data_contract_str)
|
|
287
|
+
|
|
288
|
+
if is_open_data_product_standard(yaml_dict):
|
|
289
|
+
logging.info("Cannot import ODPS, as not supported")
|
|
290
|
+
raise DataContractException(
|
|
291
|
+
type="schema",
|
|
292
|
+
result=ResultEnum.failed,
|
|
293
|
+
name="Parse ODCS contract",
|
|
294
|
+
reason="Cannot parse ODPS product",
|
|
295
|
+
engine="datacontract",
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
if is_open_data_contract_standard(yaml_dict):
|
|
299
|
+
logging.info("Importing ODCS v3")
|
|
300
|
+
# if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
|
|
301
|
+
odcs = parse_odcs_v3_from_str(data_contract_str)
|
|
302
|
+
return odcs
|
|
303
|
+
|
|
304
|
+
logging.info("Importing DCS")
|
|
305
|
+
return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _resolve_data_contract_from_str(
|
|
309
|
+
data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
|
|
121
310
|
) -> DataContractSpecification:
|
|
122
|
-
|
|
123
|
-
|
|
311
|
+
yaml_dict = _to_yaml(data_contract_str)
|
|
312
|
+
|
|
313
|
+
if schema_location is None:
|
|
314
|
+
if is_open_data_contract_standard(yaml_dict):
|
|
315
|
+
logging.info("Using ODCS 3.0.2 schema to validate data contract")
|
|
316
|
+
# TODO refactor this to a specific function
|
|
317
|
+
schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.0.2.schema.json")
|
|
318
|
+
|
|
319
|
+
_validate_json_schema(yaml_dict, schema_location)
|
|
320
|
+
|
|
321
|
+
if is_open_data_contract_standard(yaml_dict):
|
|
322
|
+
logging.info("Importing ODCS v3")
|
|
323
|
+
# if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
|
|
324
|
+
odcs = parse_odcs_v3_from_str(data_contract_str)
|
|
325
|
+
|
|
326
|
+
data_contract_specification = DataContractSpecification(dataContractSpecification="1.2.1")
|
|
327
|
+
return import_from_odcs(data_contract_specification, odcs)
|
|
124
328
|
|
|
125
|
-
|
|
329
|
+
logging.info("Importing DCS")
|
|
330
|
+
return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
|
|
126
331
|
|
|
332
|
+
|
|
333
|
+
def _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict):
|
|
334
|
+
_validate_json_schema(yaml_dict, schema_location)
|
|
335
|
+
data_contract_specification = yaml_dict
|
|
336
|
+
spec = DataContractSpecification(**data_contract_specification)
|
|
127
337
|
if inline_definitions:
|
|
128
338
|
inline_definitions_into_data_contract(spec)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
339
|
+
## Suppress DeprecationWarning when accessing spec.quality,
|
|
340
|
+
## iif it is in fact *not* used.
|
|
341
|
+
with warnings.catch_warnings(record=True) as recorded_warnings:
|
|
342
|
+
spec_quality = spec.quality
|
|
343
|
+
for w in recorded_warnings:
|
|
344
|
+
if not issubclass(w.category, DeprecationWarning) or spec_quality is not None:
|
|
345
|
+
warnings.warn_explicit(
|
|
346
|
+
message=w.message,
|
|
347
|
+
category=w.category,
|
|
348
|
+
filename=w.filename,
|
|
349
|
+
lineno=w.lineno,
|
|
350
|
+
source=w.source,
|
|
351
|
+
)
|
|
352
|
+
if spec_quality and inline_quality:
|
|
353
|
+
_resolve_quality_ref(spec_quality)
|
|
132
354
|
return spec
|
|
133
355
|
|
|
134
356
|
|
|
135
|
-
def
|
|
357
|
+
def _to_yaml(data_contract_str) -> dict:
|
|
136
358
|
try:
|
|
137
|
-
|
|
138
|
-
return yaml_dict
|
|
359
|
+
return yaml.safe_load(data_contract_str)
|
|
139
360
|
except Exception as e:
|
|
140
361
|
logging.warning(f"Cannot parse YAML. Error: {str(e)}")
|
|
141
362
|
raise DataContractException(
|
|
@@ -147,16 +368,17 @@ def to_yaml(data_contract_str):
|
|
|
147
368
|
)
|
|
148
369
|
|
|
149
370
|
|
|
150
|
-
def
|
|
371
|
+
def _validate_json_schema(yaml_str, schema_location: str | Path = None):
|
|
372
|
+
logging.debug(f"Linting data contract with schema at {schema_location}")
|
|
151
373
|
schema = fetch_schema(schema_location)
|
|
152
374
|
try:
|
|
153
|
-
fastjsonschema.validate(schema,
|
|
375
|
+
fastjsonschema.validate(schema, yaml_str, use_default=False)
|
|
154
376
|
logging.debug("YAML data is valid.")
|
|
155
377
|
except JsonSchemaValueException as e:
|
|
156
378
|
logging.warning(f"Data Contract YAML is invalid. Validation error: {e.message}")
|
|
157
379
|
raise DataContractException(
|
|
158
380
|
type="lint",
|
|
159
|
-
result=
|
|
381
|
+
result=ResultEnum.failed,
|
|
160
382
|
name="Check that data contract YAML is valid",
|
|
161
383
|
reason=e.message,
|
|
162
384
|
engine="datacontract",
|
|
@@ -165,7 +387,7 @@ def validate(data_contract_yaml, schema_location: str = None):
|
|
|
165
387
|
logging.warning(f"Data Contract YAML is invalid. Validation error: {str(e)}")
|
|
166
388
|
raise DataContractException(
|
|
167
389
|
type="lint",
|
|
168
|
-
result=
|
|
390
|
+
result=ResultEnum.failed,
|
|
169
391
|
name="Check that data contract YAML is valid",
|
|
170
392
|
reason=str(e),
|
|
171
393
|
engine="datacontract",
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from datacontract.lint.files import read_file
|
|
2
|
+
from datacontract.lint.urls import fetch_resource
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def read_resource(location: str) -> str:
|
|
6
|
+
"""
|
|
7
|
+
Read a resource from a given location.
|
|
8
|
+
|
|
9
|
+
If the location is a URL, fetch the resource from the web. API-Keys are supported.
|
|
10
|
+
Otherwise, read the resource from a local file.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
location (str): The location of the resource, either a URL or a file path.
|
|
14
|
+
|
|
15
|
+
Returns:
|
|
16
|
+
str: The content of the resource.
|
|
17
|
+
"""
|
|
18
|
+
if location.startswith("http://") or location.startswith("https://"):
|
|
19
|
+
return fetch_resource(location)
|
|
20
|
+
else:
|
|
21
|
+
return read_file(location)
|
datacontract/lint/schema.py
CHANGED
|
@@ -1,27 +1,63 @@
|
|
|
1
|
+
import importlib.resources as resources
|
|
1
2
|
import json
|
|
3
|
+
import logging
|
|
2
4
|
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict
|
|
3
7
|
|
|
4
8
|
import requests
|
|
5
9
|
|
|
6
10
|
from datacontract.model.exceptions import DataContractException
|
|
11
|
+
from datacontract.model.run import ResultEnum
|
|
7
12
|
|
|
13
|
+
DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.2.1.schema.json"
|
|
8
14
|
|
|
9
|
-
def fetch_schema(location: str = None):
|
|
10
|
-
if location is None:
|
|
11
|
-
location = "https://datacontract.com/datacontract.schema.json"
|
|
12
15
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
+
def fetch_schema(location: str | Path = None) -> Dict[str, Any]:
|
|
17
|
+
"""
|
|
18
|
+
Fetch and return a JSON schema from a given location.
|
|
19
|
+
|
|
20
|
+
This function retrieves a JSON schema either from a URL or a local file path.
|
|
21
|
+
If no location is provided, it defaults to the DataContract schema URL.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
location: The URL or file path of the schema.
|
|
25
|
+
|
|
26
|
+
Returns:
|
|
27
|
+
The JSON schema as a dictionary.
|
|
28
|
+
|
|
29
|
+
Raises:
|
|
30
|
+
DataContractException: If the specified local file does not exist.
|
|
31
|
+
requests.RequestException: If there's an error fetching the schema from a URL.
|
|
32
|
+
json.JSONDecodeError: If there's an error decoding the JSON schema.
|
|
33
|
+
|
|
34
|
+
"""
|
|
35
|
+
if location is None:
|
|
36
|
+
logging.info("Use default bundled schema " + DEFAULT_DATA_CONTRACT_SCHEMA)
|
|
37
|
+
schemas = resources.files("datacontract")
|
|
38
|
+
schema_file = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_SCHEMA)
|
|
39
|
+
with schema_file.open("r") as file:
|
|
40
|
+
schema = json.load(file)
|
|
16
41
|
else:
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
42
|
+
# Convert Path objects to strings for string operations
|
|
43
|
+
location_str = str(location)
|
|
44
|
+
|
|
45
|
+
if location_str.startswith("http://") or location_str.startswith("https://"):
|
|
46
|
+
logging.debug(f"Downloading schema from {location_str}")
|
|
47
|
+
response = requests.get(location_str)
|
|
48
|
+
schema = response.json()
|
|
49
|
+
else:
|
|
50
|
+
if not os.path.exists(location):
|
|
51
|
+
raise DataContractException(
|
|
52
|
+
type="lint",
|
|
53
|
+
name=f"Reading schema from {location}",
|
|
54
|
+
reason=f"The file '{location}' does not exist.",
|
|
55
|
+
engine="datacontract",
|
|
56
|
+
result=ResultEnum.error,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
logging.debug(f"Loading JSON schema locally at {location}")
|
|
60
|
+
with open(location, "r") as file:
|
|
61
|
+
schema = json.load(file)
|
|
62
|
+
|
|
63
|
+
return schema
|
datacontract/lint/urls.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from urllib.parse import urlparse
|
|
2
3
|
|
|
3
4
|
import requests
|
|
4
5
|
|
|
@@ -25,16 +26,35 @@ def fetch_resource(url: str):
|
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
def _set_api_key(headers, url):
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
hostname = urlparse(url).hostname
|
|
30
|
+
|
|
30
31
|
datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
32
|
+
datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
|
|
33
|
+
|
|
34
|
+
if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
|
|
35
|
+
if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
|
|
36
|
+
print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
|
|
37
|
+
raise DataContractException(
|
|
38
|
+
type="lint",
|
|
39
|
+
name=f"Reading data contract from {url}",
|
|
40
|
+
reason="Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
|
|
41
|
+
engine="datacontract",
|
|
42
|
+
result="error",
|
|
43
|
+
)
|
|
44
|
+
headers["x-api-key"] = datamesh_manager_api_key
|
|
45
|
+
elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
|
|
46
|
+
if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
|
|
47
|
+
print("Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
|
|
48
|
+
raise DataContractException(
|
|
49
|
+
type="lint",
|
|
50
|
+
name=f"Reading data contract from {url}",
|
|
51
|
+
reason="Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
|
|
52
|
+
engine="datacontract",
|
|
53
|
+
result="error",
|
|
54
|
+
)
|
|
55
|
+
headers["x-api-key"] = datacontract_manager_api_key
|
|
56
|
+
|
|
57
|
+
if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
|
|
58
|
+
headers["x-api-key"] = datamesh_manager_api_key
|
|
59
|
+
if datacontract_manager_api_key is not None and datacontract_manager_api_key != "":
|
|
60
|
+
headers["x-api-key"] = datacontract_manager_api_key
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from datacontract_specification.model import *
|
datacontract/model/exceptions.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from datacontract.model.run import ResultEnum
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class DataContractException(Exception):
|
|
2
5
|
"""Exception raised for errors in the execution of a run.
|
|
3
6
|
|
|
@@ -19,7 +22,7 @@ class DataContractException(Exception):
|
|
|
19
22
|
engine="datacontract",
|
|
20
23
|
model=None,
|
|
21
24
|
original_exception=None,
|
|
22
|
-
result:
|
|
25
|
+
result: ResultEnum = ResultEnum.failed,
|
|
23
26
|
message="Run operation failed",
|
|
24
27
|
):
|
|
25
28
|
self.type = type
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
def is_open_data_contract_standard(odcs: dict) -> bool:
|
|
2
|
+
"""
|
|
3
|
+
Check if the given dictionary is an OpenDataContractStandard.
|
|
4
|
+
|
|
5
|
+
Args:
|
|
6
|
+
odcs (dict): The dictionary to check.
|
|
7
|
+
|
|
8
|
+
Returns:
|
|
9
|
+
bool: True if the dictionary is an OpenDataContractStandard, False otherwise.
|
|
10
|
+
"""
|
|
11
|
+
return odcs.get("kind") == "DataContract" and odcs.get("apiVersion", "").startswith("v3")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_open_data_product_standard(odcs: dict) -> bool:
|
|
15
|
+
"""
|
|
16
|
+
Check if the given dictionary is an open data product standard.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
odcs (dict): The dictionary to check.
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
bool: True if the dictionary is an open data product standard, False otherwise.
|
|
23
|
+
"""
|
|
24
|
+
return odcs.get("kind") == "DataProduct" and odcs.get("apiVersion", "").startswith("v1")
|