datacontract-cli 0.10.9__py3-none-any.whl → 0.10.11__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/cli.py +7 -0
- datacontract/data_contract.py +16 -9
- datacontract/engines/fastjsonschema/check_jsonschema.py +4 -1
- datacontract/engines/soda/check_soda_execute.py +5 -2
- datacontract/engines/soda/connections/duckdb.py +20 -12
- datacontract/engines/soda/connections/snowflake.py +8 -5
- datacontract/export/avro_converter.py +1 -1
- datacontract/export/dbml_converter.py +41 -19
- datacontract/export/exporter.py +1 -1
- datacontract/export/jsonschema_converter.py +1 -4
- datacontract/export/sodacl_converter.py +1 -1
- datacontract/imports/avro_importer.py +142 -8
- datacontract/imports/dbt_importer.py +117 -0
- datacontract/imports/glue_importer.py +9 -3
- datacontract/imports/importer.py +7 -2
- datacontract/imports/importer_factory.py +24 -6
- datacontract/imports/jsonschema_importer.py +106 -117
- datacontract/imports/spark_importer.py +134 -0
- datacontract/imports/sql_importer.py +4 -0
- datacontract/integration/publish_datamesh_manager.py +10 -5
- datacontract/lint/resolve.py +72 -27
- datacontract/lint/schema.py +24 -4
- datacontract/model/data_contract_specification.py +3 -0
- datacontract/templates/datacontract.html +1 -1
- datacontract/templates/index.html +1 -1
- datacontract/templates/partials/model_field.html +10 -2
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/METADATA +300 -192
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/RECORD +32 -30
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/top_level.txt +0 -0
datacontract/lint/resolve.py
CHANGED
|
@@ -25,7 +25,7 @@ def resolve_data_contract(
|
|
|
25
25
|
data_contract_location, schema_location, inline_definitions, inline_quality
|
|
26
26
|
)
|
|
27
27
|
elif data_contract_str is not None:
|
|
28
|
-
return
|
|
28
|
+
return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
|
|
29
29
|
elif data_contract is not None:
|
|
30
30
|
return data_contract
|
|
31
31
|
else:
|
|
@@ -45,7 +45,7 @@ def resolve_data_contract_from_location(
|
|
|
45
45
|
data_contract_str = fetch_resource(location)
|
|
46
46
|
else:
|
|
47
47
|
data_contract_str = read_file(location)
|
|
48
|
-
return
|
|
48
|
+
return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
@@ -55,7 +55,7 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
|
55
55
|
if not field.ref and not field.ref_obj:
|
|
56
56
|
continue
|
|
57
57
|
|
|
58
|
-
definition =
|
|
58
|
+
definition = _resolve_definition_ref(field.ref, spec)
|
|
59
59
|
field.ref_obj = definition
|
|
60
60
|
|
|
61
61
|
for field_name in field.model_fields.keys():
|
|
@@ -67,19 +67,41 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
|
67
67
|
setattr(field, extra_field_name, extra_field_value)
|
|
68
68
|
|
|
69
69
|
|
|
70
|
-
def resolve_definition_ref(ref, definitions) -> Definition:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
path = ref
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
70
|
+
def _resolve_definition_ref(ref, spec) -> Definition:
|
|
71
|
+
logging.info(f"Resolving definition ref {ref}")
|
|
72
|
+
|
|
73
|
+
if "#" in ref:
|
|
74
|
+
path, definition_path = ref.split("#")
|
|
75
|
+
else:
|
|
76
|
+
path, definition_path = ref, None
|
|
77
|
+
|
|
78
|
+
if path.startswith("http://") or path.startswith("https://"):
|
|
79
|
+
logging.info(f"Resolving definition url {path}")
|
|
80
|
+
|
|
81
|
+
definition_str = fetch_resource(path)
|
|
82
|
+
definition_dict = _to_yaml(definition_str)
|
|
83
|
+
definition = Definition(**definition_dict)
|
|
84
|
+
if definition_path is not None:
|
|
85
|
+
return _find_by_path_in_definition(definition_path, definition)
|
|
86
|
+
else:
|
|
87
|
+
return definition
|
|
88
|
+
elif path.startswith("file://"):
|
|
89
|
+
logging.info(f"Resolving definition file path {path}")
|
|
90
|
+
|
|
91
|
+
path = path.replace("file://", "")
|
|
92
|
+
definition_str = _fetch_file(path)
|
|
93
|
+
definition_dict = _to_yaml(definition_str)
|
|
94
|
+
definition = Definition(**definition_dict)
|
|
95
|
+
if definition_path is not None:
|
|
96
|
+
return _find_by_path_in_definition(definition_path, definition)
|
|
97
|
+
else:
|
|
98
|
+
return definition
|
|
99
|
+
elif ref.startswith("#"):
|
|
100
|
+
logging.info(f"Resolving definition local path {path}")
|
|
101
|
+
|
|
102
|
+
definition_path = ref[1:]
|
|
103
|
+
|
|
104
|
+
return _find_by_path_in_spec(definition_path, spec)
|
|
83
105
|
else:
|
|
84
106
|
raise DataContractException(
|
|
85
107
|
type="lint",
|
|
@@ -90,7 +112,30 @@ def resolve_definition_ref(ref, definitions) -> Definition:
|
|
|
90
112
|
)
|
|
91
113
|
|
|
92
114
|
|
|
93
|
-
def fetch_file(path) -> str:
|
|
115
|
+
def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification):
|
|
116
|
+
path_elements = definition_path.split("/")
|
|
117
|
+
definition = spec.definitions[path_elements[2]]
|
|
118
|
+
definition = _find_subfield_in_definition(definition, path_elements[3:])
|
|
119
|
+
return definition
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _find_by_path_in_definition(definition_path: str, definition: Definition):
|
|
123
|
+
if definition_path == "" or definition_path == "/":
|
|
124
|
+
return definition
|
|
125
|
+
|
|
126
|
+
path_elements = definition_path.split("/")
|
|
127
|
+
return _find_subfield_in_definition(definition, path_elements[1:])
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _find_subfield_in_definition(definition: Definition, path_elements):
|
|
131
|
+
while len(path_elements) > 0 and path_elements[0] == "fields":
|
|
132
|
+
definition = definition.fields[path_elements[1]]
|
|
133
|
+
path_elements = path_elements[2:]
|
|
134
|
+
|
|
135
|
+
return definition
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _fetch_file(path) -> str:
|
|
94
139
|
if not os.path.exists(path):
|
|
95
140
|
raise DataContractException(
|
|
96
141
|
type="export",
|
|
@@ -103,7 +148,7 @@ def fetch_file(path) -> str:
|
|
|
103
148
|
return file.read()
|
|
104
149
|
|
|
105
150
|
|
|
106
|
-
def resolve_quality_ref(quality: Quality):
|
|
151
|
+
def _resolve_quality_ref(quality: Quality):
|
|
107
152
|
"""
|
|
108
153
|
Return the content of a ref file path
|
|
109
154
|
@param quality data contract quality specification
|
|
@@ -112,13 +157,13 @@ def resolve_quality_ref(quality: Quality):
|
|
|
112
157
|
specification = quality.specification
|
|
113
158
|
if quality.type == "great-expectations":
|
|
114
159
|
for model, model_quality in specification.items():
|
|
115
|
-
specification[model] =
|
|
160
|
+
specification[model] = _get_quality_ref_file(model_quality)
|
|
116
161
|
else:
|
|
117
162
|
if "$ref" in specification:
|
|
118
|
-
quality.specification =
|
|
163
|
+
quality.specification = _get_quality_ref_file(specification)
|
|
119
164
|
|
|
120
165
|
|
|
121
|
-
def get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
166
|
+
def _get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
122
167
|
"""
|
|
123
168
|
Get the file associated with a quality reference
|
|
124
169
|
@param quality_spec quality specification
|
|
@@ -139,23 +184,23 @@ def get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
|
139
184
|
return quality_spec
|
|
140
185
|
|
|
141
186
|
|
|
142
|
-
def
|
|
187
|
+
def _resolve_data_contract_from_str(
|
|
143
188
|
data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
|
|
144
189
|
) -> DataContractSpecification:
|
|
145
|
-
data_contract_yaml_dict =
|
|
146
|
-
|
|
190
|
+
data_contract_yaml_dict = _to_yaml(data_contract_str)
|
|
191
|
+
_validate(data_contract_yaml_dict, schema_location)
|
|
147
192
|
|
|
148
193
|
spec = DataContractSpecification(**data_contract_yaml_dict)
|
|
149
194
|
|
|
150
195
|
if inline_definitions:
|
|
151
196
|
inline_definitions_into_data_contract(spec)
|
|
152
197
|
if spec.quality and inline_quality:
|
|
153
|
-
|
|
198
|
+
_resolve_quality_ref(spec.quality)
|
|
154
199
|
|
|
155
200
|
return spec
|
|
156
201
|
|
|
157
202
|
|
|
158
|
-
def to_yaml(data_contract_str):
|
|
203
|
+
def _to_yaml(data_contract_str):
|
|
159
204
|
try:
|
|
160
205
|
yaml_dict = yaml.safe_load(data_contract_str)
|
|
161
206
|
return yaml_dict
|
|
@@ -170,7 +215,7 @@ def to_yaml(data_contract_str):
|
|
|
170
215
|
)
|
|
171
216
|
|
|
172
217
|
|
|
173
|
-
def
|
|
218
|
+
def _validate(data_contract_yaml, schema_location: str = None):
|
|
174
219
|
schema = fetch_schema(schema_location)
|
|
175
220
|
try:
|
|
176
221
|
fastjsonschema.validate(schema, data_contract_yaml)
|
datacontract/lint/schema.py
CHANGED
|
@@ -1,18 +1,37 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
+
from typing import Dict, Any
|
|
3
4
|
|
|
4
5
|
import requests
|
|
5
6
|
|
|
6
7
|
from datacontract.model.exceptions import DataContractException
|
|
7
8
|
|
|
8
9
|
|
|
9
|
-
def fetch_schema(location: str = None):
|
|
10
|
+
def fetch_schema(location: str = None) -> Dict[str, Any]:
|
|
11
|
+
"""
|
|
12
|
+
Fetch and return a JSON schema from a given location.
|
|
13
|
+
|
|
14
|
+
This function retrieves a JSON schema either from a URL or a local file path.
|
|
15
|
+
If no location is provided, it defaults to the DataContract schema URL.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
location: The URL or file path of the schema.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
The JSON schema as a dictionary.
|
|
22
|
+
|
|
23
|
+
Raises:
|
|
24
|
+
DataContractException: If the specified local file does not exist.
|
|
25
|
+
requests.RequestException: If there's an error fetching the schema from a URL.
|
|
26
|
+
json.JSONDecodeError: If there's an error decoding the JSON schema.
|
|
27
|
+
|
|
28
|
+
"""
|
|
10
29
|
if location is None:
|
|
11
30
|
location = "https://datacontract.com/datacontract.schema.json"
|
|
12
31
|
|
|
13
32
|
if location.startswith("http://") or location.startswith("https://"):
|
|
14
33
|
response = requests.get(location)
|
|
15
|
-
|
|
34
|
+
schema = response.json()
|
|
16
35
|
else:
|
|
17
36
|
if not os.path.exists(location):
|
|
18
37
|
raise DataContractException(
|
|
@@ -23,5 +42,6 @@ def fetch_schema(location: str = None):
|
|
|
23
42
|
result="error",
|
|
24
43
|
)
|
|
25
44
|
with open(location, "r") as file:
|
|
26
|
-
|
|
27
|
-
|
|
45
|
+
schema = json.load(file)
|
|
46
|
+
|
|
47
|
+
return schema
|
|
datacontract/model/data_contract_specification.py
CHANGED
|
@@ -73,6 +73,7 @@ class Definition(pyd.BaseModel):
|
|
|
73
73
|
exclusiveMaximum: int = None
|
|
74
74
|
pii: bool = None
|
|
75
75
|
classification: str = None
|
|
76
|
+
fields: Dict[str, "Definition"] = {}
|
|
76
77
|
tags: List[str] = []
|
|
77
78
|
links: Dict[str, str] = {}
|
|
78
79
|
example: str = None
|
|
@@ -107,6 +108,8 @@ class Field(pyd.BaseModel):
|
|
|
107
108
|
links: Dict[str, str] = {}
|
|
108
109
|
fields: Dict[str, "Field"] = {}
|
|
109
110
|
items: "Field" = None
|
|
111
|
+
keys: "Field" = None
|
|
112
|
+
values: "Field" = None
|
|
110
113
|
precision: int = None
|
|
111
114
|
scale: int = None
|
|
112
115
|
example: str = None
|
|
datacontract/templates/datacontract.html
CHANGED
|
@@ -250,7 +250,7 @@
|
|
|
250
250
|
</div>
|
|
251
251
|
<div class="mt-8 md:order-1 md:mt-0">
|
|
252
252
|
<p class="text-center leading-5 text-gray-400">
|
|
253
|
-
Supported with ❤️ by <a href="https://
|
|
253
|
+
Supported with ❤️ by <a href="https://datacontract-manager.com" class="text-gray-400 hover:text-gray-500">Data Contract Manager</a>
|
|
254
254
|
</p>
|
|
255
255
|
</div>
|
|
256
256
|
</div>
|
|
datacontract/templates/index.html
CHANGED
|
@@ -190,7 +190,7 @@
|
|
|
190
190
|
</div>
|
|
191
191
|
<div class="mt-8 md:order-1 md:mt-0">
|
|
192
192
|
<p class="text-center leading-5 text-gray-400">
|
|
193
|
-
Supported with ❤️ by <a href="https://
|
|
193
|
+
Supported with ❤️ by <a href="https://datacontract-manager.com" class="text-gray-400 hover:text-gray-500">Data Contract Manager</a>
|
|
194
194
|
</p>
|
|
195
195
|
</div>
|
|
196
196
|
</div>
|
|
datacontract/templates/partials/model_field.html
CHANGED
|
@@ -110,5 +110,13 @@
|
|
|
110
110
|
{% endif %}
|
|
111
111
|
|
|
112
112
|
{% if field.items %}
|
|
113
|
-
{{ render_nested_partial("
|
|
114
|
-
{% endif %}
|
|
113
|
+
{{ render_nested_partial("items", field.items, level) }}
|
|
114
|
+
{% endif %}
|
|
115
|
+
|
|
116
|
+
{% if field.keys %}
|
|
117
|
+
{{ render_nested_partial("keys", field.keys, level) }}
|
|
118
|
+
{% endif %}
|
|
119
|
+
|
|
120
|
+
{% if field.values %}
|
|
121
|
+
{{ render_nested_partial("values", field.values, level) }}
|
|
122
|
+
{% endif %}
|