datacontract-cli 0.10.21__py3-none-any.whl → 0.10.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +1 -1
- datacontract/breaking/breaking_rules.py +1 -1
- datacontract/cli.py +5 -5
- datacontract/data_contract.py +14 -100
- datacontract/engines/data_contract_checks.py +735 -0
- datacontract/engines/data_contract_test.py +51 -0
- datacontract/engines/soda/check_soda_execute.py +36 -30
- datacontract/engines/soda/connections/kafka.py +8 -3
- datacontract/export/avro_converter.py +2 -0
- datacontract/export/exporter.py +0 -2
- datacontract/export/exporter_factory.py +0 -12
- datacontract/export/sodacl_converter.py +22 -294
- datacontract/export/sql_type_converter.py +7 -2
- datacontract/imports/odcs_importer.py +6 -3
- datacontract/imports/odcs_v3_importer.py +2 -0
- datacontract/imports/sql_importer.py +229 -29
- datacontract/model/exceptions.py +4 -1
- datacontract/model/run.py +11 -4
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/METADATA +139 -166
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/RECORD +25 -27
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/export/odcs_v2_exporter.py +0 -124
- datacontract/imports/odcs_v2_importer.py +0 -177
- datacontract/lint/linters/example_model_linter.py +0 -91
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
+
from datacontract.breaking.breaking_change import BreakingChange, Location, Severity
|
|
1
2
|
from datacontract.breaking.breaking_rules import BreakingRules
|
|
2
|
-
from datacontract.model.breaking_change import BreakingChange, Location, Severity
|
|
3
3
|
from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms
|
|
4
4
|
|
|
5
5
|
|
datacontract/cli.py
CHANGED
|
@@ -119,10 +119,6 @@ def test(
|
|
|
119
119
|
"servers (default)."
|
|
120
120
|
),
|
|
121
121
|
] = "all",
|
|
122
|
-
examples: Annotated[
|
|
123
|
-
bool,
|
|
124
|
-
typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
|
|
125
|
-
] = None,
|
|
126
122
|
publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
|
|
127
123
|
logs: Annotated[bool, typer.Option(help="Print logs")] = False,
|
|
128
124
|
ssl_verification: Annotated[
|
|
@@ -141,7 +137,6 @@ def test(
|
|
|
141
137
|
schema_location=schema,
|
|
142
138
|
publish_url=publish,
|
|
143
139
|
server=server,
|
|
144
|
-
examples=examples,
|
|
145
140
|
ssl_verification=ssl_verification,
|
|
146
141
|
).test()
|
|
147
142
|
if logs:
|
|
@@ -237,6 +232,10 @@ def import_(
|
|
|
237
232
|
Optional[str],
|
|
238
233
|
typer.Option(help="The path to the file or Glue Database that should be imported."),
|
|
239
234
|
] = None,
|
|
235
|
+
dialect: Annotated[
|
|
236
|
+
Optional[str],
|
|
237
|
+
typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
|
|
238
|
+
] = None,
|
|
240
239
|
glue_table: Annotated[
|
|
241
240
|
Optional[List[str]],
|
|
242
241
|
typer.Option(
|
|
@@ -293,6 +292,7 @@ def import_(
|
|
|
293
292
|
source=source,
|
|
294
293
|
template=template,
|
|
295
294
|
schema=schema,
|
|
295
|
+
dialect=dialect,
|
|
296
296
|
glue_table=glue_table,
|
|
297
297
|
bigquery_table=bigquery_table,
|
|
298
298
|
bigquery_project=bigquery_project,
|
datacontract/data_contract.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
|
-
import tempfile
|
|
4
2
|
import typing
|
|
5
3
|
|
|
6
|
-
import yaml
|
|
7
|
-
|
|
8
4
|
if typing.TYPE_CHECKING:
|
|
9
5
|
from pyspark.sql import SparkSession
|
|
10
6
|
|
|
@@ -14,11 +10,8 @@ from datacontract.breaking.breaking import (
|
|
|
14
10
|
quality_breaking_changes,
|
|
15
11
|
terms_breaking_changes,
|
|
16
12
|
)
|
|
17
|
-
from datacontract.
|
|
18
|
-
|
|
19
|
-
)
|
|
20
|
-
from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
|
|
21
|
-
from datacontract.engines.soda.check_soda_execute import check_soda_execute
|
|
13
|
+
from datacontract.breaking.breaking_change import BreakingChange, BreakingChanges, Severity
|
|
14
|
+
from datacontract.engines.data_contract_test import execute_data_contract_test
|
|
22
15
|
from datacontract.export.exporter import ExportFormat
|
|
23
16
|
from datacontract.export.exporter_factory import exporter_factory
|
|
24
17
|
from datacontract.imports.importer_factory import importer_factory
|
|
@@ -26,16 +19,14 @@ from datacontract.init.init_template import get_init_template
|
|
|
26
19
|
from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
|
|
27
20
|
from datacontract.lint import resolve
|
|
28
21
|
from datacontract.lint.linters.description_linter import DescriptionLinter
|
|
29
|
-
from datacontract.lint.linters.example_model_linter import ExampleModelLinter
|
|
30
22
|
from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
|
|
31
23
|
from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
|
|
32
24
|
from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
|
|
33
25
|
from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
|
|
34
26
|
from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
|
|
35
|
-
from datacontract.model.
|
|
36
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Server
|
|
27
|
+
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
37
28
|
from datacontract.model.exceptions import DataContractException
|
|
38
|
-
from datacontract.model.run import Check, Run
|
|
29
|
+
from datacontract.model.run import Check, ResultEnum, Run
|
|
39
30
|
|
|
40
31
|
|
|
41
32
|
class DataContract:
|
|
@@ -46,7 +37,6 @@ class DataContract:
|
|
|
46
37
|
data_contract: DataContractSpecification = None,
|
|
47
38
|
schema_location: str = None,
|
|
48
39
|
server: str = None,
|
|
49
|
-
examples: bool = False,
|
|
50
40
|
publish_url: str = None,
|
|
51
41
|
spark: "SparkSession" = None,
|
|
52
42
|
inline_definitions: bool = True,
|
|
@@ -58,14 +48,12 @@ class DataContract:
|
|
|
58
48
|
self._data_contract = data_contract
|
|
59
49
|
self._schema_location = schema_location
|
|
60
50
|
self._server = server
|
|
61
|
-
self._examples = examples
|
|
62
51
|
self._publish_url = publish_url
|
|
63
52
|
self._spark = spark
|
|
64
53
|
self._inline_definitions = inline_definitions
|
|
65
54
|
self._inline_quality = inline_quality
|
|
66
55
|
self._ssl_verification = ssl_verification
|
|
67
56
|
self.all_linters = {
|
|
68
|
-
ExampleModelLinter(),
|
|
69
57
|
QualityUsesSchemaLinter(),
|
|
70
58
|
FieldPatternLinter(),
|
|
71
59
|
FieldReferenceLinter(),
|
|
@@ -96,7 +84,12 @@ class DataContract:
|
|
|
96
84
|
inline_quality=self._inline_quality,
|
|
97
85
|
)
|
|
98
86
|
run.checks.append(
|
|
99
|
-
Check(
|
|
87
|
+
Check(
|
|
88
|
+
type="lint",
|
|
89
|
+
result=ResultEnum.passed,
|
|
90
|
+
name="Data contract is syntactically valid",
|
|
91
|
+
engine="datacontract",
|
|
92
|
+
)
|
|
100
93
|
)
|
|
101
94
|
if enabled_linters == "none":
|
|
102
95
|
linters_to_check = set()
|
|
@@ -113,7 +106,7 @@ class DataContract:
|
|
|
113
106
|
run.checks.append(
|
|
114
107
|
Check(
|
|
115
108
|
type="general",
|
|
116
|
-
result=
|
|
109
|
+
result=ResultEnum.error,
|
|
117
110
|
name=f"Linter '{linter.name}'",
|
|
118
111
|
reason=str(e),
|
|
119
112
|
engine="datacontract",
|
|
@@ -130,7 +123,7 @@ class DataContract:
|
|
|
130
123
|
run.checks.append(
|
|
131
124
|
Check(
|
|
132
125
|
type="general",
|
|
133
|
-
result=
|
|
126
|
+
result=ResultEnum.error,
|
|
134
127
|
name="Check Data Contract",
|
|
135
128
|
reason=str(e),
|
|
136
129
|
engine="datacontract",
|
|
@@ -153,54 +146,7 @@ class DataContract:
|
|
|
153
146
|
inline_quality=self._inline_quality,
|
|
154
147
|
)
|
|
155
148
|
|
|
156
|
-
|
|
157
|
-
raise DataContractException(
|
|
158
|
-
type="lint",
|
|
159
|
-
name="Check that data contract contains models",
|
|
160
|
-
result="warning",
|
|
161
|
-
reason="Models block is missing. Skip executing tests.",
|
|
162
|
-
engine="datacontract",
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
if self._examples:
|
|
166
|
-
if data_contract.examples is None or len(data_contract.examples) == 0:
|
|
167
|
-
raise DataContractException(
|
|
168
|
-
type="lint",
|
|
169
|
-
name="Check that data contract contains valid examples",
|
|
170
|
-
result="warning",
|
|
171
|
-
reason="Examples block is missing. Skip executing tests.",
|
|
172
|
-
engine="datacontract",
|
|
173
|
-
)
|
|
174
|
-
else:
|
|
175
|
-
check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
|
|
176
|
-
|
|
177
|
-
# TODO create directory only for examples
|
|
178
|
-
with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
|
|
179
|
-
if self._examples:
|
|
180
|
-
server_name = "examples"
|
|
181
|
-
server = self._get_examples_server(data_contract, run, tmp_dir)
|
|
182
|
-
elif self._server:
|
|
183
|
-
server_name = self._server
|
|
184
|
-
server = data_contract.servers.get(server_name)
|
|
185
|
-
else:
|
|
186
|
-
server_name = list(data_contract.servers.keys())[0]
|
|
187
|
-
server = data_contract.servers.get(server_name)
|
|
188
|
-
|
|
189
|
-
run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
|
|
190
|
-
run.dataContractId = data_contract.id
|
|
191
|
-
run.dataContractVersion = data_contract.info.version
|
|
192
|
-
run.dataProductId = server.dataProductId
|
|
193
|
-
run.outputPortId = server.outputPortId
|
|
194
|
-
run.server = server_name
|
|
195
|
-
|
|
196
|
-
# TODO check server is supported type for nicer error messages
|
|
197
|
-
|
|
198
|
-
# TODO check server credentials are complete for nicer error messages
|
|
199
|
-
|
|
200
|
-
if server.format == "json" and server.type != "kafka":
|
|
201
|
-
check_jsonschema(run, data_contract, server)
|
|
202
|
-
|
|
203
|
-
check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
|
|
149
|
+
execute_data_contract_test(data_contract, run, self._server, self._spark)
|
|
204
150
|
|
|
205
151
|
except DataContractException as e:
|
|
206
152
|
run.checks.append(
|
|
@@ -219,7 +165,7 @@ class DataContract:
|
|
|
219
165
|
run.checks.append(
|
|
220
166
|
Check(
|
|
221
167
|
type="general",
|
|
222
|
-
result=
|
|
168
|
+
result=ResultEnum.error,
|
|
223
169
|
name="Test Data Contract",
|
|
224
170
|
reason=str(e),
|
|
225
171
|
engine="datacontract",
|
|
@@ -235,38 +181,6 @@ class DataContract:
|
|
|
235
181
|
|
|
236
182
|
return run
|
|
237
183
|
|
|
238
|
-
def _get_examples_server(self, data_contract, run, tmp_dir):
|
|
239
|
-
run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
|
|
240
|
-
format = "json"
|
|
241
|
-
for example in data_contract.examples:
|
|
242
|
-
format = example.type
|
|
243
|
-
p = f"{tmp_dir}/{example.model}.{format}"
|
|
244
|
-
run.log_info(f"Creating example file {p}")
|
|
245
|
-
with open(p, "w") as f:
|
|
246
|
-
content = ""
|
|
247
|
-
if format == "json" and isinstance(example.data, list):
|
|
248
|
-
content = json.dumps(example.data)
|
|
249
|
-
elif format == "json" and isinstance(example.data, str):
|
|
250
|
-
content = example.data
|
|
251
|
-
elif format == "yaml" and isinstance(example.data, list):
|
|
252
|
-
content = yaml.dump(example.data, allow_unicode=True)
|
|
253
|
-
elif format == "yaml" and isinstance(example.data, str):
|
|
254
|
-
content = example.data
|
|
255
|
-
elif format == "csv":
|
|
256
|
-
content = example.data
|
|
257
|
-
logging.debug(f"Content of example file {p}: {content}")
|
|
258
|
-
f.write(content)
|
|
259
|
-
path = f"{tmp_dir}" + "/{model}." + format
|
|
260
|
-
delimiter = "array"
|
|
261
|
-
server = Server(
|
|
262
|
-
type="local",
|
|
263
|
-
path=path,
|
|
264
|
-
format=format,
|
|
265
|
-
delimiter=delimiter,
|
|
266
|
-
)
|
|
267
|
-
run.log_info(f"Using {server} for testing the examples")
|
|
268
|
-
return server
|
|
269
|
-
|
|
270
184
|
def breaking(self, other: "DataContract") -> BreakingChanges:
|
|
271
185
|
return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
|
|
272
186
|
|