datacontract-cli 0.10.21__py3-none-any.whl → 0.10.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/breaking/breaking.py +1 -1
- datacontract/breaking/breaking_rules.py +1 -1
- datacontract/cli.py +25 -77
- datacontract/data_contract.py +14 -100
- datacontract/engines/data_contract_checks.py +735 -0
- datacontract/engines/data_contract_test.py +67 -0
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/soda/check_soda_execute.py +37 -31
- datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py} +6 -5
- datacontract/engines/soda/connections/kafka.py +8 -3
- datacontract/export/avro_converter.py +2 -0
- datacontract/export/dbt_converter.py +13 -10
- datacontract/export/exporter.py +0 -2
- datacontract/export/exporter_factory.py +0 -12
- datacontract/export/odcs_v3_exporter.py +22 -3
- datacontract/export/sodacl_converter.py +22 -294
- datacontract/export/sql_type_converter.py +7 -2
- datacontract/imports/odcs_importer.py +6 -3
- datacontract/imports/odcs_v3_importer.py +3 -1
- datacontract/imports/sql_importer.py +229 -29
- datacontract/lint/resolve.py +17 -4
- datacontract/model/exceptions.py +4 -1
- datacontract/model/run.py +11 -4
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/METADATA +192 -215
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/RECORD +33 -32
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/WHEEL +1 -1
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/export/odcs_v2_exporter.py +0 -124
- datacontract/imports/odcs_v2_importer.py +0 -177
- datacontract/lint/linters/example_model_linter.py +0 -91
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
+
from datacontract.breaking.breaking_change import BreakingChange, Location, Severity
|
|
1
2
|
from datacontract.breaking.breaking_rules import BreakingRules
|
|
2
|
-
from datacontract.model.breaking_change import BreakingChange, Location, Severity
|
|
3
3
|
from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms
|
|
4
4
|
|
|
5
5
|
|
datacontract/cli.py
CHANGED
|
@@ -5,9 +5,7 @@ from typing import Iterable, List, Optional
|
|
|
5
5
|
|
|
6
6
|
import typer
|
|
7
7
|
from click import Context
|
|
8
|
-
from rich import box
|
|
9
8
|
from rich.console import Console
|
|
10
|
-
from rich.table import Table
|
|
11
9
|
from typer.core import TyperGroup
|
|
12
10
|
from typing_extensions import Annotated
|
|
13
11
|
|
|
@@ -19,6 +17,8 @@ from datacontract.integration.datamesh_manager import (
|
|
|
19
17
|
publish_data_contract_to_datamesh_manager,
|
|
20
18
|
)
|
|
21
19
|
from datacontract.lint.resolve import resolve_data_contract_dict
|
|
20
|
+
from datacontract.output.output_format import OutputFormat
|
|
21
|
+
from datacontract.output.test_results_writer import write_test_result
|
|
22
22
|
|
|
23
23
|
console = Console()
|
|
24
24
|
|
|
@@ -92,12 +92,19 @@ def lint(
|
|
|
92
92
|
str,
|
|
93
93
|
typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
|
|
94
94
|
] = None,
|
|
95
|
+
output: Annotated[
|
|
96
|
+
Path,
|
|
97
|
+
typer.Option(
|
|
98
|
+
help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml'). If no path is provided, the output will be printed to stdout."
|
|
99
|
+
),
|
|
100
|
+
] = None,
|
|
101
|
+
output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
|
|
95
102
|
):
|
|
96
103
|
"""
|
|
97
104
|
Validate that the datacontract.yaml is correctly formatted.
|
|
98
105
|
"""
|
|
99
106
|
run = DataContract(data_contract_file=location, schema_location=schema).lint()
|
|
100
|
-
|
|
107
|
+
write_test_result(run, console, output_format, output)
|
|
101
108
|
|
|
102
109
|
|
|
103
110
|
@app.command()
|
|
@@ -119,11 +126,14 @@ def test(
|
|
|
119
126
|
"servers (default)."
|
|
120
127
|
),
|
|
121
128
|
] = "all",
|
|
122
|
-
examples: Annotated[
|
|
123
|
-
bool,
|
|
124
|
-
typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
|
|
125
|
-
] = None,
|
|
126
129
|
publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
|
|
130
|
+
output: Annotated[
|
|
131
|
+
Path,
|
|
132
|
+
typer.Option(
|
|
133
|
+
help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml')."
|
|
134
|
+
),
|
|
135
|
+
] = None,
|
|
136
|
+
output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
|
|
127
137
|
logs: Annotated[bool, typer.Option(help="Print logs")] = False,
|
|
128
138
|
ssl_verification: Annotated[
|
|
129
139
|
bool,
|
|
@@ -141,12 +151,11 @@ def test(
|
|
|
141
151
|
schema_location=schema,
|
|
142
152
|
publish_url=publish,
|
|
143
153
|
server=server,
|
|
144
|
-
examples=examples,
|
|
145
154
|
ssl_verification=ssl_verification,
|
|
146
155
|
).test()
|
|
147
156
|
if logs:
|
|
148
157
|
_print_logs(run)
|
|
149
|
-
|
|
158
|
+
write_test_result(run, console, output_format, output)
|
|
150
159
|
|
|
151
160
|
|
|
152
161
|
@app.command()
|
|
@@ -219,7 +228,7 @@ def export(
|
|
|
219
228
|
if output is None:
|
|
220
229
|
console.print(result, markup=False, soft_wrap=True)
|
|
221
230
|
else:
|
|
222
|
-
with output.open("w") as f:
|
|
231
|
+
with output.open(mode="w", encoding="utf-8") as f:
|
|
223
232
|
f.write(result)
|
|
224
233
|
console.print(f"Written result to {output}")
|
|
225
234
|
|
|
@@ -237,6 +246,10 @@ def import_(
|
|
|
237
246
|
Optional[str],
|
|
238
247
|
typer.Option(help="The path to the file or Glue Database that should be imported."),
|
|
239
248
|
] = None,
|
|
249
|
+
dialect: Annotated[
|
|
250
|
+
Optional[str],
|
|
251
|
+
typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
|
|
252
|
+
] = None,
|
|
240
253
|
glue_table: Annotated[
|
|
241
254
|
Optional[List[str]],
|
|
242
255
|
typer.Option(
|
|
@@ -293,6 +306,7 @@ def import_(
|
|
|
293
306
|
source=source,
|
|
294
307
|
template=template,
|
|
295
308
|
schema=schema,
|
|
309
|
+
dialect=dialect,
|
|
296
310
|
glue_table=glue_table,
|
|
297
311
|
bigquery_table=bigquery_table,
|
|
298
312
|
bigquery_project=bigquery_project,
|
|
@@ -306,7 +320,7 @@ def import_(
|
|
|
306
320
|
if output is None:
|
|
307
321
|
console.print(result.to_yaml(), markup=False, soft_wrap=True)
|
|
308
322
|
else:
|
|
309
|
-
with output.open("w") as f:
|
|
323
|
+
with output.open(mode="w", encoding="utf-8") as f:
|
|
310
324
|
f.write(result.to_yaml())
|
|
311
325
|
console.print(f"Written result to {output}")
|
|
312
326
|
|
|
@@ -467,77 +481,11 @@ def api(
|
|
|
467
481
|
uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)
|
|
468
482
|
|
|
469
483
|
|
|
470
|
-
def _handle_result(run):
|
|
471
|
-
_print_table(run)
|
|
472
|
-
if run.result == "passed":
|
|
473
|
-
console.print(
|
|
474
|
-
f"🟢 data contract is valid. Run {len(run.checks)} checks. Took {(run.timestampEnd - run.timestampStart).total_seconds()} seconds."
|
|
475
|
-
)
|
|
476
|
-
elif run.result == "warning":
|
|
477
|
-
console.print("🟠 data contract has warnings. Found the following warnings:")
|
|
478
|
-
i = 1
|
|
479
|
-
for check in run.checks:
|
|
480
|
-
if check.result != "passed":
|
|
481
|
-
field = to_field(run, check)
|
|
482
|
-
if field:
|
|
483
|
-
field = field + " "
|
|
484
|
-
else:
|
|
485
|
-
field = ""
|
|
486
|
-
console.print(f"{i}) {field}{check.name}: {check.reason}")
|
|
487
|
-
i += 1
|
|
488
|
-
else:
|
|
489
|
-
console.print("🔴 data contract is invalid, found the following errors:")
|
|
490
|
-
i = 1
|
|
491
|
-
for check in run.checks:
|
|
492
|
-
if check.result != "passed":
|
|
493
|
-
field = to_field(run, check)
|
|
494
|
-
if field:
|
|
495
|
-
field = field + " "
|
|
496
|
-
else:
|
|
497
|
-
field = ""
|
|
498
|
-
console.print(f"{i}) {field}{check.name}: {check.reason}")
|
|
499
|
-
i += 1
|
|
500
|
-
raise typer.Exit(code=1)
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
def _print_table(run):
|
|
504
|
-
table = Table(box=box.ROUNDED)
|
|
505
|
-
table.add_column("Result", no_wrap=True)
|
|
506
|
-
table.add_column("Check", max_width=100)
|
|
507
|
-
table.add_column("Field", max_width=32)
|
|
508
|
-
table.add_column("Details", max_width=50)
|
|
509
|
-
for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")):
|
|
510
|
-
table.add_row(with_markup(check.result), check.name, to_field(run, check), check.reason)
|
|
511
|
-
console.print(table)
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
def to_field(run, check):
|
|
515
|
-
models = [c.model for c in run.checks]
|
|
516
|
-
if len(set(models)) > 1:
|
|
517
|
-
if check.field is None:
|
|
518
|
-
return check.model
|
|
519
|
-
return check.model + "." + check.field
|
|
520
|
-
else:
|
|
521
|
-
return check.field
|
|
522
|
-
|
|
523
|
-
|
|
524
484
|
def _print_logs(run):
|
|
525
485
|
console.print("\nLogs:")
|
|
526
486
|
for log in run.logs:
|
|
527
487
|
console.print(log.timestamp.strftime("%y-%m-%d %H:%M:%S"), log.level.ljust(5), log.message)
|
|
528
488
|
|
|
529
489
|
|
|
530
|
-
def with_markup(result):
|
|
531
|
-
if result == "passed":
|
|
532
|
-
return "[green]passed[/green]"
|
|
533
|
-
if result == "warning":
|
|
534
|
-
return "[yellow]warning[/yellow]"
|
|
535
|
-
if result == "failed":
|
|
536
|
-
return "[red]failed[/red]"
|
|
537
|
-
if result == "error":
|
|
538
|
-
return "[red]error[/red]"
|
|
539
|
-
return result
|
|
540
|
-
|
|
541
|
-
|
|
542
490
|
if __name__ == "__main__":
|
|
543
491
|
app()
|
datacontract/data_contract.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
|
-
import tempfile
|
|
4
2
|
import typing
|
|
5
3
|
|
|
6
|
-
import yaml
|
|
7
|
-
|
|
8
4
|
if typing.TYPE_CHECKING:
|
|
9
5
|
from pyspark.sql import SparkSession
|
|
10
6
|
|
|
@@ -14,11 +10,8 @@ from datacontract.breaking.breaking import (
|
|
|
14
10
|
quality_breaking_changes,
|
|
15
11
|
terms_breaking_changes,
|
|
16
12
|
)
|
|
17
|
-
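from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
|
|
18
|
-
check_that_datacontract_contains_valid_server_configuration,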
|
19
|
-
)
|
|
20
|
-
from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
|
|
21
|
-
from datacontract.engines.soda.check_soda_execute import check_soda_execute
|
|
13
|
+
from datacontract.breaking.breaking_change import BreakingChange, BreakingChanges, Severity
|
|
14
|
+
from datacontract.engines.data_contract_test import execute_data_contract_test
|
|
22
15
|
from datacontract.export.exporter import ExportFormat
|
|
23
16
|
from datacontract.export.exporter_factory import exporter_factory
|
|
24
17
|
from datacontract.imports.importer_factory import importer_factory
|
|
@@ -26,16 +19,14 @@ from datacontract.init.init_template import get_init_template
|
|
|
26
19
|
from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
|
|
27
20
|
from datacontract.lint import resolve
|
|
28
21
|
from datacontract.lint.linters.description_linter import DescriptionLinter
|
|
29
|
-
from datacontract.lint.linters.example_model_linter import ExampleModelLinter
|
|
30
22
|
from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
|
|
31
23
|
from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
|
|
32
24
|
from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
|
|
33
25
|
from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
|
|
34
26
|
from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
|
|
35
|
-
from datacontract.model.breaking_change import BreakingChange, BreakingChanges, Severity
|
|
36
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Server
|
|
27
|
+
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
37
28
|
from datacontract.model.exceptions import DataContractException
|
|
38
|
-
from datacontract.model.run import Check, Run
|
|
29
|
+
from datacontract.model.run import Check, ResultEnum, Run
|
|
39
30
|
|
|
40
31
|
|
|
41
32
|
class DataContract:
|
|
@@ -46,7 +37,6 @@ class DataContract:
|
|
|
46
37
|
data_contract: DataContractSpecification = None,
|
|
47
38
|
schema_location: str = None,
|
|
48
39
|
server: str = None,
|
|
49
|
-
examples: bool = False,
|
|
50
40
|
publish_url: str = None,
|
|
51
41
|
spark: "SparkSession" = None,
|
|
52
42
|
inline_definitions: bool = True,
|
|
@@ -58,14 +48,12 @@ class DataContract:
|
|
|
58
48
|
self._data_contract = data_contract
|
|
59
49
|
self._schema_location = schema_location
|
|
60
50
|
self._server = server
|
|
61
|
-
self._examples = examples
|
|
62
51
|
self._publish_url = publish_url
|
|
63
52
|
self._spark = spark
|
|
64
53
|
self._inline_definitions = inline_definitions
|
|
65
54
|
self._inline_quality = inline_quality
|
|
66
55
|
self._ssl_verification = ssl_verification
|
|
67
56
|
self.all_linters = {
|
|
68
|
-
ExampleModelLinter(),
|
|
69
57
|
QualityUsesSchemaLinter(),
|
|
70
58
|
FieldPatternLinter(),
|
|
71
59
|
FieldReferenceLinter(),
|
|
@@ -96,7 +84,12 @@ class DataContract:
|
|
|
96
84
|
inline_quality=self._inline_quality,
|
|
97
85
|
)
|
|
98
86
|
run.checks.append(
|
|
99
|
-
Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")
|
|
87
|
+
Check(
|
|
88
|
+
type="lint",
|
|
89
|
+
result=ResultEnum.passed,
|
|
90
|
+
name="Data contract is syntactically valid",
|
|
91
|
+
engine="datacontract",
|
|
92
|
+
)
|
|
100
93
|
)
|
|
101
94
|
if enabled_linters == "none":
|
|
102
95
|
linters_to_check = set()
|
|
@@ -113,7 +106,7 @@ class DataContract:
|
|
|
113
106
|
run.checks.append(
|
|
114
107
|
Check(
|
|
115
108
|
type="general",
|
|
116
|
-
result="error",
|
|
109
|
+
result=ResultEnum.error,
|
|
117
110
|
name=f"Linter '{linter.name}'",
|
|
118
111
|
reason=str(e),
|
|
119
112
|
engine="datacontract",
|
|
@@ -130,7 +123,7 @@ class DataContract:
|
|
|
130
123
|
run.checks.append(
|
|
131
124
|
Check(
|
|
132
125
|
type="general",
|
|
133
|
-
result="error",
|
|
126
|
+
result=ResultEnum.error,
|
|
134
127
|
name="Check Data Contract",
|
|
135
128
|
reason=str(e),
|
|
136
129
|
engine="datacontract",
|
|
@@ -153,54 +146,7 @@ class DataContract:
|
|
|
153
146
|
inline_quality=self._inline_quality,
|
|
154
147
|
)
|
|
155
148
|
|
|
156
|
-
if data_contract.models is None or len(data_contract.models) == 0:
|
157
|
-
raise DataContractException(
|
|
158
|
-
type="lint",
|
|
159
|
-
name="Check that data contract contains models",
|
|
160
|
-
result="warning",
|
|
161
|
-
reason="Models block is missing. Skip executing tests.",
|
|
162
|
-
engine="datacontract",
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
if self._examples:
|
|
166
|
-
if data_contract.examples is None or len(data_contract.examples) == 0:
|
|
167
|
-
raise DataContractException(
|
|
168
|
-
type="lint",
|
|
169
|
-
name="Check that data contract contains valid examples",
|
|
170
|
-
result="warning",
|
|
171
|
-
reason="Examples block is missing. Skip executing tests.",
|
|
172
|
-
engine="datacontract",
|
|
173
|
-
)
|
|
174
|
-
else:
|
|
175
|
-
check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
|
|
176
|
-
|
|
177
|
-
# TODO create directory only for examples
|
|
178
|
-
with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
|
|
179
|
-
if self._examples:
|
|
180
|
-
server_name = "examples"
|
|
181
|
-
server = self._get_examples_server(data_contract, run, tmp_dir)
|
|
182
|
-
elif self._server:
|
|
183
|
-
server_name = self._server
|
|
184
|
-
server = data_contract.servers.get(server_name)
|
|
185
|
-
else:
|
|
186
|
-
server_name = list(data_contract.servers.keys())[0]
|
|
187
|
-
server = data_contract.servers.get(server_name)
|
|
188
|
-
|
|
189
|
-
run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
|
|
190
|
-
run.dataContractId = data_contract.id
|
|
191
|
-
run.dataContractVersion = data_contract.info.version
|
|
192
|
-
run.dataProductId = server.dataProductId
|
|
193
|
-
run.outputPortId = server.outputPortId
|
|
194
|
-
run.server = server_name
|
|
195
|
-
|
|
196
|
-
# TODO check server is supported type for nicer error messages
|
|
197
|
-
|
|
198
|
-
# TODO check server credentials are complete for nicer error messages
|
|
199
|
-
|
|
200
|
-
if server.format == "json" and server.type != "kafka":
|
|
201
|
-
check_jsonschema(run, data_contract, server)
|
|
202
|
-
|
|
203
|
-
check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
|
|
149
|
+
execute_data_contract_test(data_contract, run, self._server, self._spark)
|
|
204
150
|
|
|
205
151
|
except DataContractException as e:
|
|
206
152
|
run.checks.append(
|
|
@@ -219,7 +165,7 @@ class DataContract:
|
|
|
219
165
|
run.checks.append(
|
|
220
166
|
Check(
|
|
221
167
|
type="general",
|
|
222
|
-
result="error",
|
|
168
|
+
result=ResultEnum.error,
|
|
223
169
|
name="Test Data Contract",
|
|
224
170
|
reason=str(e),
|
|
225
171
|
engine="datacontract",
|
|
@@ -235,38 +181,6 @@ class DataContract:
|
|
|
235
181
|
|
|
236
182
|
return run
|
|
237
183
|
|
|
238
|
-
def _get_examples_server(self, data_contract, run, tmp_dir):
|
|
239
|
-
run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
|
|
240
|
-
format = "json"
|
|
241
|
-
for example in data_contract.examples:
|
|
242
|
-
format = example.type
|
|
243
|
-
p = f"{tmp_dir}/{example.model}.{format}"
|
|
244
|
-
run.log_info(f"Creating example file {p}")
|
|
245
|
-
with open(p, "w") as f:
|
|
246
|
-
content = ""
|
|
247
|
-
if format == "json" and isinstance(example.data, list):
|
|
248
|
-
content = json.dumps(example.data)
|
|
249
|
-
elif format == "json" and isinstance(example.data, str):
|
|
250
|
-
content = example.data
|
|
251
|
-
elif format == "yaml" and isinstance(example.data, list):
|
|
252
|
-
content = yaml.dump(example.data, allow_unicode=True)
|
|
253
|
-
elif format == "yaml" and isinstance(example.data, str):
|
|
254
|
-
content = example.data
|
|
255
|
-
elif format == "csv":
|
|
256
|
-
content = example.data
|
|
257
|
-
logging.debug(f"Content of example file {p}: {content}")
|
|
258
|
-
f.write(content)
|
|
259
|
-
path = f"{tmp_dir}" + "/{model}." + format
|
|
260
|
-
delimiter = "array"
|
|
261
|
-
server = Server(
|
|
262
|
-
type="local",
|
|
263
|
-
path=path,
|
|
264
|
-
format=format,
|
|
265
|
-
delimiter=delimiter,
|
|
266
|
-
)
|
|
267
|
-
run.log_info(f"Using {server} for testing the examples")
|
|
268
|
-
return server
|
|
269
|
-
|
|
270
184
|
def breaking(self, other: "DataContract") -> BreakingChanges:
|
|
271
185
|
return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
|
|
272
186
|
|