datacontract-cli 0.10.21__py3-none-any.whl → 0.10.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. datacontract/breaking/breaking.py +1 -1
  2. datacontract/breaking/breaking_rules.py +1 -1
  3. datacontract/cli.py +25 -77
  4. datacontract/data_contract.py +14 -100
  5. datacontract/engines/data_contract_checks.py +735 -0
  6. datacontract/engines/data_contract_test.py +67 -0
  7. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  8. datacontract/engines/soda/check_soda_execute.py +37 -31
  9. datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py} +6 -5
  10. datacontract/engines/soda/connections/kafka.py +8 -3
  11. datacontract/export/avro_converter.py +2 -0
  12. datacontract/export/dbt_converter.py +13 -10
  13. datacontract/export/exporter.py +0 -2
  14. datacontract/export/exporter_factory.py +0 -12
  15. datacontract/export/odcs_v3_exporter.py +22 -3
  16. datacontract/export/sodacl_converter.py +22 -294
  17. datacontract/export/sql_type_converter.py +7 -2
  18. datacontract/imports/odcs_importer.py +6 -3
  19. datacontract/imports/odcs_v3_importer.py +3 -1
  20. datacontract/imports/sql_importer.py +229 -29
  21. datacontract/lint/resolve.py +17 -4
  22. datacontract/model/exceptions.py +4 -1
  23. datacontract/model/run.py +11 -4
  24. datacontract/output/junit_test_results.py +135 -0
  25. datacontract/output/output_format.py +10 -0
  26. datacontract/output/test_results_writer.py +79 -0
  27. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/METADATA +192 -215
  28. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/RECORD +33 -32
  29. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/WHEEL +1 -1
  30. datacontract/engines/soda/connections/dask.py +0 -28
  31. datacontract/export/odcs_v2_exporter.py +0 -124
  32. datacontract/imports/odcs_v2_importer.py +0 -177
  33. datacontract/lint/linters/example_model_linter.py +0 -91
  34. /datacontract/{model → breaking}/breaking_change.py +0 -0
  35. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/LICENSE +0 -0
  36. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/entry_points.txt +0 -0
  37. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.23.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking.py CHANGED
@@ -1,5 +1,5 @@
+ from datacontract.breaking.breaking_change import BreakingChange, Location, Severity
  from datacontract.breaking.breaking_rules import BreakingRules
- from datacontract.model.breaking_change import BreakingChange, Location, Severity
  from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms
  
  
datacontract/breaking/breaking_rules.py CHANGED
@@ -1,4 +1,4 @@
- from datacontract.model.breaking_change import Severity
+ from datacontract.breaking.breaking_change import Severity
  
  
  class BreakingRules:
datacontract/cli.py CHANGED
@@ -5,9 +5,7 @@ from typing import Iterable, List, Optional
  
  import typer
  from click import Context
- from rich import box
  from rich.console import Console
- from rich.table import Table
  from typer.core import TyperGroup
  from typing_extensions import Annotated
  
@@ -19,6 +17,8 @@ from datacontract.integration.datamesh_manager import (
      publish_data_contract_to_datamesh_manager,
  )
  from datacontract.lint.resolve import resolve_data_contract_dict
+ from datacontract.output.output_format import OutputFormat
+ from datacontract.output.test_results_writer import write_test_result
  
  console = Console()
  
@@ -92,12 +92,19 @@ def lint(
          str,
          typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
      ] = None,
+     output: Annotated[
+         Path,
+         typer.Option(
+             help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml'). If no path is provided, the output will be printed to stdout."
+         ),
+     ] = None,
+     output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
  ):
      """
      Validate that the datacontract.yaml is correctly formatted.
      """
      run = DataContract(data_contract_file=location, schema_location=schema).lint()
-     _handle_result(run)
+     write_test_result(run, console, output_format, output)
  
  
  @app.command()
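Both lint and test gain output and output_format parameters, and the old _handle_result call is replaced by write_test_result from the new datacontract/output package (the new modules themselves are not shown in this diff). Typer derives the CLI flag names from the parameter names, so output_format is exposed as --output-format. Below is a minimal, self-contained sketch of that mechanism, not project code; the OutputFormat member shown is a hypothetical stand-in, since the real enum lives in datacontract/output/output_format.py.

    # Standalone illustration: typer turns keyword parameters into CLI flags,
    # converting underscores to dashes (output_format -> --output-format).
    from enum import Enum
    from pathlib import Path
    from typing import Optional

    import typer
    from typing_extensions import Annotated

    app = typer.Typer()


    class OutputFormat(str, Enum):
        junit = "junit"  # hypothetical member, shown only for the demo


    @app.command()
    def lint(
        output: Annotated[Optional[Path], typer.Option(help="Where to write results")] = None,
        output_format: Annotated[Optional[OutputFormat], typer.Option(help="Result format")] = None,
    ):
        typer.echo(f"format={output_format}, path={output}")


    if __name__ == "__main__":
        app()  # e.g. python demo.py --output ./results.xml --output-format junit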
@@ -119,11 +126,14 @@ def test(
              "servers (default)."
          ),
      ] = "all",
-     examples: Annotated[
-         bool,
-         typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
-     ] = None,
      publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
+     output: Annotated[
+         Path,
+         typer.Option(
+             help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml')."
+         ),
+     ] = None,
+     output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
      logs: Annotated[bool, typer.Option(help="Print logs")] = False,
      ssl_verification: Annotated[
          bool,
@@ -141,12 +151,11 @@
          schema_location=schema,
          publish_url=publish,
          server=server,
-         examples=examples,
          ssl_verification=ssl_verification,
      ).test()
      if logs:
          _print_logs(run)
-     _handle_result(run)
+     write_test_result(run, console, output_format, output)
  
  
  @app.command()
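The test command drops the --examples flag and gains the same --output/--output-format pair as lint. For programmatic callers this is also an API change: the examples keyword argument is removed from DataContract (see the data_contract.py hunks further below). A migration sketch, with a hypothetical server key:

    # Before (0.10.21): DataContract(..., examples=True).test()
    # After (0.10.23): the keyword is gone; point the test at a concrete server instead.
    from datacontract.data_contract import DataContract

    run = DataContract(
        data_contract_file="datacontract.yaml",
        server="production",  # hypothetical server key from your datacontract.yaml
    ).test()
    print(run.result)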
@@ -219,7 +228,7 @@ def export(
      if output is None:
          console.print(result, markup=False, soft_wrap=True)
      else:
-         with output.open("w") as f:
+         with output.open(mode="w", encoding="utf-8") as f:
              f.write(result)
          console.print(f"Written result to {output}")
  
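The export command (and import below) now opens the output file with an explicit encoding. Without it, Path.open falls back to the platform's preferred locale encoding, which on Windows is often cp1252 and can fail on non-ASCII characters in the exported document. A small standalone illustration of the fixed call, with a hypothetical output path:

    # Pass encoding explicitly so the written file is UTF-8 on every platform
    # instead of depending on the locale.
    from pathlib import Path

    result = "description: enthält Umlaute äöü"  # sample non-ASCII content
    output = Path("datacontract-export.yaml")    # hypothetical output path

    with output.open(mode="w", encoding="utf-8") as f:
        f.write(result)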
@@ -237,6 +246,10 @@ def import_(
          Optional[str],
          typer.Option(help="The path to the file or Glue Database that should be imported."),
      ] = None,
+     dialect: Annotated[
+         Optional[str],
+         typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
+     ] = None,
      glue_table: Annotated[
          Optional[List[str]],
          typer.Option(
@@ -293,6 +306,7 @@ def import_(
          source=source,
          template=template,
          schema=schema,
+         dialect=dialect,
          glue_table=glue_table,
          bigquery_table=bigquery_table,
          bigquery_project=bigquery_project,
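The import command gains a --dialect option that is passed through to the SQL importer (datacontract/imports/sql_importer.py, heavily rewritten in this release per the file list). The hint matters because the same DDL is read differently per dialect. The snippet below uses sqlglot purely to illustrate that point; whether datacontract-cli uses sqlglot internally is not shown in this diff.

    # Illustration only: the dialect determines how vendor-specific types are read.
    import sqlglot

    ddl = "CREATE TABLE orders (id INT, note NVARCHAR(MAX))"

    # Read the DDL as T-SQL and render it for Postgres; NVARCHAR(MAX) is mapped
    # to a Postgres-compatible text type instead of being misread.
    print(sqlglot.parse_one(ddl, read="tsql").sql(dialect="postgres"))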
@@ -306,7 +320,7 @@ def import_(
      if output is None:
          console.print(result.to_yaml(), markup=False, soft_wrap=True)
      else:
-         with output.open("w") as f:
+         with output.open(mode="w", encoding="utf-8") as f:
              f.write(result.to_yaml())
          console.print(f"Written result to {output}")
  
@@ -467,77 +481,11 @@ def api(
      uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)
  
  
- def _handle_result(run):
-     _print_table(run)
-     if run.result == "passed":
-         console.print(
-             f"🟢 data contract is valid. Run {len(run.checks)} checks. Took {(run.timestampEnd - run.timestampStart).total_seconds()} seconds."
-         )
-     elif run.result == "warning":
-         console.print("🟠 data contract has warnings. Found the following warnings:")
-         i = 1
-         for check in run.checks:
-             if check.result != "passed":
-                 field = to_field(run, check)
-                 if field:
-                     field = field + " "
-                 else:
-                     field = ""
-                 console.print(f"{i}) {field}{check.name}: {check.reason}")
-                 i += 1
-     else:
-         console.print("🔴 data contract is invalid, found the following errors:")
-         i = 1
-         for check in run.checks:
-             if check.result != "passed":
-                 field = to_field(run, check)
-                 if field:
-                     field = field + " "
-                 else:
-                     field = ""
-                 console.print(f"{i}) {field}{check.name}: {check.reason}")
-                 i += 1
-         raise typer.Exit(code=1)
-
-
- def _print_table(run):
-     table = Table(box=box.ROUNDED)
-     table.add_column("Result", no_wrap=True)
-     table.add_column("Check", max_width=100)
-     table.add_column("Field", max_width=32)
-     table.add_column("Details", max_width=50)
-     for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")):
-         table.add_row(with_markup(check.result), check.name, to_field(run, check), check.reason)
-     console.print(table)
-
-
- def to_field(run, check):
-     models = [c.model for c in run.checks]
-     if len(set(models)) > 1:
-         if check.field is None:
-             return check.model
-         return check.model + "." + check.field
-     else:
-         return check.field
-
-
  def _print_logs(run):
      console.print("\nLogs:")
      for log in run.logs:
          console.print(log.timestamp.strftime("%y-%m-%d %H:%M:%S"), log.level.ljust(5), log.message)
  
  
- def with_markup(result):
-     if result == "passed":
-         return "[green]passed[/green]"
-     if result == "warning":
-         return "[yellow]warning[/yellow]"
-     if result == "failed":
-         return "[red]failed[/red]"
-     if result == "error":
-         return "[red]error[/red]"
-     return result
-
-
  if __name__ == "__main__":
      app()
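The console rendering helpers (_handle_result, _print_table, to_field, with_markup) leave cli.py; their role is taken over by write_test_result in the new datacontract/output/test_results_writer.py, which is not shown in this diff. One detail of the removed code that scripted callers may have relied on: _handle_result raised typer.Exit(code=1) when the run failed, so the process exit code signalled the outcome. A rough sketch of that contract, reconstructed from the removed lines and not representing the new implementation:

    # Sketch reconstructed from the removed 0.10.21 helper.
    import typer
    from rich.console import Console

    console = Console()


    def handle_result(run) -> None:
        if run.result == "passed":
            console.print(f"data contract is valid, ran {len(run.checks)} checks")
            return
        for i, check in enumerate((c for c in run.checks if c.result != "passed"), start=1):
            console.print(f"{i}) {check.name}: {check.reason}")
        if run.result != "warning":  # warnings do not fail the process
            raise typer.Exit(code=1)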
datacontract/data_contract.py CHANGED
@@ -1,10 +1,6 @@
- import json
  import logging
- import tempfile
  import typing
  
- import yaml
-
  if typing.TYPE_CHECKING:
      from pyspark.sql import SparkSession
  
@@ -14,11 +10,8 @@ from datacontract.breaking.breaking import (
      quality_breaking_changes,
      terms_breaking_changes,
  )
- from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
-     check_that_datacontract_contains_valid_server_configuration,
- )
- from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
- from datacontract.engines.soda.check_soda_execute import check_soda_execute
+ from datacontract.breaking.breaking_change import BreakingChange, BreakingChanges, Severity
+ from datacontract.engines.data_contract_test import execute_data_contract_test
  from datacontract.export.exporter import ExportFormat
  from datacontract.export.exporter_factory import exporter_factory
  from datacontract.imports.importer_factory import importer_factory
@@ -26,16 +19,14 @@ from datacontract.init.init_template import get_init_template
  from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
  from datacontract.lint import resolve
  from datacontract.lint.linters.description_linter import DescriptionLinter
- from datacontract.lint.linters.example_model_linter import ExampleModelLinter
  from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
  from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
  from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
  from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
  from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
- from datacontract.model.breaking_change import BreakingChange, BreakingChanges, Severity
- from datacontract.model.data_contract_specification import DataContractSpecification, Server
+ from datacontract.model.data_contract_specification import DataContractSpecification
  from datacontract.model.exceptions import DataContractException
- from datacontract.model.run import Check, Run
+ from datacontract.model.run import Check, ResultEnum, Run
  
  
  class DataContract:
@@ -46,7 +37,6 @@ class DataContract:
          data_contract: DataContractSpecification = None,
          schema_location: str = None,
          server: str = None,
-         examples: bool = False,
          publish_url: str = None,
          spark: "SparkSession" = None,
          inline_definitions: bool = True,
@@ -58,14 +48,12 @@ class DataContract:
          self._data_contract = data_contract
          self._schema_location = schema_location
          self._server = server
-         self._examples = examples
          self._publish_url = publish_url
          self._spark = spark
          self._inline_definitions = inline_definitions
          self._inline_quality = inline_quality
          self._ssl_verification = ssl_verification
          self.all_linters = {
-             ExampleModelLinter(),
              QualityUsesSchemaLinter(),
              FieldPatternLinter(),
              FieldReferenceLinter(),
@@ -96,7 +84,12 @@ class DataContract:
              inline_quality=self._inline_quality,
          )
          run.checks.append(
-             Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")
+             Check(
+                 type="lint",
+                 result=ResultEnum.passed,
+                 name="Data contract is syntactically valid",
+                 engine="datacontract",
+             )
          )
          if enabled_linters == "none":
              linters_to_check = set()
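Check results are now set from ResultEnum (imported from datacontract.model.run, which also changes in this release) instead of bare strings. The enum's definition is not part of this diff; the sketch below assumes a str-backed enum whose members mirror the result strings handled elsewhere in 0.10.21 ("passed", "warning", "failed", "error"), which would keep string comparisons working.

    # Assumed shape of ResultEnum; datacontract/model/run.py is not shown in this diff.
    from enum import Enum


    class ResultEnum(str, Enum):
        passed = "passed"
        warning = "warning"
        failed = "failed"
        error = "error"


    # A str mixin keeps existing comparisons such as check.result == "passed" valid.
    assert ResultEnum.passed == "passed"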
@@ -113,7 +106,7 @@ class DataContract:
              run.checks.append(
                  Check(
                      type="general",
-                     result="error",
+                     result=ResultEnum.error,
                      name=f"Linter '{linter.name}'",
                      reason=str(e),
                      engine="datacontract",
@@ -130,7 +123,7 @@ class DataContract:
              run.checks.append(
                  Check(
                      type="general",
-                     result="error",
+                     result=ResultEnum.error,
                      name="Check Data Contract",
                      reason=str(e),
                      engine="datacontract",
@@ -153,54 +146,7 @@ class DataContract:
                  inline_quality=self._inline_quality,
              )
  
-             if data_contract.models is None or len(data_contract.models) == 0:
-                 raise DataContractException(
-                     type="lint",
-                     name="Check that data contract contains models",
-                     result="warning",
-                     reason="Models block is missing. Skip executing tests.",
-                     engine="datacontract",
-                 )
-
-             if self._examples:
-                 if data_contract.examples is None or len(data_contract.examples) == 0:
-                     raise DataContractException(
-                         type="lint",
-                         name="Check that data contract contains valid examples",
-                         result="warning",
-                         reason="Examples block is missing. Skip executing tests.",
-                         engine="datacontract",
-                     )
-             else:
-                 check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
-
-             # TODO create directory only for examples
-             with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
-                 if self._examples:
-                     server_name = "examples"
-                     server = self._get_examples_server(data_contract, run, tmp_dir)
-                 elif self._server:
-                     server_name = self._server
-                     server = data_contract.servers.get(server_name)
-                 else:
-                     server_name = list(data_contract.servers.keys())[0]
-                     server = data_contract.servers.get(server_name)
-
-                 run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
-                 run.dataContractId = data_contract.id
-                 run.dataContractVersion = data_contract.info.version
-                 run.dataProductId = server.dataProductId
-                 run.outputPortId = server.outputPortId
-                 run.server = server_name
-
-                 # TODO check server is supported type for nicer error messages
-
-                 # TODO check server credentials are complete for nicer error messages
-
-                 if server.format == "json" and server.type != "kafka":
-                     check_jsonschema(run, data_contract, server)
-
-                 check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
+             execute_data_contract_test(data_contract, run, self._server, self._spark)
  
          except DataContractException as e:
              run.checks.append(
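The roughly fifty lines removed here (the models presence check, server selection, the optional JSON Schema check, and the Soda execution) now sit behind execute_data_contract_test in the new datacontract/engines/data_contract_test.py (+67 lines per the file list), which this diff does not show. The sketch below only restates the orchestration implied by the removed code and the new call site; it is not the actual module.

    # Assumed orchestration, reconstructed from the code removed above.
    def execute_data_contract_test(data_contract, run, server_name, spark):
        server_name = server_name or next(iter(data_contract.servers))
        server = data_contract.servers.get(server_name)
        run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
        run.dataContractId = data_contract.id
        run.dataContractVersion = data_contract.info.version
        run.server = server_name
        # ...followed by the JSON Schema check (for json, non-kafka servers) and
        # the Soda-based schema/quality checks, as in 0.10.21.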
@@ -219,7 +165,7 @@ class DataContract:
              run.checks.append(
                  Check(
                      type="general",
-                     result="error",
+                     result=ResultEnum.error,
                      name="Test Data Contract",
                      reason=str(e),
                      engine="datacontract",
@@ -235,38 +181,6 @@ class DataContract:
  
          return run
  
-     def _get_examples_server(self, data_contract, run, tmp_dir):
-         run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
-         format = "json"
-         for example in data_contract.examples:
-             format = example.type
-             p = f"{tmp_dir}/{example.model}.{format}"
-             run.log_info(f"Creating example file {p}")
-             with open(p, "w") as f:
-                 content = ""
-                 if format == "json" and isinstance(example.data, list):
-                     content = json.dumps(example.data)
-                 elif format == "json" and isinstance(example.data, str):
-                     content = example.data
-                 elif format == "yaml" and isinstance(example.data, list):
-                     content = yaml.dump(example.data, allow_unicode=True)
-                 elif format == "yaml" and isinstance(example.data, str):
-                     content = example.data
-                 elif format == "csv":
-                     content = example.data
-                 logging.debug(f"Content of example file {p}: {content}")
-                 f.write(content)
-         path = f"{tmp_dir}" + "/{model}." + format
-         delimiter = "array"
-         server = Server(
-             type="local",
-             path=path,
-             format=format,
-             delimiter=delimiter,
-         )
-         run.log_info(f"Using {server} for testing the examples")
-         return server
-
      def breaking(self, other: "DataContract") -> BreakingChanges:
          return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
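For completeness, the breaking/changelog API visible at the end of this hunk is unchanged apart from Severity now being imported from datacontract.breaking.breaking_change. A usage sketch (file names hypothetical; the receiver/argument order assumed to mirror the CLI's breaking command, old contract first):

    from datacontract.data_contract import DataContract

    old = DataContract(data_contract_file="datacontract-v1.yaml")
    new = DataContract(data_contract_file="datacontract-v2.yaml")

    changes = old.breaking(new)  # changelog() filtered to ERROR and WARNING severities
    print(changes)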