datacontract-cli 0.10.21__py3-none-any.whl → 0.10.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (29)
  1. datacontract/breaking/breaking.py +1 -1
  2. datacontract/breaking/breaking_rules.py +1 -1
  3. datacontract/cli.py +5 -5
  4. datacontract/data_contract.py +14 -100
  5. datacontract/engines/data_contract_checks.py +735 -0
  6. datacontract/engines/data_contract_test.py +51 -0
  7. datacontract/engines/soda/check_soda_execute.py +36 -30
  8. datacontract/engines/soda/connections/kafka.py +8 -3
  9. datacontract/export/avro_converter.py +2 -0
  10. datacontract/export/exporter.py +0 -2
  11. datacontract/export/exporter_factory.py +0 -12
  12. datacontract/export/sodacl_converter.py +22 -294
  13. datacontract/export/sql_type_converter.py +7 -2
  14. datacontract/imports/odcs_importer.py +6 -3
  15. datacontract/imports/odcs_v3_importer.py +2 -0
  16. datacontract/imports/sql_importer.py +229 -29
  17. datacontract/model/exceptions.py +4 -1
  18. datacontract/model/run.py +11 -4
  19. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/METADATA +139 -166
  20. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/RECORD +25 -27
  21. datacontract/engines/soda/connections/dask.py +0 -28
  22. datacontract/export/odcs_v2_exporter.py +0 -124
  23. datacontract/imports/odcs_v2_importer.py +0 -177
  24. datacontract/lint/linters/example_model_linter.py +0 -91
  25. /datacontract/{model → breaking}/breaking_change.py +0 -0
  26. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/LICENSE +0 -0
  27. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/WHEEL +0 -0
  28. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/entry_points.txt +0 -0
  29. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking.py CHANGED
@@ -1,5 +1,5 @@
1
+ from datacontract.breaking.breaking_change import BreakingChange, Location, Severity
1
2
  from datacontract.breaking.breaking_rules import BreakingRules
2
- from datacontract.model.breaking_change import BreakingChange, Location, Severity
3
3
  from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms
4
4
 
5
5
 
datacontract/breaking/breaking_rules.py CHANGED
@@ -1,4 +1,4 @@
1
- from datacontract.model.breaking_change import Severity
1
+ from datacontract.breaking.breaking_change import Severity
2
2
 
3
3
 
4
4
  class BreakingRules:
datacontract/cli.py CHANGED
@@ -119,10 +119,6 @@ def test(
119
119
  "servers (default)."
120
120
  ),
121
121
  ] = "all",
122
- examples: Annotated[
123
- bool,
124
- typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
125
- ] = None,
126
122
  publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
127
123
  logs: Annotated[bool, typer.Option(help="Print logs")] = False,
128
124
  ssl_verification: Annotated[
@@ -141,7 +137,6 @@ def test(
141
137
  schema_location=schema,
142
138
  publish_url=publish,
143
139
  server=server,
144
- examples=examples,
145
140
  ssl_verification=ssl_verification,
146
141
  ).test()
147
142
  if logs:
@@ -237,6 +232,10 @@ def import_(
237
232
  Optional[str],
238
233
  typer.Option(help="The path to the file or Glue Database that should be imported."),
239
234
  ] = None,
235
+ dialect: Annotated[
236
+ Optional[str],
237
+ typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
238
+ ] = None,
240
239
  glue_table: Annotated[
241
240
  Optional[List[str]],
242
241
  typer.Option(
@@ -293,6 +292,7 @@ def import_(
293
292
  source=source,
294
293
  template=template,
295
294
  schema=schema,
295
+ dialect=dialect,
296
296
  glue_table=glue_table,
297
297
  bigquery_table=bigquery_table,
298
298
  bigquery_project=bigquery_project,
datacontract/data_contract.py CHANGED
@@ -1,10 +1,6 @@
1
- import json
2
1
  import logging
3
- import tempfile
4
2
  import typing
5
3
 
6
- import yaml
7
-
8
4
  if typing.TYPE_CHECKING:
9
5
  from pyspark.sql import SparkSession
10
6
 
@@ -14,11 +10,8 @@ from datacontract.breaking.breaking import (
14
10
  quality_breaking_changes,
15
11
  terms_breaking_changes,
16
12
  )
17
- from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
18
- check_that_datacontract_contains_valid_server_configuration,
19
- )
20
- from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
21
- from datacontract.engines.soda.check_soda_execute import check_soda_execute
13
+ from datacontract.breaking.breaking_change import BreakingChange, BreakingChanges, Severity
14
+ from datacontract.engines.data_contract_test import execute_data_contract_test
22
15
  from datacontract.export.exporter import ExportFormat
23
16
  from datacontract.export.exporter_factory import exporter_factory
24
17
  from datacontract.imports.importer_factory import importer_factory
@@ -26,16 +19,14 @@ from datacontract.init.init_template import get_init_template
26
19
  from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
27
20
  from datacontract.lint import resolve
28
21
  from datacontract.lint.linters.description_linter import DescriptionLinter
29
- from datacontract.lint.linters.example_model_linter import ExampleModelLinter
30
22
  from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
31
23
  from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
32
24
  from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
33
25
  from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
34
26
  from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
35
- from datacontract.model.breaking_change import BreakingChange, BreakingChanges, Severity
36
- from datacontract.model.data_contract_specification import DataContractSpecification, Server
27
+ from datacontract.model.data_contract_specification import DataContractSpecification
37
28
  from datacontract.model.exceptions import DataContractException
38
- from datacontract.model.run import Check, Run
29
+ from datacontract.model.run import Check, ResultEnum, Run
39
30
 
40
31
 
41
32
  class DataContract:
@@ -46,7 +37,6 @@ class DataContract:
46
37
  data_contract: DataContractSpecification = None,
47
38
  schema_location: str = None,
48
39
  server: str = None,
49
- examples: bool = False,
50
40
  publish_url: str = None,
51
41
  spark: "SparkSession" = None,
52
42
  inline_definitions: bool = True,
@@ -58,14 +48,12 @@ class DataContract:
58
48
  self._data_contract = data_contract
59
49
  self._schema_location = schema_location
60
50
  self._server = server
61
- self._examples = examples
62
51
  self._publish_url = publish_url
63
52
  self._spark = spark
64
53
  self._inline_definitions = inline_definitions
65
54
  self._inline_quality = inline_quality
66
55
  self._ssl_verification = ssl_verification
67
56
  self.all_linters = {
68
- ExampleModelLinter(),
69
57
  QualityUsesSchemaLinter(),
70
58
  FieldPatternLinter(),
71
59
  FieldReferenceLinter(),
@@ -96,7 +84,12 @@ class DataContract:
96
84
  inline_quality=self._inline_quality,
97
85
  )
98
86
  run.checks.append(
99
- Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")
87
+ Check(
88
+ type="lint",
89
+ result=ResultEnum.passed,
90
+ name="Data contract is syntactically valid",
91
+ engine="datacontract",
92
+ )
100
93
  )
101
94
  if enabled_linters == "none":
102
95
  linters_to_check = set()
@@ -113,7 +106,7 @@ class DataContract:
113
106
  run.checks.append(
114
107
  Check(
115
108
  type="general",
116
- result="error",
109
+ result=ResultEnum.error,
117
110
  name=f"Linter '{linter.name}'",
118
111
  reason=str(e),
119
112
  engine="datacontract",
@@ -130,7 +123,7 @@ class DataContract:
130
123
  run.checks.append(
131
124
  Check(
132
125
  type="general",
133
- result="error",
126
+ result=ResultEnum.error,
134
127
  name="Check Data Contract",
135
128
  reason=str(e),
136
129
  engine="datacontract",
@@ -153,54 +146,7 @@ class DataContract:
153
146
  inline_quality=self._inline_quality,
154
147
  )
155
148
 
156
- if data_contract.models is None or len(data_contract.models) == 0:
157
- raise DataContractException(
158
- type="lint",
159
- name="Check that data contract contains models",
160
- result="warning",
161
- reason="Models block is missing. Skip executing tests.",
162
- engine="datacontract",
163
- )
164
-
165
- if self._examples:
166
- if data_contract.examples is None or len(data_contract.examples) == 0:
167
- raise DataContractException(
168
- type="lint",
169
- name="Check that data contract contains valid examples",
170
- result="warning",
171
- reason="Examples block is missing. Skip executing tests.",
172
- engine="datacontract",
173
- )
174
- else:
175
- check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
176
-
177
- # TODO create directory only for examples
178
- with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
179
- if self._examples:
180
- server_name = "examples"
181
- server = self._get_examples_server(data_contract, run, tmp_dir)
182
- elif self._server:
183
- server_name = self._server
184
- server = data_contract.servers.get(server_name)
185
- else:
186
- server_name = list(data_contract.servers.keys())[0]
187
- server = data_contract.servers.get(server_name)
188
-
189
- run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
190
- run.dataContractId = data_contract.id
191
- run.dataContractVersion = data_contract.info.version
192
- run.dataProductId = server.dataProductId
193
- run.outputPortId = server.outputPortId
194
- run.server = server_name
195
-
196
- # TODO check server is supported type for nicer error messages
197
-
198
- # TODO check server credentials are complete for nicer error messages
199
-
200
- if server.format == "json" and server.type != "kafka":
201
- check_jsonschema(run, data_contract, server)
202
-
203
- check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
149
+ execute_data_contract_test(data_contract, run, self._server, self._spark)
204
150
 
205
151
  except DataContractException as e:
206
152
  run.checks.append(
@@ -219,7 +165,7 @@ class DataContract:
219
165
  run.checks.append(
220
166
  Check(
221
167
  type="general",
222
- result="error",
168
+ result=ResultEnum.error,
223
169
  name="Test Data Contract",
224
170
  reason=str(e),
225
171
  engine="datacontract",
@@ -235,38 +181,6 @@ class DataContract:
235
181
 
236
182
  return run
237
183
 
238
- def _get_examples_server(self, data_contract, run, tmp_dir):
239
- run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
240
- format = "json"
241
- for example in data_contract.examples:
242
- format = example.type
243
- p = f"{tmp_dir}/{example.model}.{format}"
244
- run.log_info(f"Creating example file {p}")
245
- with open(p, "w") as f:
246
- content = ""
247
- if format == "json" and isinstance(example.data, list):
248
- content = json.dumps(example.data)
249
- elif format == "json" and isinstance(example.data, str):
250
- content = example.data
251
- elif format == "yaml" and isinstance(example.data, list):
252
- content = yaml.dump(example.data, allow_unicode=True)
253
- elif format == "yaml" and isinstance(example.data, str):
254
- content = example.data
255
- elif format == "csv":
256
- content = example.data
257
- logging.debug(f"Content of example file {p}: {content}")
258
- f.write(content)
259
- path = f"{tmp_dir}" + "/{model}." + format
260
- delimiter = "array"
261
- server = Server(
262
- type="local",
263
- path=path,
264
- format=format,
265
- delimiter=delimiter,
266
- )
267
- run.log_info(f"Using {server} for testing the examples")
268
- return server
269
-
270
184
  def breaking(self, other: "DataContract") -> BreakingChanges:
271
185
  return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
272
186