datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +12 -5
- datacontract/catalog/catalog.py +5 -3
- datacontract/cli.py +116 -10
- datacontract/data_contract.py +143 -65
- datacontract/engines/data_contract_checks.py +366 -60
- datacontract/engines/data_contract_test.py +50 -4
- datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +22 -3
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/duckdb_connection.py +65 -6
- datacontract/engines/soda/connections/kafka.py +4 -2
- datacontract/export/avro_converter.py +20 -3
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/dbt_converter.py +36 -7
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +3 -0
- datacontract/export/exporter_factory.py +17 -1
- datacontract/export/great_expectations_converter.py +55 -5
- datacontract/export/{html_export.py → html_exporter.py} +31 -20
- datacontract/export/markdown_converter.py +134 -5
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +187 -145
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/export/rdf_converter.py +2 -2
- datacontract/export/sodacl_converter.py +9 -1
- datacontract/export/spark_converter.py +31 -4
- datacontract/export/sql_converter.py +6 -2
- datacontract/export/sql_type_converter.py +20 -8
- datacontract/imports/avro_importer.py +63 -12
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/importer.py +16 -3
- datacontract/imports/importer_factory.py +17 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/odcs_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +351 -151
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +117 -13
- datacontract/imports/sql_importer.py +32 -16
- datacontract/imports/unity_importer.py +84 -38
- datacontract/init/init_template.py +1 -1
- datacontract/integration/datamesh_manager.py +16 -2
- datacontract/lint/resolve.py +112 -23
- datacontract/lint/schema.py +24 -15
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/odcs.py +13 -0
- datacontract/model/run.py +3 -0
- datacontract/output/junit_test_results.py +3 -3
- datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +54 -3
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +5 -2
- datacontract/templates/partials/server.html +2 -0
- datacontract/templates/style/output.css +319 -145
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/lint/lint.py +0 -142
- datacontract/lint/linters/description_linter.py +0 -35
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -48
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -100
- datacontract/model/data_contract_specification.py +0 -327
- datacontract_cli-0.10.23.dist-info/RECORD +0 -113
- /datacontract/{lint/linters → output}/__init__.py +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/data_contract.py
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import typing
|
|
3
3
|
|
|
4
|
+
from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard
|
|
5
|
+
|
|
6
|
+
from datacontract.export.odcs_v3_exporter import to_odcs_v3
|
|
7
|
+
from datacontract.imports.importer import ImportFormat, Spec
|
|
8
|
+
from datacontract.imports.odcs_v3_importer import import_from_odcs
|
|
9
|
+
|
|
4
10
|
if typing.TYPE_CHECKING:
|
|
5
11
|
from pyspark.sql import SparkSession
|
|
6
12
|
|
|
13
|
+
from duckdb.duckdb import DuckDBPyConnection
|
|
14
|
+
|
|
7
15
|
from datacontract.breaking.breaking import (
|
|
8
16
|
info_breaking_changes,
|
|
9
17
|
models_breaking_changes,
|
|
@@ -18,13 +26,7 @@ from datacontract.imports.importer_factory import importer_factory
|
|
|
18
26
|
from datacontract.init.init_template import get_init_template
|
|
19
27
|
from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
|
|
20
28
|
from datacontract.lint import resolve
|
|
21
|
-
from datacontract.lint.linters.description_linter import DescriptionLinter
|
|
22
|
-
from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
|
|
23
|
-
from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
|
|
24
|
-
from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
|
|
25
|
-
from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
|
|
26
|
-
from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
|
|
27
|
-
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
29
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Info
|
|
28
30
|
from datacontract.model.exceptions import DataContractException
|
|
29
31
|
from datacontract.model.run import Check, ResultEnum, Run
|
|
30
32
|
|
|
@@ -39,9 +41,11 @@ class DataContract:
|
|
|
39
41
|
server: str = None,
|
|
40
42
|
publish_url: str = None,
|
|
41
43
|
spark: "SparkSession" = None,
|
|
44
|
+
duckdb_connection: DuckDBPyConnection = None,
|
|
42
45
|
inline_definitions: bool = True,
|
|
43
46
|
inline_quality: bool = True,
|
|
44
47
|
ssl_verification: bool = True,
|
|
48
|
+
publish_test_results: bool = False,
|
|
45
49
|
):
|
|
46
50
|
self._data_contract_file = data_contract_file
|
|
47
51
|
self._data_contract_str = data_contract_str
|
|
@@ -49,29 +53,20 @@ class DataContract:
|
|
|
49
53
|
self._schema_location = schema_location
|
|
50
54
|
self._server = server
|
|
51
55
|
self._publish_url = publish_url
|
|
56
|
+
self._publish_test_results = publish_test_results
|
|
52
57
|
self._spark = spark
|
|
58
|
+
self._duckdb_connection = duckdb_connection
|
|
53
59
|
self._inline_definitions = inline_definitions
|
|
54
60
|
self._inline_quality = inline_quality
|
|
55
61
|
self._ssl_verification = ssl_verification
|
|
56
|
-
self.all_linters = {
|
|
57
|
-
QualityUsesSchemaLinter(),
|
|
58
|
-
FieldPatternLinter(),
|
|
59
|
-
FieldReferenceLinter(),
|
|
60
|
-
NoticePeriodLinter(),
|
|
61
|
-
ValidFieldConstraintsLinter(),
|
|
62
|
-
DescriptionLinter(),
|
|
63
|
-
}
|
|
64
62
|
|
|
65
63
|
@classmethod
|
|
66
64
|
def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
|
|
67
65
|
template_str = get_init_template(template)
|
|
68
66
|
return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
|
|
69
67
|
|
|
70
|
-
def lint(self
|
|
71
|
-
"""Lint the data contract by
|
|
72
|
-
|
|
73
|
-
enabled_linters can be either "all" or "none", or a set of linter IDs. The "schema" linter is always enabled, even with enabled_linters="none".
|
|
74
|
-
"""
|
|
68
|
+
def lint(self) -> Run:
|
|
69
|
+
"""Lint the data contract by validating it against the JSON schema."""
|
|
75
70
|
run = Run.create_run()
|
|
76
71
|
try:
|
|
77
72
|
run.log_info("Linting data contract")
|
|
@@ -91,27 +86,6 @@ class DataContract:
|
|
|
91
86
|
engine="datacontract",
|
|
92
87
|
)
|
|
93
88
|
)
|
|
94
|
-
if enabled_linters == "none":
|
|
95
|
-
linters_to_check = set()
|
|
96
|
-
elif enabled_linters == "all":
|
|
97
|
-
linters_to_check = self.all_linters
|
|
98
|
-
elif isinstance(enabled_linters, set):
|
|
99
|
-
linters_to_check = {linter for linter in self.all_linters if linter.id in enabled_linters}
|
|
100
|
-
else:
|
|
101
|
-
raise RuntimeError(f"Unknown argument enabled_linters={enabled_linters} for lint()")
|
|
102
|
-
for linter in linters_to_check:
|
|
103
|
-
try:
|
|
104
|
-
run.checks.extend(linter.lint(data_contract))
|
|
105
|
-
except Exception as e:
|
|
106
|
-
run.checks.append(
|
|
107
|
-
Check(
|
|
108
|
-
type="general",
|
|
109
|
-
result=ResultEnum.error,
|
|
110
|
-
name=f"Linter '{linter.name}'",
|
|
111
|
-
reason=str(e),
|
|
112
|
-
engine="datacontract",
|
|
113
|
-
)
|
|
114
|
-
)
|
|
115
89
|
run.dataContractId = data_contract.id
|
|
116
90
|
run.dataContractVersion = data_contract.info.version
|
|
117
91
|
except DataContractException as e:
|
|
@@ -146,7 +120,7 @@ class DataContract:
|
|
|
146
120
|
inline_quality=self._inline_quality,
|
|
147
121
|
)
|
|
148
122
|
|
|
149
|
-
execute_data_contract_test(data_contract, run, self._server, self._spark)
|
|
123
|
+
execute_data_contract_test(data_contract, run, self._server, self._spark, self._duckdb_connection)
|
|
150
124
|
|
|
151
125
|
except DataContractException as e:
|
|
152
126
|
run.checks.append(
|
|
@@ -176,7 +150,7 @@ class DataContract:
|
|
|
176
150
|
|
|
177
151
|
run.finish()
|
|
178
152
|
|
|
179
|
-
if self._publish_url is not None:
|
|
153
|
+
if self._publish_url is not None or self._publish_test_results:
|
|
180
154
|
publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
|
|
181
155
|
|
|
182
156
|
return run
|
|
@@ -240,34 +214,138 @@ class DataContract:
|
|
|
240
214
|
inline_quality=self._inline_quality,
|
|
241
215
|
)
|
|
242
216
|
|
|
243
|
-
def export(
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
217
|
+
def export(
    self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs
) -> str | bytes:
    """Resolve the data contract and render it with the exporter for ``export_format``.

    The HTML, Mermaid and Excel formats are resolved through
    ``resolve.resolve_data_contract_v2``; every other format goes through
    ``resolve.resolve_data_contract``. Apart from the resolver, both paths
    pass exactly the same arguments to the exporter.

    Args:
        export_format: Target format; also selects the exporter via
            ``exporter_factory.create``.
        model: Model selector forwarded to the exporter (defaults to "all").
        sql_server_type: SQL dialect hint forwarded to the exporter.
        **kwargs: Extra options, forwarded unchanged as ``export_args``.

    Returns:
        Whatever the selected exporter returns (annotated as ``str | bytes``).
    """
    uses_v2_resolver = export_format in (
        ExportFormat.html,
        ExportFormat.mermaid,
        ExportFormat.excel,
    )
    # Only the resolver differs between the two paths, so pick it once.
    resolver = resolve.resolve_data_contract_v2 if uses_v2_resolver else resolve.resolve_data_contract

    resolved_contract = resolver(
        self._data_contract_file,
        self._data_contract_str,
        self._data_contract,
        schema_location=self._schema_location,
        inline_definitions=self._inline_definitions,
        inline_quality=self._inline_quality,
    )

    exporter = exporter_factory.create(export_format)
    return exporter.export(
        data_contract=resolved_contract,
        model=model,
        server=self._server,
        sql_server_type=sql_server_type,
        export_args=kwargs,
    )
|
|
260
258
|
|
|
259
|
+
@classmethod
def import_from_source(
    cls,
    format: str,
    source: typing.Optional[str] = None,
    template: typing.Optional[str] = None,
    schema: typing.Optional[str] = None,
    spec: Spec = Spec.datacontract_specification,
    **kwargs,
) -> DataContractSpecification | OpenDataContractStandard:
    """Import a data contract from ``source`` and return it in the requested spec.

    The importer registered for ``format`` is run against a freshly
    initialised Data Contract Specification template. If the importer's
    result is not already in the target representation it is converted
    (``to_odcs_v3`` towards ODCS, ``import_from_odcs`` towards the Data
    Contract Specification). Optional ``id`` and ``owner`` entries in
    ``kwargs`` then overwrite the corresponding fields of the result.

    Args:
        format: Import format; selects the importer via ``importer_factory``.
        source: Location of the data to import, passed to the importer.
        template: Template handed to ``cls.init`` for the initial contract.
        schema: Schema location handed to ``cls.init``.
        spec: Desired output spec. An Excel ``format`` always yields ODCS,
            regardless of this value.
        **kwargs: Forwarded to the importer as ``import_args``; ``id`` and
            ``owner`` are additionally applied to the imported contract.

    Returns:
        An ``OpenDataContractStandard`` on the ODCS path, otherwise a
        ``DataContractSpecification``.

    Raises:
        DataContractException: If ``spec`` is neither ODCS nor the Data
            Contract Specification (and ``format`` is not Excel).
    """
    contract_id = kwargs.get("id")
    owner = kwargs.get("owner")

    wants_odcs = spec == Spec.odcs or format == ImportFormat.excel

    # Reject unknown specs up front, before any template or importer work runs.
    if not wants_odcs and spec != Spec.datacontract_specification:
        raise DataContractException(
            type="general",
            result=ResultEnum.error,
            name="Import Data Contract",
            reason=f"Unsupported data contract format: {spec}",
            engine="datacontract",
        )

    initial_contract = cls.init(template=template, schema=schema)
    imported = importer_factory.create(format).import_source(
        data_contract_specification=initial_contract, source=source, import_args=kwargs
    )

    if wants_odcs:
        if isinstance(imported, DataContractSpecification):
            # The importer produced the other representation: convert automatically.
            imported = to_odcs_v3(imported)

        cls._overwrite_id_in_odcs(imported, contract_id)
        cls._overwrite_owner_in_odcs(imported, owner)
        return imported

    if isinstance(imported, OpenDataContractStandard):
        # The importer produced ODCS: convert automatically.
        imported = import_from_odcs(initial_contract, imported)

    cls._overwrite_id_in_data_contract_specification(imported, contract_id)
    cls._overwrite_owner_in_data_contract_specification(imported, owner)
    return imported
|
|
312
|
+
|
|
313
|
+
@staticmethod
def _overwrite_id_in_data_contract_specification(
    data_contract_specification: DataContractSpecification, id: str | None
):
    """Set the contract's ``id`` when a non-empty override is supplied.

    A falsy ``id`` (``None`` or an empty string) leaves the contract untouched.
    """
    if id:
        data_contract_specification.id = id
|
|
321
|
+
|
|
322
|
+
@staticmethod
def _overwrite_owner_in_data_contract_specification(
    data_contract_specification: DataContractSpecification, owner: str | None
):
    """Set ``info.owner`` on the contract when a non-empty override is supplied.

    A falsy ``owner`` leaves the contract untouched. If the contract has no
    ``info`` section yet, an empty ``Info`` is created first so the owner has
    somewhere to live.
    """
    if owner:
        if data_contract_specification.info is None:
            data_contract_specification.info = Info()
        data_contract_specification.info.owner = owner
|
|
332
|
+
|
|
333
|
+
@staticmethod
def _overwrite_owner_in_odcs(odcs: OpenDataContractStandard, owner: str | None):
    """Store ``owner`` as the "owner" custom property of an ODCS contract.

    A falsy ``owner`` leaves the contract untouched. Otherwise an existing
    custom property keyed "owner" gets its value replaced; if there is none,
    a new one is appended. ``customProperties`` is created as an empty list
    first when it is missing.

    Args:
        odcs: The ODCS contract to modify in place.
        owner: The owner to record, or ``None``/empty to do nothing.
    """
    if not owner:
        return

    if odcs.customProperties is None:
        odcs.customProperties = []

    for custom_property in odcs.customProperties:
        # The key of a custom property lives in its ``property`` field (the
        # same field used when a new entry is appended below). Matching on
        # it keeps lookup and creation consistent and avoids adding a
        # duplicate "owner" entry.
        if custom_property.property == "owner":
            custom_property.value = owner
            return

    odcs.customProperties.append(CustomProperty(property="owner", value=owner))
|
|
345
|
+
|
|
346
|
+
@staticmethod
def _overwrite_id_in_odcs(odcs: OpenDataContractStandard, id: str | None):
    """Set the ODCS contract's ``id`` when a non-empty override is supplied.

    A falsy ``id`` (``None`` or an empty string) leaves the contract untouched.
    """
    if id:
        odcs.id = id
|