datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +260 -0
- datacontract/breaking/breaking.py +242 -12
- datacontract/breaking/breaking_rules.py +37 -1
- datacontract/catalog/catalog.py +80 -0
- datacontract/cli.py +387 -117
- datacontract/data_contract.py +216 -353
- datacontract/engines/data_contract_checks.py +1041 -0
- datacontract/engines/data_contract_test.py +113 -0
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
- datacontract/engines/soda/check_soda_execute.py +100 -56
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/engines/soda/connections/duckdb_connection.py +241 -0
- datacontract/engines/soda/connections/kafka.py +206 -113
- datacontract/engines/soda/connections/snowflake.py +8 -5
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/engines/soda/connections/trino.py +26 -0
- datacontract/export/avro_converter.py +72 -8
- datacontract/export/avro_idl_converter.py +31 -25
- datacontract/export/bigquery_converter.py +130 -0
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/data_caterer_converter.py +161 -0
- datacontract/export/dbml_converter.py +148 -0
- datacontract/export/dbt_converter.py +141 -54
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +100 -0
- datacontract/export/exporter_factory.py +216 -0
- datacontract/export/go_converter.py +105 -0
- datacontract/export/great_expectations_converter.py +257 -36
- datacontract/export/html_exporter.py +86 -0
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/jsonschema_converter.py +71 -16
- datacontract/export/markdown_converter.py +337 -0
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +375 -0
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +168 -68
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +13 -6
- datacontract/export/sodacl_converter.py +36 -188
- datacontract/export/spark_converter.py +245 -0
- datacontract/export/sql_converter.py +37 -3
- datacontract/export/sql_type_converter.py +269 -8
- datacontract/export/sqlalchemy_converter.py +170 -0
- datacontract/export/terraform_converter.py +7 -2
- datacontract/imports/avro_importer.py +246 -26
- datacontract/imports/bigquery_importer.py +221 -0
- datacontract/imports/csv_importer.py +143 -0
- datacontract/imports/dbml_importer.py +112 -0
- datacontract/imports/dbt_importer.py +240 -0
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/glue_importer.py +288 -0
- datacontract/imports/iceberg_importer.py +172 -0
- datacontract/imports/importer.py +51 -0
- datacontract/imports/importer_factory.py +128 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/jsonschema_importer.py +146 -0
- datacontract/imports/odcs_importer.py +60 -0
- datacontract/imports/odcs_v3_importer.py +516 -0
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +262 -0
- datacontract/imports/sql_importer.py +274 -35
- datacontract/imports/unity_importer.py +219 -0
- datacontract/init/init_template.py +20 -0
- datacontract/integration/datamesh_manager.py +86 -0
- datacontract/lint/resolve.py +271 -49
- datacontract/lint/resources.py +21 -0
- datacontract/lint/schema.py +53 -17
- datacontract/lint/urls.py +32 -12
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/exceptions.py +4 -1
- datacontract/model/odcs.py +24 -0
- datacontract/model/run.py +49 -29
- datacontract/output/__init__.py +0 -0
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- datacontract/py.typed +0 -0
- datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +139 -294
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +236 -0
- datacontract/templates/partials/datacontract_information.html +86 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +51 -0
- datacontract/templates/partials/definition.html +25 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +144 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/partials/server.html +211 -0
- datacontract/templates/style/output.css +491 -72
- datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/engines/soda/connections/duckdb.py +0 -76
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/export/html_export.py +0 -66
- datacontract/export/odcs_converter.py +0 -102
- datacontract/init/download_datacontract_file.py +0 -17
- datacontract/integration/publish_datamesh_manager.py +0 -33
- datacontract/integration/publish_opentelemetry.py +0 -107
- datacontract/lint/lint.py +0 -141
- datacontract/lint/linters/description_linter.py +0 -34
- datacontract/lint/linters/example_model_linter.py +0 -91
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -38
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -99
- datacontract/model/data_contract_specification.py +0 -141
- datacontract/web.py +0 -14
- datacontract_cli-0.10.0.dist-info/METADATA +0 -951
- datacontract_cli-0.10.0.dist-info/RECORD +0 -66
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- /datacontract/{lint/linters → export}/__init__.py +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/data_contract.py
CHANGED
|
@@ -1,56 +1,34 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
|
-
import tempfile
|
|
4
2
|
import typing
|
|
5
3
|
|
|
6
|
-
import
|
|
7
|
-
from pyspark.sql import SparkSession
|
|
4
|
+
from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard
|
|
8
5
|
|
|
9
|
-
from datacontract.
|
|
10
|
-
|
|
11
|
-
from datacontract.
|
|
12
|
-
|
|
6
|
+
from datacontract.export.odcs_v3_exporter import to_odcs_v3
|
|
7
|
+
from datacontract.imports.importer import ImportFormat, Spec
|
|
8
|
+
from datacontract.imports.odcs_v3_importer import import_from_odcs
|
|
9
|
+
|
|
10
|
+
if typing.TYPE_CHECKING:
|
|
11
|
+
from pyspark.sql import SparkSession
|
|
12
|
+
|
|
13
|
+
from duckdb.duckdb import DuckDBPyConnection
|
|
14
|
+
|
|
15
|
+
from datacontract.breaking.breaking import (
|
|
16
|
+
info_breaking_changes,
|
|
17
|
+
models_breaking_changes,
|
|
18
|
+
quality_breaking_changes,
|
|
19
|
+
terms_breaking_changes,
|
|
13
20
|
)
|
|
14
|
-
from datacontract.
|
|
15
|
-
|
|
16
|
-
from datacontract.
|
|
17
|
-
from datacontract.export.
|
|
18
|
-
from datacontract.
|
|
19
|
-
from datacontract.
|
|
20
|
-
|
|
21
|
-
from datacontract.export.great_expectations_converter import \
|
|
22
|
-
to_great_expectations
|
|
23
|
-
from datacontract.export.html_export import to_html
|
|
24
|
-
from datacontract.export.jsonschema_converter import to_jsonschema_json
|
|
25
|
-
from datacontract.export.odcs_converter import to_odcs_yaml
|
|
26
|
-
from datacontract.export.protobuf_converter import to_protobuf
|
|
27
|
-
from datacontract.export.pydantic_converter import to_pydantic_model_str
|
|
28
|
-
from datacontract.export.rdf_converter import to_rdf_n3
|
|
29
|
-
from datacontract.export.sodacl_converter import to_sodacl_yaml
|
|
30
|
-
from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
|
|
31
|
-
from datacontract.export.terraform_converter import to_terraform
|
|
32
|
-
from datacontract.imports.avro_importer import import_avro
|
|
33
|
-
from datacontract.imports.sql_importer import import_sql
|
|
34
|
-
from datacontract.integration.publish_datamesh_manager import \
|
|
35
|
-
publish_datamesh_manager
|
|
36
|
-
from datacontract.integration.publish_opentelemetry import publish_opentelemetry
|
|
21
|
+
from datacontract.breaking.breaking_change import BreakingChange, BreakingChanges, Severity
|
|
22
|
+
from datacontract.engines.data_contract_test import execute_data_contract_test
|
|
23
|
+
from datacontract.export.exporter import ExportFormat
|
|
24
|
+
from datacontract.export.exporter_factory import exporter_factory
|
|
25
|
+
from datacontract.imports.importer_factory import importer_factory
|
|
26
|
+
from datacontract.init.init_template import get_init_template
|
|
27
|
+
from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
|
|
37
28
|
from datacontract.lint import resolve
|
|
38
|
-
from datacontract.
|
|
39
|
-
from datacontract.lint.linters.example_model_linter import ExampleModelLinter
|
|
40
|
-
from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
|
|
41
|
-
from datacontract.lint.linters.field_reference_linter import \
|
|
42
|
-
FieldReferenceLinter
|
|
43
|
-
from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
|
|
44
|
-
from datacontract.lint.linters.quality_schema_linter import \
|
|
45
|
-
QualityUsesSchemaLinter
|
|
46
|
-
from datacontract.lint.linters.valid_constraints_linter import \
|
|
47
|
-
ValidFieldConstraintsLinter
|
|
48
|
-
from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \
|
|
49
|
-
Severity
|
|
50
|
-
from datacontract.model.data_contract_specification import \
|
|
51
|
-
DataContractSpecification, Server
|
|
29
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Info
|
|
52
30
|
from datacontract.model.exceptions import DataContractException
|
|
53
|
-
from datacontract.model.run import
|
|
31
|
+
from datacontract.model.run import Check, ResultEnum, Run
|
|
54
32
|
|
|
55
33
|
|
|
56
34
|
class DataContract:
|
|
@@ -61,41 +39,34 @@ class DataContract:
|
|
|
61
39
|
data_contract: DataContractSpecification = None,
|
|
62
40
|
schema_location: str = None,
|
|
63
41
|
server: str = None,
|
|
64
|
-
examples: bool = False,
|
|
65
42
|
publish_url: str = None,
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
inline_definitions: bool =
|
|
43
|
+
spark: "SparkSession" = None,
|
|
44
|
+
duckdb_connection: DuckDBPyConnection = None,
|
|
45
|
+
inline_definitions: bool = True,
|
|
46
|
+
inline_quality: bool = True,
|
|
47
|
+
ssl_verification: bool = True,
|
|
48
|
+
publish_test_results: bool = False,
|
|
69
49
|
):
|
|
70
50
|
self._data_contract_file = data_contract_file
|
|
71
51
|
self._data_contract_str = data_contract_str
|
|
72
52
|
self._data_contract = data_contract
|
|
73
53
|
self._schema_location = schema_location
|
|
74
54
|
self._server = server
|
|
75
|
-
self._examples = examples
|
|
76
55
|
self._publish_url = publish_url
|
|
77
|
-
self.
|
|
56
|
+
self._publish_test_results = publish_test_results
|
|
78
57
|
self._spark = spark
|
|
58
|
+
self._duckdb_connection = duckdb_connection
|
|
79
59
|
self._inline_definitions = inline_definitions
|
|
80
|
-
self.
|
|
81
|
-
|
|
82
|
-
QualityUsesSchemaLinter(),
|
|
83
|
-
FieldPatternLinter(),
|
|
84
|
-
FieldReferenceLinter(),
|
|
85
|
-
NoticePeriodLinter(),
|
|
86
|
-
ValidFieldConstraintsLinter(),
|
|
87
|
-
DescriptionLinter(),
|
|
88
|
-
}
|
|
60
|
+
self._inline_quality = inline_quality
|
|
61
|
+
self._ssl_verification = ssl_verification
|
|
89
62
|
|
|
90
63
|
@classmethod
|
|
91
|
-
def init(cls, template: str =
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
|
|
95
|
-
"""Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
|
|
64
|
+
def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
|
|
65
|
+
template_str = get_init_template(template)
|
|
66
|
+
return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
|
|
96
67
|
|
|
97
|
-
|
|
98
|
-
"""
|
|
68
|
+
def lint(self) -> Run:
|
|
69
|
+
"""Lint the data contract by validating it against the JSON schema."""
|
|
99
70
|
run = Run.create_run()
|
|
100
71
|
try:
|
|
101
72
|
run.log_info("Linting data contract")
|
|
@@ -104,32 +75,17 @@ class DataContract:
|
|
|
104
75
|
self._data_contract_str,
|
|
105
76
|
self._data_contract,
|
|
106
77
|
self._schema_location,
|
|
107
|
-
inline_definitions=
|
|
78
|
+
inline_definitions=self._inline_definitions,
|
|
79
|
+
inline_quality=self._inline_quality,
|
|
108
80
|
)
|
|
109
81
|
run.checks.append(
|
|
110
|
-
Check(
|
|
82
|
+
Check(
|
|
83
|
+
type="lint",
|
|
84
|
+
result=ResultEnum.passed,
|
|
85
|
+
name="Data contract is syntactically valid",
|
|
86
|
+
engine="datacontract",
|
|
87
|
+
)
|
|
111
88
|
)
|
|
112
|
-
if enabled_linters == "none":
|
|
113
|
-
linters_to_check = set()
|
|
114
|
-
elif enabled_linters == "all":
|
|
115
|
-
linters_to_check = self.all_linters
|
|
116
|
-
elif isinstance(enabled_linters, set):
|
|
117
|
-
linters_to_check = {linter for linter in self.all_linters if linter.id in enabled_linters}
|
|
118
|
-
else:
|
|
119
|
-
raise RuntimeError(f"Unknown argument enabled_linters={enabled_linters} for lint()")
|
|
120
|
-
for linter in linters_to_check:
|
|
121
|
-
try:
|
|
122
|
-
run.checks.extend(linter.lint(data_contract))
|
|
123
|
-
except Exception as e:
|
|
124
|
-
run.checks.append(
|
|
125
|
-
Check(
|
|
126
|
-
type="general",
|
|
127
|
-
result="error",
|
|
128
|
-
name=f"Linter '{linter.name}'",
|
|
129
|
-
reason=str(e),
|
|
130
|
-
engine="datacontract",
|
|
131
|
-
)
|
|
132
|
-
)
|
|
133
89
|
run.dataContractId = data_contract.id
|
|
134
90
|
run.dataContractVersion = data_contract.info.version
|
|
135
91
|
except DataContractException as e:
|
|
@@ -141,7 +97,7 @@ class DataContract:
|
|
|
141
97
|
run.checks.append(
|
|
142
98
|
Check(
|
|
143
99
|
type="general",
|
|
144
|
-
result=
|
|
100
|
+
result=ResultEnum.error,
|
|
145
101
|
name="Check Data Contract",
|
|
146
102
|
reason=str(e),
|
|
147
103
|
engine="datacontract",
|
|
@@ -156,62 +112,34 @@ class DataContract:
|
|
|
156
112
|
try:
|
|
157
113
|
run.log_info("Testing data contract")
|
|
158
114
|
data_contract = resolve.resolve_data_contract(
|
|
159
|
-
self._data_contract_file,
|
|
115
|
+
self._data_contract_file,
|
|
116
|
+
self._data_contract_str,
|
|
117
|
+
self._data_contract,
|
|
118
|
+
self._schema_location,
|
|
119
|
+
inline_definitions=self._inline_definitions,
|
|
120
|
+
inline_quality=self._inline_quality,
|
|
160
121
|
)
|
|
161
122
|
|
|
162
|
-
|
|
163
|
-
raise DataContractException(
|
|
164
|
-
type="lint",
|
|
165
|
-
name="Check that data contract contains models",
|
|
166
|
-
result="warning",
|
|
167
|
-
reason="Models block is missing. Skip executing tests.",
|
|
168
|
-
engine="datacontract",
|
|
169
|
-
)
|
|
170
|
-
|
|
171
|
-
if self._examples:
|
|
172
|
-
if data_contract.examples is None or len(data_contract.examples) == 0:
|
|
173
|
-
raise DataContractException(
|
|
174
|
-
type="lint",
|
|
175
|
-
name="Check that data contract contains valid examples",
|
|
176
|
-
result="warning",
|
|
177
|
-
reason="Examples block is missing. Skip executing tests.",
|
|
178
|
-
engine="datacontract",
|
|
179
|
-
)
|
|
180
|
-
else:
|
|
181
|
-
check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
|
|
182
|
-
|
|
183
|
-
# TODO create directory only for examples
|
|
184
|
-
with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
|
|
185
|
-
if self._examples:
|
|
186
|
-
server_name = "examples"
|
|
187
|
-
server = self._get_examples_server(data_contract, run, tmp_dir)
|
|
188
|
-
else:
|
|
189
|
-
server_name = list(data_contract.servers.keys())[0]
|
|
190
|
-
server = data_contract.servers.get(server_name)
|
|
191
|
-
|
|
192
|
-
run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
|
|
193
|
-
run.dataContractId = data_contract.id
|
|
194
|
-
run.dataContractVersion = data_contract.info.version
|
|
195
|
-
run.dataProductId = server.dataProductId
|
|
196
|
-
run.outputPortId = server.outputPortId
|
|
197
|
-
run.server = server_name
|
|
198
|
-
|
|
199
|
-
# 5. check server is supported type
|
|
200
|
-
# 6. check server credentials are complete
|
|
201
|
-
if server.format == "json" and server.type != "kafka":
|
|
202
|
-
check_jsonschema(run, data_contract, server)
|
|
203
|
-
check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
|
|
123
|
+
execute_data_contract_test(data_contract, run, self._server, self._spark, self._duckdb_connection)
|
|
204
124
|
|
|
205
125
|
except DataContractException as e:
|
|
206
126
|
run.checks.append(
|
|
207
|
-
Check(
|
|
127
|
+
Check(
|
|
128
|
+
type=e.type,
|
|
129
|
+
name=e.name,
|
|
130
|
+
result=e.result,
|
|
131
|
+
reason=e.reason,
|
|
132
|
+
model=e.model,
|
|
133
|
+
engine=e.engine,
|
|
134
|
+
details="",
|
|
135
|
+
)
|
|
208
136
|
)
|
|
209
137
|
run.log_error(str(e))
|
|
210
138
|
except Exception as e:
|
|
211
139
|
run.checks.append(
|
|
212
140
|
Check(
|
|
213
141
|
type="general",
|
|
214
|
-
result=
|
|
142
|
+
result=ResultEnum.error,
|
|
215
143
|
name="Test Data Contract",
|
|
216
144
|
reason=str(e),
|
|
217
145
|
engine="datacontract",
|
|
@@ -222,16 +150,8 @@ class DataContract:
|
|
|
222
150
|
|
|
223
151
|
run.finish()
|
|
224
152
|
|
|
225
|
-
if self._publish_url is not None:
|
|
226
|
-
|
|
227
|
-
publish_datamesh_manager(run, self._publish_url)
|
|
228
|
-
except Exception:
|
|
229
|
-
run.log_error("Failed to publish to datamesh manager")
|
|
230
|
-
if self._publish_to_opentelemetry:
|
|
231
|
-
try:
|
|
232
|
-
publish_opentelemetry(run)
|
|
233
|
-
except Exception:
|
|
234
|
-
run.log_error("Failed to publish to opentelemetry")
|
|
153
|
+
if self._publish_url is not None or self._publish_test_results:
|
|
154
|
+
publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
|
|
235
155
|
|
|
236
156
|
return run
|
|
237
157
|
|
|
@@ -246,6 +166,24 @@ class DataContract:
|
|
|
246
166
|
|
|
247
167
|
breaking_changes = list[BreakingChange]()
|
|
248
168
|
|
|
169
|
+
breaking_changes.extend(
|
|
170
|
+
info_breaking_changes(
|
|
171
|
+
old_info=old.info,
|
|
172
|
+
new_info=new.info,
|
|
173
|
+
new_path=other._data_contract_file,
|
|
174
|
+
include_severities=include_severities,
|
|
175
|
+
)
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
breaking_changes.extend(
|
|
179
|
+
terms_breaking_changes(
|
|
180
|
+
old_terms=old.terms,
|
|
181
|
+
new_terms=new.terms,
|
|
182
|
+
new_path=other._data_contract_file,
|
|
183
|
+
include_severities=include_severities,
|
|
184
|
+
)
|
|
185
|
+
)
|
|
186
|
+
|
|
249
187
|
breaking_changes.extend(
|
|
250
188
|
quality_breaking_changes(
|
|
251
189
|
old_quality=old.quality,
|
|
@@ -273,216 +211,141 @@ class DataContract:
|
|
|
273
211
|
data_contract=self._data_contract,
|
|
274
212
|
schema_location=self._schema_location,
|
|
275
213
|
inline_definitions=self._inline_definitions,
|
|
214
|
+
inline_quality=self._inline_quality,
|
|
276
215
|
)
|
|
277
216
|
|
|
278
|
-
def export(
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
)
|
|
303
|
-
|
|
304
|
-
return to_jsonschema_json(model_name, model_value)
|
|
305
|
-
if export_format == "sodacl":
|
|
306
|
-
return to_sodacl_yaml(data_contract)
|
|
307
|
-
if export_format == "dbt":
|
|
308
|
-
return to_dbt_models_yaml(data_contract)
|
|
309
|
-
if export_format == "dbt-sources":
|
|
310
|
-
return to_dbt_sources_yaml(data_contract, self._server)
|
|
311
|
-
if export_format == "dbt-staging-sql":
|
|
312
|
-
if data_contract.models is None:
|
|
313
|
-
raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
|
|
314
|
-
|
|
315
|
-
model_names = list(data_contract.models.keys())
|
|
316
|
-
|
|
317
|
-
if model == "all":
|
|
318
|
-
if len(data_contract.models.items()) != 1:
|
|
319
|
-
raise RuntimeError(
|
|
320
|
-
f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
|
|
321
|
-
)
|
|
322
|
-
|
|
323
|
-
model_name, model_value = next(iter(data_contract.models.items()))
|
|
324
|
-
return to_dbt_staging_sql(data_contract, model_name, model_value)
|
|
325
|
-
else:
|
|
326
|
-
model_name = model
|
|
327
|
-
model_value = data_contract.models.get(model_name)
|
|
328
|
-
if model_value is None:
|
|
329
|
-
raise RuntimeError(
|
|
330
|
-
f"Model {model_name} not found in the data contract. Available models: {model_names}"
|
|
331
|
-
)
|
|
332
|
-
|
|
333
|
-
return to_dbt_staging_sql(data_contract, model_name, model_value)
|
|
334
|
-
if export_format == "odcs":
|
|
335
|
-
return to_odcs_yaml(data_contract)
|
|
336
|
-
if export_format == "rdf":
|
|
337
|
-
return to_rdf_n3(data_contract, rdf_base)
|
|
338
|
-
if export_format == "protobuf":
|
|
339
|
-
return to_protobuf(data_contract)
|
|
340
|
-
if export_format == "avro":
|
|
341
|
-
if data_contract.models is None:
|
|
342
|
-
raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
|
|
343
|
-
|
|
344
|
-
model_names = list(data_contract.models.keys())
|
|
345
|
-
|
|
346
|
-
if model == "all":
|
|
347
|
-
if len(data_contract.models.items()) != 1:
|
|
348
|
-
raise RuntimeError(
|
|
349
|
-
f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
|
|
350
|
-
)
|
|
351
|
-
|
|
352
|
-
model_name, model_value = next(iter(data_contract.models.items()))
|
|
353
|
-
return to_avro_schema_json(model_name, model_value)
|
|
354
|
-
else:
|
|
355
|
-
model_name = model
|
|
356
|
-
model_value = data_contract.models.get(model_name)
|
|
357
|
-
if model_value is None:
|
|
358
|
-
raise RuntimeError(
|
|
359
|
-
f"Model {model_name} not found in the data contract. Available models: {model_names}"
|
|
360
|
-
)
|
|
361
|
-
|
|
362
|
-
return to_avro_schema_json(model_name, model_value)
|
|
363
|
-
if export_format == "avro-idl":
|
|
364
|
-
return to_avro_idl(data_contract)
|
|
365
|
-
if export_format == "terraform":
|
|
366
|
-
return to_terraform(data_contract)
|
|
367
|
-
if export_format == "sql":
|
|
368
|
-
server_type = self._determine_sql_server_type(data_contract, sql_server_type)
|
|
369
|
-
return to_sql_ddl(data_contract, server_type=server_type)
|
|
370
|
-
if export_format == "sql-query":
|
|
371
|
-
if data_contract.models is None:
|
|
372
|
-
raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
|
|
373
|
-
|
|
374
|
-
server_type = self._determine_sql_server_type(data_contract, sql_server_type)
|
|
375
|
-
|
|
376
|
-
model_names = list(data_contract.models.keys())
|
|
377
|
-
|
|
378
|
-
if model == "all":
|
|
379
|
-
if len(data_contract.models.items()) != 1:
|
|
380
|
-
raise RuntimeError(
|
|
381
|
-
f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
|
|
382
|
-
)
|
|
383
|
-
|
|
384
|
-
model_name, model_value = next(iter(data_contract.models.items()))
|
|
385
|
-
return to_sql_query(data_contract, model_name, model_value, server_type)
|
|
386
|
-
else:
|
|
387
|
-
model_name = model
|
|
388
|
-
model_value = data_contract.models.get(model_name)
|
|
389
|
-
if model_value is None:
|
|
390
|
-
raise RuntimeError(
|
|
391
|
-
f"Model {model_name} not found in the data contract. Available models: {model_names}"
|
|
392
|
-
)
|
|
393
|
-
|
|
394
|
-
return to_sql_query(data_contract, model_name, model_value, server_type)
|
|
395
|
-
|
|
396
|
-
if export_format == "great-expectations":
|
|
397
|
-
if data_contract.models is None:
|
|
398
|
-
raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
|
|
399
|
-
|
|
400
|
-
model_names = list(data_contract.models.keys())
|
|
401
|
-
|
|
402
|
-
if model == "all":
|
|
403
|
-
if len(data_contract.models.items()) != 1:
|
|
404
|
-
raise RuntimeError(
|
|
405
|
-
f"Export to {export_format} is model specific. Specify the model via --model "
|
|
406
|
-
f"$MODEL_NAME. Available models: {model_names}"
|
|
407
|
-
)
|
|
408
|
-
|
|
409
|
-
model_name, model_value = next(iter(data_contract.models.items()))
|
|
410
|
-
return to_great_expectations(data_contract, model_name)
|
|
411
|
-
else:
|
|
412
|
-
model_name = model
|
|
413
|
-
model_value = data_contract.models.get(model_name)
|
|
414
|
-
if model_value is None:
|
|
415
|
-
raise RuntimeError(
|
|
416
|
-
f"Model {model_name} not found in the data contract. " f"Available models: {model_names}"
|
|
417
|
-
)
|
|
418
|
-
|
|
419
|
-
return to_great_expectations(data_contract, model_name)
|
|
420
|
-
if export_format == "pydantic-model":
|
|
421
|
-
return to_pydantic_model_str(data_contract)
|
|
422
|
-
if export_format == "html":
|
|
423
|
-
return to_html(data_contract)
|
|
424
|
-
else:
|
|
425
|
-
print(f"Export format {export_format} not supported.")
|
|
426
|
-
return ""
|
|
427
|
-
|
|
428
|
-
def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
|
|
429
|
-
if sql_server_type == "auto":
|
|
430
|
-
if data_contract.servers is None or len(data_contract.servers) == 0:
|
|
431
|
-
raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
|
|
432
|
-
|
|
433
|
-
server_types = set([server.type for server in data_contract.servers.values()])
|
|
434
|
-
if "snowflake" in server_types:
|
|
435
|
-
return "snowflake"
|
|
436
|
-
elif "postgres" in server_types:
|
|
437
|
-
return "postgres"
|
|
438
|
-
elif "databricks" in server_types:
|
|
439
|
-
return "databricks"
|
|
440
|
-
else:
|
|
441
|
-
# default to snowflake dialect
|
|
442
|
-
return "snowflake"
|
|
217
|
+
def export(
|
|
218
|
+
self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs
|
|
219
|
+
) -> str | bytes:
|
|
220
|
+
if (
|
|
221
|
+
export_format == ExportFormat.html
|
|
222
|
+
or export_format == ExportFormat.mermaid
|
|
223
|
+
or export_format == ExportFormat.excel
|
|
224
|
+
):
|
|
225
|
+
data_contract = resolve.resolve_data_contract_v2(
|
|
226
|
+
self._data_contract_file,
|
|
227
|
+
self._data_contract_str,
|
|
228
|
+
self._data_contract,
|
|
229
|
+
schema_location=self._schema_location,
|
|
230
|
+
inline_definitions=self._inline_definitions,
|
|
231
|
+
inline_quality=self._inline_quality,
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
return exporter_factory.create(export_format).export(
|
|
235
|
+
data_contract=data_contract,
|
|
236
|
+
model=model,
|
|
237
|
+
server=self._server,
|
|
238
|
+
sql_server_type=sql_server_type,
|
|
239
|
+
export_args=kwargs,
|
|
240
|
+
)
|
|
443
241
|
else:
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
run.log_info(f"Creating example file {p}")
|
|
453
|
-
with open(p, "w") as f:
|
|
454
|
-
content = ""
|
|
455
|
-
if format == "json" and isinstance(example.data, list):
|
|
456
|
-
content = json.dumps(example.data)
|
|
457
|
-
elif format == "json" and isinstance(example.data, str):
|
|
458
|
-
content = example.data
|
|
459
|
-
elif format == "yaml" and isinstance(example.data, list):
|
|
460
|
-
content = yaml.dump(example.data, allow_unicode=True)
|
|
461
|
-
elif format == "yaml" and isinstance(example.data, str):
|
|
462
|
-
content = example.data
|
|
463
|
-
elif format == "csv":
|
|
464
|
-
content = example.data
|
|
465
|
-
logging.debug(f"Content of example file {p}: {content}")
|
|
466
|
-
f.write(content)
|
|
467
|
-
path = f"{tmp_dir}" + "/{model}." + format
|
|
468
|
-
delimiter = "array"
|
|
469
|
-
server = Server(
|
|
470
|
-
type="local",
|
|
471
|
-
path=path,
|
|
472
|
-
format=format,
|
|
473
|
-
delimiter=delimiter,
|
|
474
|
-
)
|
|
475
|
-
run.log_info(f"Using {server} for testing the examples")
|
|
476
|
-
return server
|
|
242
|
+
data_contract = resolve.resolve_data_contract(
|
|
243
|
+
self._data_contract_file,
|
|
244
|
+
self._data_contract_str,
|
|
245
|
+
self._data_contract,
|
|
246
|
+
schema_location=self._schema_location,
|
|
247
|
+
inline_definitions=self._inline_definitions,
|
|
248
|
+
inline_quality=self._inline_quality,
|
|
249
|
+
)
|
|
477
250
|
|
|
478
|
-
|
|
479
|
-
|
|
251
|
+
return exporter_factory.create(export_format).export(
|
|
252
|
+
data_contract=data_contract,
|
|
253
|
+
model=model,
|
|
254
|
+
server=self._server,
|
|
255
|
+
sql_server_type=sql_server_type,
|
|
256
|
+
export_args=kwargs,
|
|
257
|
+
)
|
|
480
258
|
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
259
|
+
@classmethod
|
|
260
|
+
def import_from_source(
|
|
261
|
+
cls,
|
|
262
|
+
format: str,
|
|
263
|
+
source: typing.Optional[str] = None,
|
|
264
|
+
template: typing.Optional[str] = None,
|
|
265
|
+
schema: typing.Optional[str] = None,
|
|
266
|
+
spec: Spec = Spec.datacontract_specification,
|
|
267
|
+
**kwargs,
|
|
268
|
+
) -> DataContractSpecification | OpenDataContractStandard:
|
|
269
|
+
id = kwargs.get("id")
|
|
270
|
+
owner = kwargs.get("owner")
|
|
271
|
+
|
|
272
|
+
if spec == Spec.odcs or format == ImportFormat.excel:
|
|
273
|
+
data_contract_specification_initial = cls.init(template=template, schema=schema)
|
|
274
|
+
|
|
275
|
+
odcs_imported = importer_factory.create(format).import_source(
|
|
276
|
+
data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
if isinstance(odcs_imported, DataContractSpecification):
|
|
280
|
+
# convert automatically
|
|
281
|
+
odcs_imported = to_odcs_v3(odcs_imported)
|
|
282
|
+
|
|
283
|
+
cls._overwrite_id_in_odcs(odcs_imported, id)
|
|
284
|
+
cls._overwrite_owner_in_odcs(odcs_imported, owner)
|
|
285
|
+
|
|
286
|
+
return odcs_imported
|
|
287
|
+
elif spec == Spec.datacontract_specification:
|
|
288
|
+
data_contract_specification_initial = cls.init(template=template, schema=schema)
|
|
289
|
+
|
|
290
|
+
data_contract_specification_imported = importer_factory.create(format).import_source(
|
|
291
|
+
data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
if isinstance(data_contract_specification_imported, OpenDataContractStandard):
|
|
295
|
+
# convert automatically
|
|
296
|
+
data_contract_specification_imported = import_from_odcs(
|
|
297
|
+
data_contract_specification_initial, data_contract_specification_imported
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
cls._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
|
|
301
|
+
cls._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
|
|
302
|
+
|
|
303
|
+
return data_contract_specification_imported
|
|
485
304
|
else:
|
|
486
|
-
|
|
305
|
+
raise DataContractException(
|
|
306
|
+
type="general",
|
|
307
|
+
result=ResultEnum.error,
|
|
308
|
+
name="Import Data Contract",
|
|
309
|
+
reason=f"Unsupported data contract format: {spec}",
|
|
310
|
+
engine="datacontract",
|
|
311
|
+
)
|
|
487
312
|
|
|
488
|
-
|
|
313
|
+
@staticmethod
|
|
314
|
+
def _overwrite_id_in_data_contract_specification(
|
|
315
|
+
data_contract_specification: DataContractSpecification, id: str | None
|
|
316
|
+
):
|
|
317
|
+
if not id:
|
|
318
|
+
return
|
|
319
|
+
|
|
320
|
+
data_contract_specification.id = id
|
|
321
|
+
|
|
322
|
+
@staticmethod
|
|
323
|
+
def _overwrite_owner_in_data_contract_specification(
|
|
324
|
+
data_contract_specification: DataContractSpecification, owner: str | None
|
|
325
|
+
):
|
|
326
|
+
if not owner:
|
|
327
|
+
return
|
|
328
|
+
|
|
329
|
+
if data_contract_specification.info is None:
|
|
330
|
+
data_contract_specification.info = Info()
|
|
331
|
+
data_contract_specification.info.owner = owner
|
|
332
|
+
|
|
333
|
+
@staticmethod
|
|
334
|
+
def _overwrite_owner_in_odcs(odcs: OpenDataContractStandard, owner: str | None):
|
|
335
|
+
if not owner:
|
|
336
|
+
return
|
|
337
|
+
|
|
338
|
+
if odcs.customProperties is None:
|
|
339
|
+
odcs.customProperties = []
|
|
340
|
+
for customProperty in odcs.customProperties:
|
|
341
|
+
if customProperty.name == "owner":
|
|
342
|
+
customProperty.value = owner
|
|
343
|
+
return
|
|
344
|
+
odcs.customProperties.append(CustomProperty(property="owner", value=owner))
|
|
345
|
+
|
|
346
|
+
@staticmethod
|
|
347
|
+
def _overwrite_id_in_odcs(odcs: OpenDataContractStandard, id: str | None):
|
|
348
|
+
if not id:
|
|
349
|
+
return
|
|
350
|
+
|
|
351
|
+
odcs.id = id
|