datacontract-cli 0.10.14__py3-none-any.whl → 0.10.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +227 -9
- datacontract/breaking/breaking_rules.py +24 -0
- datacontract/catalog/catalog.py +1 -1
- datacontract/cli.py +99 -32
- datacontract/data_contract.py +26 -4
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
- datacontract/engines/soda/check_soda_execute.py +5 -3
- datacontract/engines/soda/connections/duckdb.py +1 -0
- datacontract/engines/soda/connections/kafka.py +12 -12
- datacontract/export/avro_idl_converter.py +1 -2
- datacontract/export/bigquery_converter.py +4 -3
- datacontract/export/data_caterer_converter.py +1 -1
- datacontract/export/dbml_converter.py +2 -4
- datacontract/export/dbt_converter.py +2 -3
- datacontract/export/exporter.py +1 -1
- datacontract/export/exporter_factory.py +3 -2
- datacontract/export/go_converter.py +3 -2
- datacontract/export/great_expectations_converter.py +202 -40
- datacontract/export/html_export.py +1 -1
- datacontract/export/jsonschema_converter.py +3 -2
- datacontract/export/odcs_v2_exporter.py +1 -1
- datacontract/export/odcs_v3_exporter.py +1 -1
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +1 -1
- datacontract/export/rdf_converter.py +4 -5
- datacontract/export/sodacl_converter.py +6 -2
- datacontract/export/spark_converter.py +7 -6
- datacontract/export/sql_converter.py +1 -2
- datacontract/export/sqlalchemy_converter.py +1 -2
- datacontract/export/terraform_converter.py +1 -1
- datacontract/imports/avro_importer.py +1 -1
- datacontract/imports/bigquery_importer.py +1 -1
- datacontract/imports/dbml_importer.py +2 -2
- datacontract/imports/dbt_importer.py +3 -2
- datacontract/imports/glue_importer.py +5 -3
- datacontract/imports/iceberg_importer.py +5 -6
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +7 -1
- datacontract/imports/jsonschema_importer.py +3 -2
- datacontract/imports/odcs_v2_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +2 -2
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/spark_importer.py +2 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/imports/unity_importer.py +3 -3
- datacontract/integration/opentelemetry.py +0 -1
- datacontract/lint/lint.py +2 -1
- datacontract/lint/linters/description_linter.py +1 -0
- datacontract/lint/linters/example_model_linter.py +1 -0
- datacontract/lint/linters/field_pattern_linter.py +1 -0
- datacontract/lint/linters/field_reference_linter.py +1 -0
- datacontract/lint/linters/notice_period_linter.py +1 -0
- datacontract/lint/linters/quality_schema_linter.py +1 -0
- datacontract/lint/linters/valid_constraints_linter.py +1 -0
- datacontract/lint/resolve.py +1 -1
- datacontract/lint/schema.py +1 -1
- datacontract/model/data_contract_specification.py +11 -5
- datacontract/model/run.py +21 -12
- datacontract/templates/index.html +6 -6
- datacontract/web.py +2 -3
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/METADATA +97 -52
- datacontract_cli-0.10.15.dist-info/RECORD +105 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/WHEEL +1 -1
- datacontract_cli-0.10.14.dist-info/RECORD +0 -103
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/top_level.txt +0 -0
|
@@ -1,49 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module provides functionalities to export data contracts to Great Expectations suites.
|
|
3
|
+
It includes definitions for exporting different types of data (pandas, Spark, SQL) into
|
|
4
|
+
Great Expectations expectations format.
|
|
5
|
+
"""
|
|
6
|
+
|
|
1
7
|
import json
|
|
2
|
-
from
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import Any, Dict, List
|
|
3
10
|
|
|
4
11
|
import yaml
|
|
5
12
|
|
|
6
|
-
from datacontract.
|
|
7
|
-
|
|
13
|
+
from datacontract.export.exporter import (
|
|
14
|
+
Exporter,
|
|
15
|
+
_check_models_for_export,
|
|
16
|
+
)
|
|
17
|
+
from datacontract.export.pandas_type_converter import convert_to_pandas_type
|
|
18
|
+
from datacontract.export.spark_converter import to_spark_data_type
|
|
19
|
+
from datacontract.export.sql_type_converter import convert_to_sql_type
|
|
20
|
+
from datacontract.model.data_contract_specification import (
|
|
21
|
+
DataContractSpecification,
|
|
22
|
+
Field,
|
|
23
|
+
Quality,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class GreatExpectationsEngine(Enum):
|
|
28
|
+
"""Enum to represent the type of data engine for expectations.
|
|
29
|
+
|
|
30
|
+
Attributes:
|
|
31
|
+
pandas (str): Represents the Pandas engine type.
|
|
32
|
+
spark (str): Represents the Spark engine type.
|
|
33
|
+
sql (str): Represents the SQL engine type.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
pandas = "pandas"
|
|
37
|
+
spark = "spark"
|
|
38
|
+
sql = "sql"
|
|
39
|
+
|
|
8
40
|
|
|
41
|
+
class GreatExpectationsExporter(Exporter):
|
|
42
|
+
"""Exporter class to convert data contracts to Great Expectations suites.
|
|
43
|
+
|
|
44
|
+
Methods:
|
|
45
|
+
export: Converts a data contract model to a Great Expectations suite.
|
|
46
|
+
|
|
47
|
+
"""
|
|
9
48
|
|
|
10
|
-
class GreateExpectationsExporter(Exporter):
|
|
11
49
|
def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
|
|
50
|
+
"""Exports a data contract model to a Great Expectations suite.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
data_contract (DataContractSpecification): The data contract specification.
|
|
54
|
+
model (str): The model name to export.
|
|
55
|
+
server (str): The server information.
|
|
56
|
+
sql_server_type (str): Type of SQL server (e.g., "snowflake").
|
|
57
|
+
export_args (dict): Additional arguments for export, such as "suite_name" and "engine".
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
dict: A dictionary representation of the Great Expectations suite.
|
|
61
|
+
"""
|
|
62
|
+
expectation_suite_name = export_args.get("suite_name")
|
|
63
|
+
engine = export_args.get("engine")
|
|
12
64
|
model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
model_name,
|
|
16
|
-
)
|
|
65
|
+
sql_server_type = "snowflake" if sql_server_type == "auto" else sql_server_type
|
|
66
|
+
return to_great_expectations(data_contract, model_name, expectation_suite_name, engine, sql_server_type)
|
|
17
67
|
|
|
18
68
|
|
|
19
|
-
def to_great_expectations(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
69
|
+
def to_great_expectations(
|
|
70
|
+
data_contract_spec: DataContractSpecification,
|
|
71
|
+
model_key: str,
|
|
72
|
+
expectation_suite_name: str | None = None,
|
|
73
|
+
engine: str | None = None,
|
|
74
|
+
sql_server_type: str = "snowflake",
|
|
75
|
+
) -> str:
|
|
76
|
+
"""Converts a data contract model to a Great Expectations suite.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
data_contract_spec (DataContractSpecification): The data contract specification.
|
|
80
|
+
model_key (str): The model key.
|
|
81
|
+
expectation_suite_name (str | None): Optional suite name for the expectations.
|
|
82
|
+
engine (str | None): Optional engine type (e.g., "pandas", "spark").
|
|
83
|
+
sql_server_type (str): The type of SQL server (default is "snowflake").
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
str: JSON string of the Great Expectations suite.
|
|
25
87
|
"""
|
|
26
88
|
expectations = []
|
|
89
|
+
if not expectation_suite_name:
|
|
90
|
+
expectation_suite_name = "{model_key}.{contract_version}".format(
|
|
91
|
+
model_key=model_key, contract_version=data_contract_spec.info.version
|
|
92
|
+
)
|
|
27
93
|
model_value = data_contract_spec.models.get(model_key)
|
|
28
94
|
quality_checks = get_quality_checks(data_contract_spec.quality)
|
|
29
|
-
expectations.extend(model_to_expectations(model_value.fields))
|
|
95
|
+
expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
|
|
30
96
|
expectations.extend(checks_to_expectations(quality_checks, model_key))
|
|
31
|
-
model_expectation_suite = to_suite(
|
|
97
|
+
model_expectation_suite = to_suite(expectations, expectation_suite_name)
|
|
32
98
|
|
|
33
99
|
return model_expectation_suite
|
|
34
100
|
|
|
35
101
|
|
|
36
|
-
def to_suite(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
)
|
|
102
|
+
def to_suite(expectations: List[Dict[str, Any]], expectation_suite_name: str) -> str:
|
|
103
|
+
"""Converts a list of expectations to a JSON-formatted suite.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
expectations (List[Dict[str, Any]]): List of expectations.
|
|
107
|
+
expectation_suite_name (str): Name of the expectation suite.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
str: JSON string of the expectation suite.
|
|
111
|
+
"""
|
|
41
112
|
return json.dumps(
|
|
42
113
|
{
|
|
43
114
|
"data_asset_type": "null",
|
|
44
|
-
"expectation_suite_name":
|
|
45
|
-
model_key=model_key, contract_version=contract_version
|
|
46
|
-
),
|
|
115
|
+
"expectation_suite_name": expectation_suite_name,
|
|
47
116
|
"expectations": expectations,
|
|
48
117
|
"meta": {},
|
|
49
118
|
},
|
|
@@ -51,22 +120,53 @@ def to_suite(
|
|
|
51
120
|
)
|
|
52
121
|
|
|
53
122
|
|
|
54
|
-
def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
123
|
+
def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_server_type: str) -> List[Dict[str, Any]]:
|
|
124
|
+
"""Converts model fields to a list of expectations.
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
fields (Dict[str, Field]): Dictionary of model fields.
|
|
128
|
+
engine (str | None): Engine type (e.g., "pandas", "spark").
|
|
129
|
+
sql_server_type (str): SQL server type.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
List[Dict[str, Any]]: List of expectations.
|
|
59
133
|
"""
|
|
60
134
|
expectations = []
|
|
61
135
|
add_column_order_exp(fields, expectations)
|
|
62
136
|
for field_name, field in fields.items():
|
|
63
|
-
add_field_expectations(field_name, field, expectations)
|
|
137
|
+
add_field_expectations(field_name, field, expectations, engine, sql_server_type)
|
|
64
138
|
return expectations
|
|
65
139
|
|
|
66
140
|
|
|
67
|
-
def add_field_expectations(
|
|
141
|
+
def add_field_expectations(
|
|
142
|
+
field_name,
|
|
143
|
+
field: Field,
|
|
144
|
+
expectations: List[Dict[str, Any]],
|
|
145
|
+
engine: str | None,
|
|
146
|
+
sql_server_type: str,
|
|
147
|
+
) -> List[Dict[str, Any]]:
|
|
148
|
+
"""Adds expectations for a specific field based on its properties.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
field_name (str): The name of the field.
|
|
152
|
+
field (Field): The field object.
|
|
153
|
+
expectations (List[Dict[str, Any]]): The expectations list to update.
|
|
154
|
+
engine (str | None): Engine type (e.g., "pandas", "spark").
|
|
155
|
+
sql_server_type (str): SQL server type.
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
List[Dict[str, Any]]: Updated list of expectations.
|
|
159
|
+
"""
|
|
68
160
|
if field.type is not None:
|
|
69
|
-
|
|
161
|
+
if engine == GreatExpectationsEngine.spark.value:
|
|
162
|
+
field_type = to_spark_data_type(field).__class__.__name__
|
|
163
|
+
elif engine == GreatExpectationsEngine.pandas.value:
|
|
164
|
+
field_type = convert_to_pandas_type(field)
|
|
165
|
+
elif engine == GreatExpectationsEngine.sql.value:
|
|
166
|
+
field_type = convert_to_sql_type(field, sql_server_type)
|
|
167
|
+
else:
|
|
168
|
+
field_type = field.type
|
|
169
|
+
expectations.append(to_column_types_exp(field_name, field_type))
|
|
70
170
|
if field.unique:
|
|
71
171
|
expectations.append(to_column_unique_exp(field_name))
|
|
72
172
|
if field.maxLength is not None or field.minLength is not None:
|
|
@@ -74,11 +174,16 @@ def add_field_expectations(field_name, field: Field, expectations: List[Dict[str
|
|
|
74
174
|
if field.minimum is not None or field.maximum is not None:
|
|
75
175
|
expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
|
|
76
176
|
|
|
77
|
-
# TODO: all constraints
|
|
78
177
|
return expectations
|
|
79
178
|
|
|
80
179
|
|
|
81
180
|
def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
|
|
181
|
+
"""Adds expectation for column ordering.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
fields (Dict[str, Field]): Dictionary of fields.
|
|
185
|
+
expectations (List[Dict[str, Any]]): The expectations list to update.
|
|
186
|
+
"""
|
|
82
187
|
expectations.append(
|
|
83
188
|
{
|
|
84
189
|
"expectation_type": "expect_table_columns_to_match_ordered_list",
|
|
@@ -89,6 +194,15 @@ def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str,
|
|
|
89
194
|
|
|
90
195
|
|
|
91
196
|
def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
|
|
197
|
+
"""Creates a column type expectation.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
field_name (str): The name of the field.
|
|
201
|
+
field_type (str): The type of the field.
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
Dict[str, Any]: Column type expectation.
|
|
205
|
+
"""
|
|
92
206
|
return {
|
|
93
207
|
"expectation_type": "expect_column_values_to_be_of_type",
|
|
94
208
|
"kwargs": {"column": field_name, "type_": field_type},
|
|
@@ -97,18 +211,54 @@ def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
|
|
|
97
211
|
|
|
98
212
|
|
|
99
213
|
def to_column_unique_exp(field_name) -> Dict[str, Any]:
|
|
100
|
-
|
|
214
|
+
"""Creates a column uniqueness expectation.
|
|
215
|
+
|
|
216
|
+
Args:
|
|
217
|
+
field_name (str): The name of the field.
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
Dict[str, Any]: Column uniqueness expectation.
|
|
221
|
+
"""
|
|
222
|
+
return {
|
|
223
|
+
"expectation_type": "expect_column_values_to_be_unique",
|
|
224
|
+
"kwargs": {"column": field_name},
|
|
225
|
+
"meta": {},
|
|
226
|
+
}
|
|
101
227
|
|
|
102
228
|
|
|
103
229
|
def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
|
|
230
|
+
"""Creates a column length expectation.
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
field_name (str): The name of the field.
|
|
234
|
+
min_length (int | None): Minimum length.
|
|
235
|
+
max_length (int | None): Maximum length.
|
|
236
|
+
|
|
237
|
+
Returns:
|
|
238
|
+
Dict[str, Any]: Column length expectation.
|
|
239
|
+
"""
|
|
104
240
|
return {
|
|
105
241
|
"expectation_type": "expect_column_value_lengths_to_be_between",
|
|
106
|
-
"kwargs": {
|
|
242
|
+
"kwargs": {
|
|
243
|
+
"column": field_name,
|
|
244
|
+
"min_value": min_length,
|
|
245
|
+
"max_value": max_length,
|
|
246
|
+
},
|
|
107
247
|
"meta": {},
|
|
108
248
|
}
|
|
109
249
|
|
|
110
250
|
|
|
111
251
|
def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
|
|
252
|
+
"""Creates a column min-max value expectation.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
field_name (str): The name of the field.
|
|
256
|
+
minimum (float | None): Minimum value.
|
|
257
|
+
maximum (float | None): Maximum value.
|
|
258
|
+
|
|
259
|
+
Returns:
|
|
260
|
+
Dict[str, Any]: Column min-max value expectation.
|
|
261
|
+
"""
|
|
112
262
|
return {
|
|
113
263
|
"expectation_type": "expect_column_values_to_be_between",
|
|
114
264
|
"kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
|
|
@@ -117,6 +267,14 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
|
|
|
117
267
|
|
|
118
268
|
|
|
119
269
|
def get_quality_checks(quality: Quality) -> Dict[str, Any]:
|
|
270
|
+
"""Retrieves quality checks defined in a data contract.
|
|
271
|
+
|
|
272
|
+
Args:
|
|
273
|
+
quality (Quality): Quality object from the data contract.
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
Dict[str, Any]: Dictionary of quality checks.
|
|
277
|
+
"""
|
|
120
278
|
if quality is None:
|
|
121
279
|
return {}
|
|
122
280
|
if quality.type is None:
|
|
@@ -131,11 +289,14 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
|
|
|
131
289
|
|
|
132
290
|
|
|
133
291
|
def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
|
|
134
|
-
"""
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
292
|
+
"""Converts quality checks to a list of expectations.
|
|
293
|
+
|
|
294
|
+
Args:
|
|
295
|
+
quality_checks (Dict[str, Any]): Dictionary of quality checks by model.
|
|
296
|
+
model_key (str): The model key.
|
|
297
|
+
|
|
298
|
+
Returns:
|
|
299
|
+
List[Dict[str, Any]]: List of expectations for the model.
|
|
139
300
|
"""
|
|
140
301
|
if quality_checks is None or model_key not in quality_checks:
|
|
141
302
|
return []
|
|
@@ -148,3 +309,4 @@ def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> Li
|
|
|
148
309
|
if isinstance(model_quality_checks, str):
|
|
149
310
|
expectation_list = json.loads(model_quality_checks)
|
|
150
311
|
return expectation_list
|
|
312
|
+
return []
|
|
@@ -7,8 +7,8 @@ import pytz
|
|
|
7
7
|
import yaml
|
|
8
8
|
from jinja2 import Environment, PackageLoader, select_autoescape
|
|
9
9
|
|
|
10
|
-
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
11
10
|
from datacontract.export.exporter import Exporter
|
|
11
|
+
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class HtmlExporter(Exporter):
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from typing import Dict
|
|
3
3
|
|
|
4
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
|
|
5
|
-
|
|
6
4
|
from datacontract.export.exporter import Exporter, _check_models_for_export
|
|
5
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class JsonSchemaExporter(Exporter):
|
|
@@ -51,6 +50,8 @@ def to_property(field: Field) -> dict:
|
|
|
51
50
|
property["type"] = json_type
|
|
52
51
|
if json_format is not None:
|
|
53
52
|
property["format"] = json_format
|
|
53
|
+
if field.primaryKey:
|
|
54
|
+
property["primaryKey"] = field.primaryKey
|
|
54
55
|
if field.unique:
|
|
55
56
|
property["unique"] = True
|
|
56
57
|
if json_type == "object":
|
|
@@ -2,8 +2,8 @@ from typing import Dict
|
|
|
2
2
|
|
|
3
3
|
import yaml
|
|
4
4
|
|
|
5
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
|
|
6
5
|
from datacontract.export.exporter import Exporter
|
|
6
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class OdcsV2Exporter(Exporter):
|
|
@@ -3,7 +3,7 @@ from typing import Dict
|
|
|
3
3
|
import yaml
|
|
4
4
|
|
|
5
5
|
from datacontract.export.exporter import Exporter
|
|
6
|
-
from datacontract.model.data_contract_specification import DataContractSpecification,
|
|
6
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class OdcsV3Exporter(Exporter):
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for converting data contract field types to corresponding pandas data types.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from datacontract.model.data_contract_specification import Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def convert_to_pandas_type(field: Field) -> str:
|
|
9
|
+
"""
|
|
10
|
+
Convert a data contract field type to the equivalent pandas data type.
|
|
11
|
+
|
|
12
|
+
Parameters:
|
|
13
|
+
----------
|
|
14
|
+
field : Field
|
|
15
|
+
A Field object containing metadata about the data type of the field.
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
-------
|
|
19
|
+
str
|
|
20
|
+
The corresponding pandas data type as a string.
|
|
21
|
+
"""
|
|
22
|
+
field_type = field.type
|
|
23
|
+
|
|
24
|
+
if field_type in ["string", "varchar", "text"]:
|
|
25
|
+
return "str"
|
|
26
|
+
if field_type in ["integer", "int"]:
|
|
27
|
+
return "int32"
|
|
28
|
+
if field_type == "long":
|
|
29
|
+
return "int64"
|
|
30
|
+
if field_type == "float":
|
|
31
|
+
return "float32"
|
|
32
|
+
if field_type in ["number", "decimal", "numeric", "double"]:
|
|
33
|
+
return "float64"
|
|
34
|
+
if field_type == "boolean":
|
|
35
|
+
return "bool"
|
|
36
|
+
if field_type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]:
|
|
37
|
+
return "datetime64[ns]"
|
|
38
|
+
if field_type == "bytes":
|
|
39
|
+
return "object"
|
|
40
|
+
return "object"
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
from pydantic import BaseModel
|
|
2
|
-
from rdflib import
|
|
3
|
-
|
|
4
|
-
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
2
|
+
from rdflib import RDF, BNode, Graph, Literal, Namespace, URIRef
|
|
5
3
|
|
|
6
4
|
from datacontract.export.exporter import Exporter
|
|
5
|
+
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class RdfExporter(Exporter):
|
|
@@ -58,8 +57,8 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
|
|
|
58
57
|
else:
|
|
59
58
|
g = Graph(base=Namespace(""))
|
|
60
59
|
|
|
61
|
-
dc = Namespace("https://datacontract.com/DataContractSpecification/
|
|
62
|
-
dcx = Namespace("https://datacontract.com/DataContractSpecification/
|
|
60
|
+
dc = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/")
|
|
61
|
+
dcx = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/Extension/")
|
|
63
62
|
|
|
64
63
|
g.bind("dc", dc)
|
|
65
64
|
g.bind("dcx", dcx)
|
|
@@ -62,12 +62,16 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
|
|
|
62
62
|
if field.enum is not None and len(field.enum) > 0:
|
|
63
63
|
checks.append(check_field_enum(field_name, field.enum, quote_field_name))
|
|
64
64
|
if field.quality is not None and len(field.quality) > 0:
|
|
65
|
-
|
|
65
|
+
quality_list = check_quality_list(model_key, field_name, field.quality)
|
|
66
|
+
if (quality_list is not None) and len(quality_list) > 0:
|
|
67
|
+
checks.append(quality_list)
|
|
66
68
|
# TODO references: str = None
|
|
67
69
|
# TODO format
|
|
68
70
|
|
|
69
71
|
if model_value.quality is not None and len(model_value.quality) > 0:
|
|
70
|
-
|
|
72
|
+
quality_list = check_quality_list(model_key, None, model_value.quality)
|
|
73
|
+
if (quality_list is not None) and len(quality_list) > 0:
|
|
74
|
+
checks.append(quality_list)
|
|
71
75
|
|
|
72
76
|
checks_for_model_key = f"checks for {model_key}"
|
|
73
77
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
from pyspark.sql import types
|
|
2
|
+
|
|
3
|
+
from datacontract.export.exporter import Exporter
|
|
2
4
|
from datacontract.model.data_contract_specification import (
|
|
3
5
|
DataContractSpecification,
|
|
4
|
-
Model,
|
|
5
6
|
Field,
|
|
7
|
+
Model,
|
|
6
8
|
)
|
|
7
|
-
from datacontract.export.exporter import Exporter
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
class SparkExporter(Exporter):
|
|
@@ -102,11 +103,11 @@ def to_struct_field(field: Field, field_name: str) -> types.StructField:
|
|
|
102
103
|
Returns:
|
|
103
104
|
types.StructField: The corresponding Spark StructField.
|
|
104
105
|
"""
|
|
105
|
-
data_type =
|
|
106
|
+
data_type = to_spark_data_type(field)
|
|
106
107
|
return types.StructField(name=field_name, dataType=data_type, nullable=not field.required)
|
|
107
108
|
|
|
108
109
|
|
|
109
|
-
def
|
|
110
|
+
def to_spark_data_type(field: Field) -> types.DataType:
|
|
110
111
|
"""
|
|
111
112
|
Convert a field to a Spark DataType.
|
|
112
113
|
|
|
@@ -120,11 +121,11 @@ def to_data_type(field: Field) -> types.DataType:
|
|
|
120
121
|
if field_type is None or field_type in ["null"]:
|
|
121
122
|
return types.NullType()
|
|
122
123
|
if field_type == "array":
|
|
123
|
-
return types.ArrayType(
|
|
124
|
+
return types.ArrayType(to_spark_data_type(field.items))
|
|
124
125
|
if field_type in ["object", "record", "struct"]:
|
|
125
126
|
return types.StructType(to_struct_type(field.fields))
|
|
126
127
|
if field_type == "map":
|
|
127
|
-
return types.MapType(
|
|
128
|
+
return types.MapType(to_spark_data_type(field.keys), to_spark_data_type(field.values))
|
|
128
129
|
if field_type in ["string", "varchar", "text"]:
|
|
129
130
|
return types.StringType()
|
|
130
131
|
if field_type in ["number", "decimal", "numeric"]:
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
+
from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
|
|
1
2
|
from datacontract.export.sql_type_converter import convert_to_sql_type
|
|
2
3
|
from datacontract.model.data_contract_specification import DataContractSpecification, Model
|
|
3
4
|
|
|
4
|
-
from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
|
|
5
|
-
|
|
6
5
|
|
|
7
6
|
class SqlExporter(Exporter):
|
|
8
7
|
def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
|
|
@@ -2,8 +2,7 @@ import ast
|
|
|
2
2
|
import typing
|
|
3
3
|
|
|
4
4
|
import datacontract.model.data_contract_specification as spec
|
|
5
|
-
from datacontract.export.exporter import Exporter
|
|
6
|
-
from datacontract.export.exporter import _determine_sql_server_type
|
|
5
|
+
from datacontract.export.exporter import Exporter, _determine_sql_server_type
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class SQLAlchemyExporter(Exporter):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Server
|
|
4
3
|
from datacontract.export.exporter import Exporter
|
|
4
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Server
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class TerraformExporter(Exporter):
|
|
@@ -3,7 +3,7 @@ from typing import Dict, List
|
|
|
3
3
|
import avro.schema
|
|
4
4
|
|
|
5
5
|
from datacontract.imports.importer import Importer
|
|
6
|
-
from datacontract.model.data_contract_specification import DataContractSpecification,
|
|
6
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
7
7
|
from datacontract.model.exceptions import DataContractException
|
|
8
8
|
|
|
9
9
|
|
|
@@ -3,7 +3,7 @@ import logging
|
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
5
|
from datacontract.imports.importer import Importer
|
|
6
|
-
from datacontract.model.data_contract_specification import DataContractSpecification,
|
|
6
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
7
7
|
from datacontract.model.exceptions import DataContractException
|
|
8
8
|
|
|
9
9
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from pydbml import PyDBML, Database
|
|
2
1
|
from typing import List
|
|
3
2
|
|
|
3
|
+
from pydbml import Database, PyDBML
|
|
4
4
|
from pyparsing import ParseException
|
|
5
5
|
|
|
6
6
|
from datacontract.imports.importer import Importer
|
|
7
7
|
from datacontract.imports.sql_importer import map_type_from_sql
|
|
8
|
-
from datacontract.model.data_contract_specification import DataContractSpecification,
|
|
8
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
9
9
|
from datacontract.model.exceptions import DataContractException
|
|
10
10
|
|
|
11
11
|
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from typing import TypedDict
|
|
3
3
|
|
|
4
|
-
from datacontract.imports.importer import Importer
|
|
5
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
6
4
|
from dbt.artifacts.resources.v1.components import ColumnInfo
|
|
7
5
|
from dbt.contracts.graph.manifest import Manifest
|
|
8
6
|
|
|
7
|
+
from datacontract.imports.importer import Importer
|
|
8
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
9
|
+
|
|
9
10
|
|
|
10
11
|
class DBTImportArgs(TypedDict, total=False):
|
|
11
12
|
"""
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
import boto3
|
|
2
|
-
from typing import List, Dict, Generator
|
|
3
1
|
import re
|
|
2
|
+
from typing import Dict, Generator, List
|
|
3
|
+
|
|
4
|
+
import boto3
|
|
5
|
+
|
|
4
6
|
from datacontract.imports.importer import Importer
|
|
5
7
|
from datacontract.model.data_contract_specification import (
|
|
6
8
|
DataContractSpecification,
|
|
7
|
-
Model,
|
|
8
9
|
Field,
|
|
10
|
+
Model,
|
|
9
11
|
Server,
|
|
10
12
|
)
|
|
11
13
|
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any, Dict
|
|
2
2
|
|
|
3
|
-
from datacontract.imports.importer import Importer
|
|
4
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
|
|
5
|
-
|
|
6
|
-
from pyiceberg.schema import Schema
|
|
7
|
-
from pyiceberg import types as iceberg_types
|
|
8
3
|
from pydantic import ValidationError
|
|
4
|
+
from pyiceberg import types as iceberg_types
|
|
5
|
+
from pyiceberg.schema import Schema
|
|
9
6
|
|
|
7
|
+
from datacontract.imports.importer import Importer
|
|
8
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
10
9
|
from datacontract.model.exceptions import DataContractException
|
|
11
10
|
|
|
12
11
|
|
datacontract/imports/importer.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import importlib.util
|
|
2
2
|
import sys
|
|
3
|
-
|
|
3
|
+
|
|
4
|
+
from datacontract.imports.importer import Importer, ImportFormat
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class ImporterFactory:
|
|
@@ -98,3 +99,8 @@ importer_factory.register_lazy_importer(
|
|
|
98
99
|
module_path="datacontract.imports.iceberg_importer",
|
|
99
100
|
class_name="IcebergImporter",
|
|
100
101
|
)
|
|
102
|
+
importer_factory.register_lazy_importer(
|
|
103
|
+
name=ImportFormat.parquet,
|
|
104
|
+
module_path="datacontract.imports.parquet_importer",
|
|
105
|
+
class_name="ParquetImporter",
|
|
106
|
+
)
|