datacontract-cli 0.10.24__py3-none-any.whl → 0.10.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/api.py +3 -3
- datacontract/cli.py +1 -1
- datacontract/engines/soda/connections/kafka.py +2 -1
- datacontract/export/great_expectations_converter.py +49 -2
- datacontract/export/odcs_v3_exporter.py +183 -140
- datacontract/export/spark_converter.py +1 -1
- datacontract/export/sql_converter.py +4 -0
- datacontract/export/sql_type_converter.py +2 -0
- datacontract/imports/avro_importer.py +23 -23
- datacontract/imports/csv_importer.py +2 -2
- datacontract/imports/excel_importer.py +850 -0
- datacontract/imports/importer.py +4 -2
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_v3_importer.py +202 -145
- datacontract/imports/protobuf_importer.py +0 -2
- datacontract/imports/spark_importer.py +2 -0
- datacontract/lint/linters/description_linter.py +1 -3
- datacontract/lint/linters/field_reference_linter.py +1 -2
- datacontract/lint/linters/notice_period_linter.py +2 -2
- datacontract/lint/linters/valid_constraints_linter.py +3 -3
- datacontract/model/data_contract_specification/__init__.py +1 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/METADATA +59 -18
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/RECORD +27 -26
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/WHEEL +1 -1
- datacontract/model/data_contract_specification.py +0 -327
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/top_level.txt +0 -0
datacontract/api.py
CHANGED
|
@@ -162,7 +162,7 @@ async def test(
|
|
|
162
162
|
server: Annotated[
|
|
163
163
|
str | None,
|
|
164
164
|
Query(
|
|
165
|
-
|
|
165
|
+
examples=["production"],
|
|
166
166
|
description="The server name to test. Optional, if there is only one server.",
|
|
167
167
|
),
|
|
168
168
|
] = None,
|
|
@@ -191,7 +191,7 @@ async def lint(
|
|
|
191
191
|
schema: Annotated[
|
|
192
192
|
str | None,
|
|
193
193
|
Query(
|
|
194
|
-
|
|
194
|
+
examples=["https://datacontract.com/datacontract.schema.json"],
|
|
195
195
|
description="The schema to use for validation. This must be a URL.",
|
|
196
196
|
),
|
|
197
197
|
] = None,
|
|
@@ -220,7 +220,7 @@ def export(
|
|
|
220
220
|
server: Annotated[
|
|
221
221
|
str | None,
|
|
222
222
|
Query(
|
|
223
|
-
|
|
223
|
+
examples=["production"],
|
|
224
224
|
description="The server name to export. Optional, if there is only one server.",
|
|
225
225
|
),
|
|
226
226
|
] = None,
|
datacontract/cli.py
CHANGED
|
@@ -244,7 +244,7 @@ def import_(
|
|
|
244
244
|
] = None,
|
|
245
245
|
source: Annotated[
|
|
246
246
|
Optional[str],
|
|
247
|
-
typer.Option(help="The path to the file
|
|
247
|
+
typer.Option(help="The path to the file that should be imported."),
|
|
248
248
|
] = None,
|
|
249
249
|
dialect: Annotated[
|
|
250
250
|
Optional[str],
|
|
@@ -27,6 +27,7 @@ def create_spark_session():
|
|
|
27
27
|
tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
|
|
28
28
|
atexit.register(tmp_dir.cleanup)
|
|
29
29
|
|
|
30
|
+
pyspark_version = "3.5.5" # MUST be the same as in the pyproject.toml
|
|
30
31
|
spark = (
|
|
31
32
|
SparkSession.builder.appName("datacontract")
|
|
32
33
|
.config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
|
|
@@ -34,7 +35,7 @@ def create_spark_session():
|
|
|
34
35
|
.config("spark.ui.enabled", "false")
|
|
35
36
|
.config(
|
|
36
37
|
"spark.jars.packages",
|
|
37
|
-
"org.apache.spark:spark-sql-kafka-0-10_2.12:
|
|
38
|
+
f"org.apache.spark:spark-sql-kafka-0-10_2.12:{pyspark_version},org.apache.spark:spark-avro_2.12:{pyspark_version}",
|
|
38
39
|
)
|
|
39
40
|
.getOrCreate()
|
|
40
41
|
)
|
|
@@ -19,6 +19,7 @@ from datacontract.export.spark_converter import to_spark_data_type
|
|
|
19
19
|
from datacontract.export.sql_type_converter import convert_to_sql_type
|
|
20
20
|
from datacontract.model.data_contract_specification import (
|
|
21
21
|
DataContractSpecification,
|
|
22
|
+
DeprecatedQuality,
|
|
22
23
|
Field,
|
|
23
24
|
Quality,
|
|
24
25
|
)
|
|
@@ -91,8 +92,14 @@ def to_great_expectations(
|
|
|
91
92
|
model_key=model_key, contract_version=data_contract_spec.info.version
|
|
92
93
|
)
|
|
93
94
|
model_value = data_contract_spec.models.get(model_key)
|
|
94
|
-
|
|
95
|
+
|
|
96
|
+
# Support for Deprecated Quality
|
|
97
|
+
quality_checks = get_deprecated_quality_checks(data_contract_spec.quality)
|
|
98
|
+
|
|
99
|
+
expectations.extend(get_quality_checks(model_value.quality))
|
|
100
|
+
|
|
95
101
|
expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
|
|
102
|
+
|
|
96
103
|
expectations.extend(checks_to_expectations(quality_checks, model_key))
|
|
97
104
|
model_expectation_suite = to_suite(expectations, expectation_suite_name)
|
|
98
105
|
|
|
@@ -135,6 +142,7 @@ def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_serv
|
|
|
135
142
|
add_column_order_exp(fields, expectations)
|
|
136
143
|
for field_name, field in fields.items():
|
|
137
144
|
add_field_expectations(field_name, field, expectations, engine, sql_server_type)
|
|
145
|
+
expectations.extend(get_quality_checks(field.quality, field_name))
|
|
138
146
|
return expectations
|
|
139
147
|
|
|
140
148
|
|
|
@@ -173,6 +181,8 @@ def add_field_expectations(
|
|
|
173
181
|
expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
|
|
174
182
|
if field.minimum is not None or field.maximum is not None:
|
|
175
183
|
expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
|
|
184
|
+
if field.enum is not None and len(field.enum) != 0:
|
|
185
|
+
expectations.append(to_column_enum_exp(field_name, field.enum))
|
|
176
186
|
|
|
177
187
|
return expectations
|
|
178
188
|
|
|
@@ -266,7 +276,24 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
|
|
|
266
276
|
}
|
|
267
277
|
|
|
268
278
|
|
|
269
|
-
def
|
|
279
|
+
def to_column_enum_exp(field_name, enum_list: List[str]) -> Dict[str, Any]:
|
|
280
|
+
"""Creates an expect_column_values_to_be_in_set expectation.
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
field_name (str): The name of the field.
|
|
284
|
+
enum_list (List[str]): The list of allowed values for the field.
|
|
285
|
+
|
|
286
|
+
Returns:
|
|
287
|
+
Dict[str, Any]: Column value in set expectation.
|
|
288
|
+
"""
|
|
289
|
+
return {
|
|
290
|
+
"expectation_type": "expect_column_values_to_be_in_set",
|
|
291
|
+
"kwargs": {"column": field_name, "value_set": enum_list},
|
|
292
|
+
"meta": {},
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def get_deprecated_quality_checks(quality: DeprecatedQuality) -> Dict[str, Any]:
|
|
270
297
|
"""Retrieves quality checks defined in a data contract.
|
|
271
298
|
|
|
272
299
|
Args:
|
|
@@ -288,6 +315,26 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
|
|
|
288
315
|
return quality_specification
|
|
289
316
|
|
|
290
317
|
|
|
318
|
+
def get_quality_checks(qualities: List[Quality], field_name: str | None = None) -> List[Dict[str, Any]]:
|
|
319
|
+
"""Retrieves quality checks defined in a data contract.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
qualities (List[Quality]): List of quality objects from the model specification.
|
|
323
|
+
field_name (str | None): field name if the quality list is attached to a specific field
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
List[Dict[str, Any]]: List of quality checks.
|
|
327
|
+
"""
|
|
328
|
+
quality_specification = []
|
|
329
|
+
for quality in qualities:
|
|
330
|
+
if quality is not None and quality.engine is not None and quality.engine.lower() == "great-expectations":
|
|
331
|
+
ge_expectation = quality.implementation
|
|
332
|
+
if field_name is not None:
|
|
333
|
+
ge_expectation["column"] = field_name
|
|
334
|
+
quality_specification.append(ge_expectation)
|
|
335
|
+
return quality_specification
|
|
336
|
+
|
|
337
|
+
|
|
291
338
|
def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
|
|
292
339
|
"""Converts quality checks to a list of expectations.
|
|
293
340
|
|
|
@@ -1,6 +1,17 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
|
-
import
|
|
1
|
+
from typing import Any, Dict
|
|
2
|
+
|
|
3
|
+
from open_data_contract_standard.model import (
|
|
4
|
+
CustomProperty,
|
|
5
|
+
DataQuality,
|
|
6
|
+
Description,
|
|
7
|
+
OpenDataContractStandard,
|
|
8
|
+
Role,
|
|
9
|
+
SchemaObject,
|
|
10
|
+
SchemaProperty,
|
|
11
|
+
Server,
|
|
12
|
+
ServiceLevelAgreementProperty,
|
|
13
|
+
Support,
|
|
14
|
+
)
|
|
4
15
|
|
|
5
16
|
from datacontract.export.exporter import Exporter
|
|
6
17
|
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
@@ -12,155 +23,148 @@ class OdcsV3Exporter(Exporter):
|
|
|
12
23
|
|
|
13
24
|
|
|
14
25
|
def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
26
|
+
result = OpenDataContractStandard(
|
|
27
|
+
apiVersion="v3.0.1",
|
|
28
|
+
kind="DataContract",
|
|
29
|
+
id=data_contract_spec.id,
|
|
30
|
+
name=data_contract_spec.info.title,
|
|
31
|
+
version=data_contract_spec.info.version,
|
|
32
|
+
status=to_status(data_contract_spec.info.status),
|
|
33
|
+
)
|
|
23
34
|
|
|
24
35
|
if data_contract_spec.terms is not None:
|
|
25
|
-
|
|
26
|
-
|
|
36
|
+
result.description = Description(
|
|
37
|
+
purpose=data_contract_spec.terms.description.strip()
|
|
27
38
|
if data_contract_spec.terms.description is not None
|
|
28
39
|
else None,
|
|
29
|
-
|
|
30
|
-
|
|
40
|
+
usage=data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
|
|
41
|
+
limitations=data_contract_spec.terms.limitations.strip()
|
|
31
42
|
if data_contract_spec.terms.limitations is not None
|
|
32
43
|
else None,
|
|
33
|
-
|
|
44
|
+
)
|
|
34
45
|
|
|
35
|
-
|
|
46
|
+
result.schema_ = []
|
|
36
47
|
for model_key, model_value in data_contract_spec.models.items():
|
|
37
48
|
odcs_schema = to_odcs_schema(model_key, model_value)
|
|
38
|
-
|
|
49
|
+
result.schema_.append(odcs_schema)
|
|
39
50
|
|
|
40
51
|
if data_contract_spec.servicelevels is not None:
|
|
41
52
|
slas = []
|
|
42
53
|
if data_contract_spec.servicelevels.availability is not None:
|
|
43
54
|
slas.append(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
55
|
+
ServiceLevelAgreementProperty(
|
|
56
|
+
property="generalAvailability", value=data_contract_spec.servicelevels.availability.description
|
|
57
|
+
)
|
|
48
58
|
)
|
|
49
59
|
if data_contract_spec.servicelevels.retention is not None:
|
|
50
|
-
slas.append(
|
|
60
|
+
slas.append(
|
|
61
|
+
ServiceLevelAgreementProperty(
|
|
62
|
+
property="retention", value=data_contract_spec.servicelevels.retention.period
|
|
63
|
+
)
|
|
64
|
+
)
|
|
51
65
|
|
|
52
66
|
if len(slas) > 0:
|
|
53
|
-
|
|
67
|
+
result.slaProperties = slas
|
|
54
68
|
|
|
55
69
|
if data_contract_spec.info.contact is not None:
|
|
56
70
|
support = []
|
|
57
71
|
if data_contract_spec.info.contact.email is not None:
|
|
58
|
-
support.append(
|
|
59
|
-
{
|
|
60
|
-
"channel": "email",
|
|
61
|
-
"url": "mailto:" + data_contract_spec.info.contact.email,
|
|
62
|
-
}
|
|
63
|
-
)
|
|
72
|
+
support.append(Support(channel="email", url="mailto:" + data_contract_spec.info.contact.email))
|
|
64
73
|
if data_contract_spec.info.contact.url is not None:
|
|
65
|
-
support.append(
|
|
66
|
-
{
|
|
67
|
-
"channel": "other",
|
|
68
|
-
"url": data_contract_spec.info.contact.url,
|
|
69
|
-
}
|
|
70
|
-
)
|
|
74
|
+
support.append(Support(channel="other", url=data_contract_spec.info.contact.url))
|
|
71
75
|
if len(support) > 0:
|
|
72
|
-
|
|
76
|
+
result.support = support
|
|
73
77
|
|
|
74
78
|
if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
|
|
75
79
|
servers = []
|
|
76
80
|
|
|
77
81
|
for server_key, server_value in data_contract_spec.servers.items():
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
server_dict["type"] = server_value.type
|
|
82
|
+
server = Server(server=server_key, type=server_value.type or "")
|
|
83
|
+
|
|
84
|
+
# Set all the attributes that are not None
|
|
82
85
|
if server_value.environment is not None:
|
|
83
|
-
|
|
86
|
+
server.environment = server_value.environment
|
|
84
87
|
if server_value.account is not None:
|
|
85
|
-
|
|
88
|
+
server.account = server_value.account
|
|
86
89
|
if server_value.database is not None:
|
|
87
|
-
|
|
90
|
+
server.database = server_value.database
|
|
88
91
|
if server_value.schema_ is not None:
|
|
89
|
-
|
|
92
|
+
server.schema_ = server_value.schema_
|
|
90
93
|
if server_value.format is not None:
|
|
91
|
-
|
|
94
|
+
server.format = server_value.format
|
|
92
95
|
if server_value.project is not None:
|
|
93
|
-
|
|
96
|
+
server.project = server_value.project
|
|
94
97
|
if server_value.dataset is not None:
|
|
95
|
-
|
|
98
|
+
server.dataset = server_value.dataset
|
|
96
99
|
if server_value.path is not None:
|
|
97
|
-
|
|
100
|
+
server.path = server_value.path
|
|
98
101
|
if server_value.delimiter is not None:
|
|
99
|
-
|
|
102
|
+
server.delimiter = server_value.delimiter
|
|
100
103
|
if server_value.endpointUrl is not None:
|
|
101
|
-
|
|
104
|
+
server.endpointUrl = server_value.endpointUrl
|
|
102
105
|
if server_value.location is not None:
|
|
103
|
-
|
|
106
|
+
server.location = server_value.location
|
|
104
107
|
if server_value.host is not None:
|
|
105
|
-
|
|
108
|
+
server.host = server_value.host
|
|
106
109
|
if server_value.port is not None:
|
|
107
|
-
|
|
110
|
+
server.port = server_value.port
|
|
108
111
|
if server_value.catalog is not None:
|
|
109
|
-
|
|
112
|
+
server.catalog = server_value.catalog
|
|
110
113
|
if server_value.topic is not None:
|
|
111
|
-
|
|
114
|
+
server.topic = server_value.topic
|
|
112
115
|
if server_value.http_path is not None:
|
|
113
|
-
|
|
116
|
+
server.http_path = server_value.http_path
|
|
114
117
|
if server_value.token is not None:
|
|
115
|
-
|
|
118
|
+
server.token = server_value.token
|
|
116
119
|
if server_value.driver is not None:
|
|
117
|
-
|
|
120
|
+
server.driver = server_value.driver
|
|
121
|
+
|
|
118
122
|
if server_value.roles is not None:
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
servers.append(server_dict)
|
|
123
|
+
server.roles = [Role(role=role.name, description=role.description) for role in server_value.roles]
|
|
124
|
+
|
|
125
|
+
servers.append(server)
|
|
123
126
|
|
|
124
127
|
if len(servers) > 0:
|
|
125
|
-
|
|
128
|
+
result.servers = servers
|
|
126
129
|
|
|
127
|
-
|
|
130
|
+
custom_properties = []
|
|
128
131
|
if data_contract_spec.info.owner is not None:
|
|
129
|
-
|
|
132
|
+
custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
|
|
130
133
|
if data_contract_spec.info.model_extra is not None:
|
|
131
134
|
for key, value in data_contract_spec.info.model_extra.items():
|
|
132
|
-
|
|
133
|
-
if len(odcs["customProperties"]) == 0:
|
|
134
|
-
del odcs["customProperties"]
|
|
135
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
135
136
|
|
|
136
|
-
|
|
137
|
+
if len(custom_properties) > 0:
|
|
138
|
+
result.customProperties = custom_properties
|
|
137
139
|
|
|
140
|
+
return result.to_yaml()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def to_odcs_schema(model_key, model_value: Model) -> SchemaObject:
|
|
144
|
+
schema_obj = SchemaObject(
|
|
145
|
+
name=model_key, physicalName=model_key, logicalType="object", physicalType=model_value.type
|
|
146
|
+
)
|
|
138
147
|
|
|
139
|
-
def to_odcs_schema(model_key, model_value: Model) -> dict:
|
|
140
|
-
odcs_table = {
|
|
141
|
-
"name": model_key,
|
|
142
|
-
"physicalName": model_key,
|
|
143
|
-
"logicalType": "object",
|
|
144
|
-
"physicalType": model_value.type,
|
|
145
|
-
}
|
|
146
148
|
if model_value.description is not None:
|
|
147
|
-
|
|
149
|
+
schema_obj.description = model_value.description
|
|
150
|
+
|
|
148
151
|
properties = to_properties(model_value.fields)
|
|
149
152
|
if properties:
|
|
150
|
-
|
|
153
|
+
schema_obj.properties = properties
|
|
151
154
|
|
|
152
155
|
model_quality = to_odcs_quality_list(model_value.quality)
|
|
153
156
|
if len(model_quality) > 0:
|
|
154
|
-
|
|
157
|
+
schema_obj.quality = model_quality
|
|
155
158
|
|
|
156
|
-
|
|
159
|
+
custom_properties = []
|
|
157
160
|
if model_value.model_extra is not None:
|
|
158
161
|
for key, value in model_value.model_extra.items():
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
163
|
+
|
|
164
|
+
if len(custom_properties) > 0:
|
|
165
|
+
schema_obj.customProperties = custom_properties
|
|
162
166
|
|
|
163
|
-
return
|
|
167
|
+
return schema_obj
|
|
164
168
|
|
|
165
169
|
|
|
166
170
|
def to_properties(fields: Dict[str, Field]) -> list:
|
|
@@ -198,82 +202,119 @@ def to_logical_type(type: str) -> str | None:
|
|
|
198
202
|
return "array"
|
|
199
203
|
if type.lower() in ["array"]:
|
|
200
204
|
return "array"
|
|
205
|
+
if type.lower() in ["variant"]:
|
|
206
|
+
return "variant"
|
|
201
207
|
if type.lower() in ["null"]:
|
|
202
208
|
return None
|
|
203
209
|
return None
|
|
204
210
|
|
|
205
211
|
|
|
206
|
-
def to_physical_type(
|
|
207
|
-
|
|
208
|
-
|
|
212
|
+
def to_physical_type(config: Dict[str, Any]) -> str | None:
|
|
213
|
+
if config is None:
|
|
214
|
+
return None
|
|
215
|
+
if "postgresType" in config:
|
|
216
|
+
return config["postgresType"]
|
|
217
|
+
elif "bigqueryType" in config:
|
|
218
|
+
return config["bigqueryType"]
|
|
219
|
+
elif "snowflakeType" in config:
|
|
220
|
+
return config["snowflakeType"]
|
|
221
|
+
elif "redshiftType" in config:
|
|
222
|
+
return config["redshiftType"]
|
|
223
|
+
elif "sqlserverType" in config:
|
|
224
|
+
return config["sqlserverType"]
|
|
225
|
+
elif "databricksType" in config:
|
|
226
|
+
return config["databricksType"]
|
|
227
|
+
elif "physicalType" in config:
|
|
228
|
+
return config["physicalType"]
|
|
229
|
+
return None
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def to_property(field_name: str, field: Field) -> SchemaProperty:
|
|
233
|
+
property = SchemaProperty(name=field_name)
|
|
209
234
|
|
|
235
|
+
if field.fields:
|
|
236
|
+
properties = []
|
|
237
|
+
for field_name_, field_ in field.fields.items():
|
|
238
|
+
property_ = to_property(field_name_, field_)
|
|
239
|
+
properties.append(property_)
|
|
240
|
+
property.properties = properties
|
|
241
|
+
|
|
242
|
+
if field.items:
|
|
243
|
+
items = to_property(field_name, field.items)
|
|
244
|
+
items.name = None # Clear the name for items
|
|
245
|
+
property.items = items
|
|
210
246
|
|
|
211
|
-
def to_property(field_name: str, field: Field) -> dict:
|
|
212
|
-
property = {"name": field_name}
|
|
213
247
|
if field.title is not None:
|
|
214
|
-
property
|
|
248
|
+
property.businessName = field.title
|
|
249
|
+
|
|
215
250
|
if field.type is not None:
|
|
216
|
-
property
|
|
217
|
-
property
|
|
251
|
+
property.logicalType = to_logical_type(field.type)
|
|
252
|
+
property.physicalType = to_physical_type(field.config)
|
|
253
|
+
|
|
218
254
|
if field.description is not None:
|
|
219
|
-
property
|
|
255
|
+
property.description = field.description
|
|
256
|
+
|
|
220
257
|
if field.required is not None:
|
|
221
|
-
property
|
|
258
|
+
property.required = field.required
|
|
259
|
+
|
|
222
260
|
if field.unique is not None:
|
|
223
|
-
property
|
|
261
|
+
property.unique = field.unique
|
|
262
|
+
|
|
224
263
|
if field.classification is not None:
|
|
225
|
-
property
|
|
264
|
+
property.classification = field.classification
|
|
265
|
+
|
|
226
266
|
if field.examples is not None:
|
|
227
|
-
property
|
|
267
|
+
property.examples = field.examples.copy()
|
|
268
|
+
|
|
228
269
|
if field.example is not None:
|
|
229
|
-
property
|
|
270
|
+
property.examples = [field.example]
|
|
271
|
+
|
|
230
272
|
if field.primaryKey is not None and field.primaryKey:
|
|
231
|
-
property
|
|
232
|
-
property
|
|
273
|
+
property.primaryKey = field.primaryKey
|
|
274
|
+
property.primaryKeyPosition = 1
|
|
275
|
+
|
|
233
276
|
if field.primary is not None and field.primary:
|
|
234
|
-
property
|
|
235
|
-
property
|
|
277
|
+
property.primaryKey = field.primary
|
|
278
|
+
property.primaryKeyPosition = 1
|
|
236
279
|
|
|
237
|
-
|
|
280
|
+
custom_properties = []
|
|
238
281
|
if field.model_extra is not None:
|
|
239
282
|
for key, value in field.model_extra.items():
|
|
240
|
-
|
|
283
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
284
|
+
|
|
241
285
|
if field.pii is not None:
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
286
|
+
custom_properties.append(CustomProperty(property="pii", value=field.pii))
|
|
287
|
+
|
|
288
|
+
if len(custom_properties) > 0:
|
|
289
|
+
property.customProperties = custom_properties
|
|
245
290
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
property["tags"].extend(field.tags)
|
|
249
|
-
if not property["tags"]:
|
|
250
|
-
del property["tags"]
|
|
291
|
+
if field.tags is not None and len(field.tags) > 0:
|
|
292
|
+
property.tags = field.tags
|
|
251
293
|
|
|
252
|
-
|
|
294
|
+
logical_type_options = {}
|
|
253
295
|
if field.minLength is not None:
|
|
254
|
-
|
|
296
|
+
logical_type_options["minLength"] = field.minLength
|
|
255
297
|
if field.maxLength is not None:
|
|
256
|
-
|
|
298
|
+
logical_type_options["maxLength"] = field.maxLength
|
|
257
299
|
if field.pattern is not None:
|
|
258
|
-
|
|
300
|
+
logical_type_options["pattern"] = field.pattern
|
|
259
301
|
if field.minimum is not None:
|
|
260
|
-
|
|
302
|
+
logical_type_options["minimum"] = field.minimum
|
|
261
303
|
if field.maximum is not None:
|
|
262
|
-
|
|
304
|
+
logical_type_options["maximum"] = field.maximum
|
|
263
305
|
if field.exclusiveMinimum is not None:
|
|
264
|
-
|
|
306
|
+
logical_type_options["exclusiveMinimum"] = field.exclusiveMinimum
|
|
265
307
|
if field.exclusiveMaximum is not None:
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
308
|
+
logical_type_options["exclusiveMaximum"] = field.exclusiveMaximum
|
|
309
|
+
|
|
310
|
+
if logical_type_options:
|
|
311
|
+
property.logicalTypeOptions = logical_type_options
|
|
269
312
|
|
|
270
313
|
if field.quality is not None:
|
|
271
314
|
quality_list = field.quality
|
|
272
315
|
quality_property = to_odcs_quality_list(quality_list)
|
|
273
316
|
if len(quality_property) > 0:
|
|
274
|
-
property
|
|
275
|
-
|
|
276
|
-
# todo enum
|
|
317
|
+
property.quality = quality_property
|
|
277
318
|
|
|
278
319
|
return property
|
|
279
320
|
|
|
@@ -286,33 +327,35 @@ def to_odcs_quality_list(quality_list):
|
|
|
286
327
|
|
|
287
328
|
|
|
288
329
|
def to_odcs_quality(quality):
|
|
289
|
-
|
|
330
|
+
quality_obj = DataQuality(type=quality.type)
|
|
331
|
+
|
|
290
332
|
if quality.description is not None:
|
|
291
|
-
|
|
333
|
+
quality_obj.description = quality.description
|
|
292
334
|
if quality.query is not None:
|
|
293
|
-
|
|
335
|
+
quality_obj.query = quality.query
|
|
294
336
|
# dialect is not supported in v3.0.0
|
|
295
337
|
if quality.mustBe is not None:
|
|
296
|
-
|
|
338
|
+
quality_obj.mustBe = quality.mustBe
|
|
297
339
|
if quality.mustNotBe is not None:
|
|
298
|
-
|
|
340
|
+
quality_obj.mustNotBe = quality.mustNotBe
|
|
299
341
|
if quality.mustBeGreaterThan is not None:
|
|
300
|
-
|
|
342
|
+
quality_obj.mustBeGreaterThan = quality.mustBeGreaterThan
|
|
301
343
|
if quality.mustBeGreaterThanOrEqualTo is not None:
|
|
302
|
-
|
|
344
|
+
quality_obj.mustBeGreaterOrEqualTo = quality.mustBeGreaterThanOrEqualTo
|
|
303
345
|
if quality.mustBeLessThan is not None:
|
|
304
|
-
|
|
346
|
+
quality_obj.mustBeLessThan = quality.mustBeLessThan
|
|
305
347
|
if quality.mustBeLessThanOrEqualTo is not None:
|
|
306
|
-
|
|
348
|
+
quality_obj.mustBeLessOrEqualTo = quality.mustBeLessThanOrEqualTo
|
|
307
349
|
if quality.mustBeBetween is not None:
|
|
308
|
-
|
|
350
|
+
quality_obj.mustBeBetween = quality.mustBeBetween
|
|
309
351
|
if quality.mustNotBeBetween is not None:
|
|
310
|
-
|
|
352
|
+
quality_obj.mustNotBeBetween = quality.mustNotBeBetween
|
|
311
353
|
if quality.engine is not None:
|
|
312
|
-
|
|
354
|
+
quality_obj.engine = quality.engine
|
|
313
355
|
if quality.implementation is not None:
|
|
314
|
-
|
|
315
|
-
|
|
356
|
+
quality_obj.implementation = quality.implementation
|
|
357
|
+
|
|
358
|
+
return quality_obj
|
|
316
359
|
|
|
317
360
|
|
|
318
361
|
def to_status(status):
|
|
@@ -175,7 +175,7 @@ def print_schema(dtype: types.DataType) -> str:
|
|
|
175
175
|
Returns:
|
|
176
176
|
str: The indented text.
|
|
177
177
|
"""
|
|
178
|
-
return "\n".join([f'
|
|
178
|
+
return "\n".join([f"{' ' * level}{line}" for line in text.split("\n")])
|
|
179
179
|
|
|
180
180
|
def repr_column(column: types.StructField) -> str:
|
|
181
181
|
"""
|
|
@@ -117,6 +117,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
|
|
|
117
117
|
result += " primary key"
|
|
118
118
|
if server_type == "databricks" and field.description is not None:
|
|
119
119
|
result += f' COMMENT "{_escape(field.description)}"'
|
|
120
|
+
if server_type == "snowflake" and field.description is not None:
|
|
121
|
+
result += f" COMMENT '{_escape(field.description)}'"
|
|
120
122
|
if current_field_index < fields:
|
|
121
123
|
result += ","
|
|
122
124
|
result += "\n"
|
|
@@ -124,6 +126,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
|
|
|
124
126
|
result += ")"
|
|
125
127
|
if server_type == "databricks" and model.description is not None:
|
|
126
128
|
result += f' COMMENT "{_escape(model.description)}"'
|
|
129
|
+
if server_type == "snowflake" and model.description is not None:
|
|
130
|
+
result += f" COMMENT='{_escape(model.description)}'"
|
|
127
131
|
result += ";\n"
|
|
128
132
|
return result
|
|
129
133
|
|
|
@@ -197,6 +197,8 @@ def convert_to_databricks(field: Field) -> None | str:
|
|
|
197
197
|
if type.lower() in ["array"]:
|
|
198
198
|
item_type = convert_to_databricks(field.items)
|
|
199
199
|
return f"ARRAY<{item_type}>"
|
|
200
|
+
if type.lower() in ["variant"]:
|
|
201
|
+
return "VARIANT"
|
|
200
202
|
return None
|
|
201
203
|
|
|
202
204
|
|