datacontract-cli 0.10.24__py3-none-any.whl → 0.10.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/api.py +3 -3
- datacontract/cli.py +1 -1
- datacontract/export/great_expectations_converter.py +49 -2
- datacontract/export/odcs_v3_exporter.py +161 -136
- datacontract/export/spark_converter.py +1 -1
- datacontract/imports/avro_importer.py +23 -23
- datacontract/imports/csv_importer.py +2 -2
- datacontract/imports/excel_importer.py +850 -0
- datacontract/imports/importer.py +4 -2
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_v3_importer.py +202 -145
- datacontract/imports/protobuf_importer.py +0 -2
- datacontract/lint/linters/description_linter.py +1 -3
- datacontract/lint/linters/field_reference_linter.py +1 -2
- datacontract/lint/linters/notice_period_linter.py +2 -2
- datacontract/lint/linters/valid_constraints_linter.py +3 -3
- datacontract/model/data_contract_specification/__init__.py +1 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.25.dist-info}/METADATA +33 -5
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.25.dist-info}/RECORD +23 -22
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.25.dist-info}/WHEEL +1 -1
- datacontract/model/data_contract_specification.py +0 -327
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.25.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.25.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.25.dist-info}/top_level.txt +0 -0
datacontract/api.py
CHANGED
|
@@ -162,7 +162,7 @@ async def test(
|
|
|
162
162
|
server: Annotated[
|
|
163
163
|
str | None,
|
|
164
164
|
Query(
|
|
165
|
-
|
|
165
|
+
examples=["production"],
|
|
166
166
|
description="The server name to test. Optional, if there is only one server.",
|
|
167
167
|
),
|
|
168
168
|
] = None,
|
|
@@ -191,7 +191,7 @@ async def lint(
|
|
|
191
191
|
schema: Annotated[
|
|
192
192
|
str | None,
|
|
193
193
|
Query(
|
|
194
|
-
|
|
194
|
+
examples=["https://datacontract.com/datacontract.schema.json"],
|
|
195
195
|
description="The schema to use for validation. This must be a URL.",
|
|
196
196
|
),
|
|
197
197
|
] = None,
|
|
@@ -220,7 +220,7 @@ def export(
|
|
|
220
220
|
server: Annotated[
|
|
221
221
|
str | None,
|
|
222
222
|
Query(
|
|
223
|
-
|
|
223
|
+
examples=["production"],
|
|
224
224
|
description="The server name to export. Optional, if there is only one server.",
|
|
225
225
|
),
|
|
226
226
|
] = None,
|
datacontract/cli.py
CHANGED
|
@@ -244,7 +244,7 @@ def import_(
|
|
|
244
244
|
] = None,
|
|
245
245
|
source: Annotated[
|
|
246
246
|
Optional[str],
|
|
247
|
-
typer.Option(help="The path to the file
|
|
247
|
+
typer.Option(help="The path to the file that should be imported."),
|
|
248
248
|
] = None,
|
|
249
249
|
dialect: Annotated[
|
|
250
250
|
Optional[str],
|
|
@@ -19,6 +19,7 @@ from datacontract.export.spark_converter import to_spark_data_type
|
|
|
19
19
|
from datacontract.export.sql_type_converter import convert_to_sql_type
|
|
20
20
|
from datacontract.model.data_contract_specification import (
|
|
21
21
|
DataContractSpecification,
|
|
22
|
+
DeprecatedQuality,
|
|
22
23
|
Field,
|
|
23
24
|
Quality,
|
|
24
25
|
)
|
|
@@ -91,8 +92,14 @@ def to_great_expectations(
|
|
|
91
92
|
model_key=model_key, contract_version=data_contract_spec.info.version
|
|
92
93
|
)
|
|
93
94
|
model_value = data_contract_spec.models.get(model_key)
|
|
94
|
-
|
|
95
|
+
|
|
96
|
+
# Support for Deprecated Quality
|
|
97
|
+
quality_checks = get_deprecated_quality_checks(data_contract_spec.quality)
|
|
98
|
+
|
|
99
|
+
expectations.extend(get_quality_checks(model_value.quality))
|
|
100
|
+
|
|
95
101
|
expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
|
|
102
|
+
|
|
96
103
|
expectations.extend(checks_to_expectations(quality_checks, model_key))
|
|
97
104
|
model_expectation_suite = to_suite(expectations, expectation_suite_name)
|
|
98
105
|
|
|
@@ -135,6 +142,7 @@ def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_serv
|
|
|
135
142
|
add_column_order_exp(fields, expectations)
|
|
136
143
|
for field_name, field in fields.items():
|
|
137
144
|
add_field_expectations(field_name, field, expectations, engine, sql_server_type)
|
|
145
|
+
expectations.extend(get_quality_checks(field.quality, field_name))
|
|
138
146
|
return expectations
|
|
139
147
|
|
|
140
148
|
|
|
@@ -173,6 +181,8 @@ def add_field_expectations(
|
|
|
173
181
|
expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
|
|
174
182
|
if field.minimum is not None or field.maximum is not None:
|
|
175
183
|
expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
|
|
184
|
+
if field.enum is not None and len(field.enum) != 0:
|
|
185
|
+
expectations.append(to_column_enum_exp(field_name, field.enum))
|
|
176
186
|
|
|
177
187
|
return expectations
|
|
178
188
|
|
|
@@ -266,7 +276,24 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
|
|
|
266
276
|
}
|
|
267
277
|
|
|
268
278
|
|
|
269
|
-
def
|
|
279
|
+
def to_column_enum_exp(field_name, enum_list: List[str]) -> Dict[str, Any]:
|
|
280
|
+
"""Creates an expect_column_values_to_be_in_set expectation.
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
field_name (str): The name of the field.
|
|
284
|
+
enum_list (List[str]): List of allowed values for the field.
|
|
285
|
+
|
|
286
|
+
Returns:
|
|
287
|
+
Dict[str, Any]: Column value in set expectation.
|
|
288
|
+
"""
|
|
289
|
+
return {
|
|
290
|
+
"expectation_type": "expect_column_values_to_be_in_set",
|
|
291
|
+
"kwargs": {"column": field_name, "value_set": enum_list},
|
|
292
|
+
"meta": {},
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def get_deprecated_quality_checks(quality: DeprecatedQuality) -> Dict[str, Any]:
|
|
270
297
|
"""Retrieves quality checks defined in a data contract.
|
|
271
298
|
|
|
272
299
|
Args:
|
|
@@ -288,6 +315,26 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
|
|
|
288
315
|
return quality_specification
|
|
289
316
|
|
|
290
317
|
|
|
318
|
+
def get_quality_checks(qualities: List[Quality], field_name: str | None = None) -> List[Dict[str, Any]]:
|
|
319
|
+
"""Retrieves quality checks defined in a data contract.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
qualities (List[Quality]): List of quality object from the model specification.
|
|
323
|
+
field_name (str | None): field name if the quality list is attached to a specific field
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
List[Dict[str, Any]]: List of quality checks.
|
|
327
|
+
"""
|
|
328
|
+
quality_specification = []
|
|
329
|
+
for quality in qualities:
|
|
330
|
+
if quality is not None and quality.engine is not None and quality.engine.lower() == "great-expectations":
|
|
331
|
+
ge_expectation = quality.implementation
|
|
332
|
+
if field_name is not None:
|
|
333
|
+
ge_expectation["column"] = field_name
|
|
334
|
+
quality_specification.append(ge_expectation)
|
|
335
|
+
return quality_specification
|
|
336
|
+
|
|
337
|
+
|
|
291
338
|
def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
|
|
292
339
|
"""Converts quality checks to a list of expectations.
|
|
293
340
|
|
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from open_data_contract_standard.model import (
|
|
4
|
+
CustomProperty,
|
|
5
|
+
DataQuality,
|
|
6
|
+
Description,
|
|
7
|
+
OpenDataContractStandard,
|
|
8
|
+
Role,
|
|
9
|
+
SchemaObject,
|
|
10
|
+
SchemaProperty,
|
|
11
|
+
Server,
|
|
12
|
+
ServiceLevelAgreementProperty,
|
|
13
|
+
Support,
|
|
14
|
+
)
|
|
4
15
|
|
|
5
16
|
from datacontract.export.exporter import Exporter
|
|
6
17
|
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
@@ -12,155 +23,148 @@ class OdcsV3Exporter(Exporter):
|
|
|
12
23
|
|
|
13
24
|
|
|
14
25
|
def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
26
|
+
result = OpenDataContractStandard(
|
|
27
|
+
apiVersion="v3.0.1",
|
|
28
|
+
kind="DataContract",
|
|
29
|
+
id=data_contract_spec.id,
|
|
30
|
+
name=data_contract_spec.info.title,
|
|
31
|
+
version=data_contract_spec.info.version,
|
|
32
|
+
status=to_status(data_contract_spec.info.status),
|
|
33
|
+
)
|
|
23
34
|
|
|
24
35
|
if data_contract_spec.terms is not None:
|
|
25
|
-
|
|
26
|
-
|
|
36
|
+
result.description = Description(
|
|
37
|
+
purpose=data_contract_spec.terms.description.strip()
|
|
27
38
|
if data_contract_spec.terms.description is not None
|
|
28
39
|
else None,
|
|
29
|
-
|
|
30
|
-
|
|
40
|
+
usage=data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
|
|
41
|
+
limitations=data_contract_spec.terms.limitations.strip()
|
|
31
42
|
if data_contract_spec.terms.limitations is not None
|
|
32
43
|
else None,
|
|
33
|
-
|
|
44
|
+
)
|
|
34
45
|
|
|
35
|
-
|
|
46
|
+
result.schema_ = []
|
|
36
47
|
for model_key, model_value in data_contract_spec.models.items():
|
|
37
48
|
odcs_schema = to_odcs_schema(model_key, model_value)
|
|
38
|
-
|
|
49
|
+
result.schema_.append(odcs_schema)
|
|
39
50
|
|
|
40
51
|
if data_contract_spec.servicelevels is not None:
|
|
41
52
|
slas = []
|
|
42
53
|
if data_contract_spec.servicelevels.availability is not None:
|
|
43
54
|
slas.append(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
55
|
+
ServiceLevelAgreementProperty(
|
|
56
|
+
property="generalAvailability", value=data_contract_spec.servicelevels.availability.description
|
|
57
|
+
)
|
|
48
58
|
)
|
|
49
59
|
if data_contract_spec.servicelevels.retention is not None:
|
|
50
|
-
slas.append(
|
|
60
|
+
slas.append(
|
|
61
|
+
ServiceLevelAgreementProperty(
|
|
62
|
+
property="retention", value=data_contract_spec.servicelevels.retention.period
|
|
63
|
+
)
|
|
64
|
+
)
|
|
51
65
|
|
|
52
66
|
if len(slas) > 0:
|
|
53
|
-
|
|
67
|
+
result.slaProperties = slas
|
|
54
68
|
|
|
55
69
|
if data_contract_spec.info.contact is not None:
|
|
56
70
|
support = []
|
|
57
71
|
if data_contract_spec.info.contact.email is not None:
|
|
58
|
-
support.append(
|
|
59
|
-
{
|
|
60
|
-
"channel": "email",
|
|
61
|
-
"url": "mailto:" + data_contract_spec.info.contact.email,
|
|
62
|
-
}
|
|
63
|
-
)
|
|
72
|
+
support.append(Support(channel="email", url="mailto:" + data_contract_spec.info.contact.email))
|
|
64
73
|
if data_contract_spec.info.contact.url is not None:
|
|
65
|
-
support.append(
|
|
66
|
-
{
|
|
67
|
-
"channel": "other",
|
|
68
|
-
"url": data_contract_spec.info.contact.url,
|
|
69
|
-
}
|
|
70
|
-
)
|
|
74
|
+
support.append(Support(channel="other", url=data_contract_spec.info.contact.url))
|
|
71
75
|
if len(support) > 0:
|
|
72
|
-
|
|
76
|
+
result.support = support
|
|
73
77
|
|
|
74
78
|
if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
|
|
75
79
|
servers = []
|
|
76
80
|
|
|
77
81
|
for server_key, server_value in data_contract_spec.servers.items():
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
server_dict["type"] = server_value.type
|
|
82
|
+
server = Server(server=server_key, type=server_value.type or "")
|
|
83
|
+
|
|
84
|
+
# Set all the attributes that are not None
|
|
82
85
|
if server_value.environment is not None:
|
|
83
|
-
|
|
86
|
+
server.environment = server_value.environment
|
|
84
87
|
if server_value.account is not None:
|
|
85
|
-
|
|
88
|
+
server.account = server_value.account
|
|
86
89
|
if server_value.database is not None:
|
|
87
|
-
|
|
90
|
+
server.database = server_value.database
|
|
88
91
|
if server_value.schema_ is not None:
|
|
89
|
-
|
|
92
|
+
server.schema_ = server_value.schema_
|
|
90
93
|
if server_value.format is not None:
|
|
91
|
-
|
|
94
|
+
server.format = server_value.format
|
|
92
95
|
if server_value.project is not None:
|
|
93
|
-
|
|
96
|
+
server.project = server_value.project
|
|
94
97
|
if server_value.dataset is not None:
|
|
95
|
-
|
|
98
|
+
server.dataset = server_value.dataset
|
|
96
99
|
if server_value.path is not None:
|
|
97
|
-
|
|
100
|
+
server.path = server_value.path
|
|
98
101
|
if server_value.delimiter is not None:
|
|
99
|
-
|
|
102
|
+
server.delimiter = server_value.delimiter
|
|
100
103
|
if server_value.endpointUrl is not None:
|
|
101
|
-
|
|
104
|
+
server.endpointUrl = server_value.endpointUrl
|
|
102
105
|
if server_value.location is not None:
|
|
103
|
-
|
|
106
|
+
server.location = server_value.location
|
|
104
107
|
if server_value.host is not None:
|
|
105
|
-
|
|
108
|
+
server.host = server_value.host
|
|
106
109
|
if server_value.port is not None:
|
|
107
|
-
|
|
110
|
+
server.port = server_value.port
|
|
108
111
|
if server_value.catalog is not None:
|
|
109
|
-
|
|
112
|
+
server.catalog = server_value.catalog
|
|
110
113
|
if server_value.topic is not None:
|
|
111
|
-
|
|
114
|
+
server.topic = server_value.topic
|
|
112
115
|
if server_value.http_path is not None:
|
|
113
|
-
|
|
116
|
+
server.http_path = server_value.http_path
|
|
114
117
|
if server_value.token is not None:
|
|
115
|
-
|
|
118
|
+
server.token = server_value.token
|
|
116
119
|
if server_value.driver is not None:
|
|
117
|
-
|
|
120
|
+
server.driver = server_value.driver
|
|
121
|
+
|
|
118
122
|
if server_value.roles is not None:
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
servers.append(server_dict)
|
|
123
|
+
server.roles = [Role(role=role.name, description=role.description) for role in server_value.roles]
|
|
124
|
+
|
|
125
|
+
servers.append(server)
|
|
123
126
|
|
|
124
127
|
if len(servers) > 0:
|
|
125
|
-
|
|
128
|
+
result.servers = servers
|
|
126
129
|
|
|
127
|
-
|
|
130
|
+
custom_properties = []
|
|
128
131
|
if data_contract_spec.info.owner is not None:
|
|
129
|
-
|
|
132
|
+
custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
|
|
130
133
|
if data_contract_spec.info.model_extra is not None:
|
|
131
134
|
for key, value in data_contract_spec.info.model_extra.items():
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
136
|
+
|
|
137
|
+
if len(custom_properties) > 0:
|
|
138
|
+
result.customProperties = custom_properties
|
|
135
139
|
|
|
136
|
-
return
|
|
140
|
+
return result.to_yaml()
|
|
137
141
|
|
|
138
142
|
|
|
139
|
-
def to_odcs_schema(model_key, model_value: Model) ->
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
"physicalType": model_value.type,
|
|
145
|
-
}
|
|
143
|
+
def to_odcs_schema(model_key, model_value: Model) -> SchemaObject:
|
|
144
|
+
schema_obj = SchemaObject(
|
|
145
|
+
name=model_key, physicalName=model_key, logicalType="object", physicalType=model_value.type
|
|
146
|
+
)
|
|
147
|
+
|
|
146
148
|
if model_value.description is not None:
|
|
147
|
-
|
|
149
|
+
schema_obj.description = model_value.description
|
|
150
|
+
|
|
148
151
|
properties = to_properties(model_value.fields)
|
|
149
152
|
if properties:
|
|
150
|
-
|
|
153
|
+
schema_obj.properties = properties
|
|
151
154
|
|
|
152
155
|
model_quality = to_odcs_quality_list(model_value.quality)
|
|
153
156
|
if len(model_quality) > 0:
|
|
154
|
-
|
|
157
|
+
schema_obj.quality = model_quality
|
|
155
158
|
|
|
156
|
-
|
|
159
|
+
custom_properties = []
|
|
157
160
|
if model_value.model_extra is not None:
|
|
158
161
|
for key, value in model_value.model_extra.items():
|
|
159
|
-
|
|
160
|
-
if len(odcs_table["customProperties"]) == 0:
|
|
161
|
-
del odcs_table["customProperties"]
|
|
162
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
162
163
|
|
|
163
|
-
|
|
164
|
+
if len(custom_properties) > 0:
|
|
165
|
+
schema_obj.customProperties = custom_properties
|
|
166
|
+
|
|
167
|
+
return schema_obj
|
|
164
168
|
|
|
165
169
|
|
|
166
170
|
def to_properties(fields: Dict[str, Field]) -> list:
|
|
@@ -204,76 +208,95 @@ def to_logical_type(type: str) -> str | None:
|
|
|
204
208
|
|
|
205
209
|
|
|
206
210
|
def to_physical_type(type: str) -> str | None:
|
|
207
|
-
# TODO: to we need to do a server mapping here?
|
|
208
211
|
return type
|
|
209
212
|
|
|
210
213
|
|
|
211
|
-
def to_property(field_name: str, field: Field) ->
|
|
212
|
-
property =
|
|
214
|
+
def to_property(field_name: str, field: Field) -> SchemaProperty:
|
|
215
|
+
property = SchemaProperty(name=field_name)
|
|
216
|
+
|
|
217
|
+
if field.fields:
|
|
218
|
+
properties = []
|
|
219
|
+
for field_name_, field_ in field.fields.items():
|
|
220
|
+
property_ = to_property(field_name_, field_)
|
|
221
|
+
properties.append(property_)
|
|
222
|
+
property.properties = properties
|
|
223
|
+
|
|
224
|
+
if field.items:
|
|
225
|
+
items = to_property(field_name, field.items)
|
|
226
|
+
items.name = None # Clear the name for items
|
|
227
|
+
property.items = items
|
|
228
|
+
|
|
213
229
|
if field.title is not None:
|
|
214
|
-
property
|
|
230
|
+
property.businessName = field.title
|
|
231
|
+
|
|
215
232
|
if field.type is not None:
|
|
216
|
-
property
|
|
217
|
-
property
|
|
233
|
+
property.logicalType = to_logical_type(field.type)
|
|
234
|
+
property.physicalType = to_physical_type(field.type)
|
|
235
|
+
|
|
218
236
|
if field.description is not None:
|
|
219
|
-
property
|
|
237
|
+
property.description = field.description
|
|
238
|
+
|
|
220
239
|
if field.required is not None:
|
|
221
|
-
property
|
|
240
|
+
property.required = field.required
|
|
241
|
+
|
|
222
242
|
if field.unique is not None:
|
|
223
|
-
property
|
|
243
|
+
property.unique = field.unique
|
|
244
|
+
|
|
224
245
|
if field.classification is not None:
|
|
225
|
-
property
|
|
246
|
+
property.classification = field.classification
|
|
247
|
+
|
|
226
248
|
if field.examples is not None:
|
|
227
|
-
property
|
|
249
|
+
property.examples = field.examples.copy()
|
|
250
|
+
|
|
228
251
|
if field.example is not None:
|
|
229
|
-
property
|
|
252
|
+
property.examples = [field.example]
|
|
253
|
+
|
|
230
254
|
if field.primaryKey is not None and field.primaryKey:
|
|
231
|
-
property
|
|
232
|
-
property
|
|
255
|
+
property.primaryKey = field.primaryKey
|
|
256
|
+
property.primaryKeyPosition = 1
|
|
257
|
+
|
|
233
258
|
if field.primary is not None and field.primary:
|
|
234
|
-
property
|
|
235
|
-
property
|
|
259
|
+
property.primaryKey = field.primary
|
|
260
|
+
property.primaryKeyPosition = 1
|
|
236
261
|
|
|
237
|
-
|
|
262
|
+
custom_properties = []
|
|
238
263
|
if field.model_extra is not None:
|
|
239
264
|
for key, value in field.model_extra.items():
|
|
240
|
-
|
|
265
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
266
|
+
|
|
241
267
|
if field.pii is not None:
|
|
242
|
-
|
|
243
|
-
if property.get("customProperties") is not None and len(property["customProperties"]) == 0:
|
|
244
|
-
del property["customProperties"]
|
|
268
|
+
custom_properties.append(CustomProperty(property="pii", value=field.pii))
|
|
245
269
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
property["tags"].extend(field.tags)
|
|
249
|
-
if not property["tags"]:
|
|
250
|
-
del property["tags"]
|
|
270
|
+
if len(custom_properties) > 0:
|
|
271
|
+
property.customProperties = custom_properties
|
|
251
272
|
|
|
252
|
-
|
|
273
|
+
if field.tags is not None and len(field.tags) > 0:
|
|
274
|
+
property.tags = field.tags
|
|
275
|
+
|
|
276
|
+
logical_type_options = {}
|
|
253
277
|
if field.minLength is not None:
|
|
254
|
-
|
|
278
|
+
logical_type_options["minLength"] = field.minLength
|
|
255
279
|
if field.maxLength is not None:
|
|
256
|
-
|
|
280
|
+
logical_type_options["maxLength"] = field.maxLength
|
|
257
281
|
if field.pattern is not None:
|
|
258
|
-
|
|
282
|
+
logical_type_options["pattern"] = field.pattern
|
|
259
283
|
if field.minimum is not None:
|
|
260
|
-
|
|
284
|
+
logical_type_options["minimum"] = field.minimum
|
|
261
285
|
if field.maximum is not None:
|
|
262
|
-
|
|
286
|
+
logical_type_options["maximum"] = field.maximum
|
|
263
287
|
if field.exclusiveMinimum is not None:
|
|
264
|
-
|
|
288
|
+
logical_type_options["exclusiveMinimum"] = field.exclusiveMinimum
|
|
265
289
|
if field.exclusiveMaximum is not None:
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
290
|
+
logical_type_options["exclusiveMaximum"] = field.exclusiveMaximum
|
|
291
|
+
|
|
292
|
+
if logical_type_options:
|
|
293
|
+
property.logicalTypeOptions = logical_type_options
|
|
269
294
|
|
|
270
295
|
if field.quality is not None:
|
|
271
296
|
quality_list = field.quality
|
|
272
297
|
quality_property = to_odcs_quality_list(quality_list)
|
|
273
298
|
if len(quality_property) > 0:
|
|
274
|
-
property
|
|
275
|
-
|
|
276
|
-
# todo enum
|
|
299
|
+
property.quality = quality_property
|
|
277
300
|
|
|
278
301
|
return property
|
|
279
302
|
|
|
@@ -286,33 +309,35 @@ def to_odcs_quality_list(quality_list):
|
|
|
286
309
|
|
|
287
310
|
|
|
288
311
|
def to_odcs_quality(quality):
|
|
289
|
-
|
|
312
|
+
quality_obj = DataQuality(type=quality.type)
|
|
313
|
+
|
|
290
314
|
if quality.description is not None:
|
|
291
|
-
|
|
315
|
+
quality_obj.description = quality.description
|
|
292
316
|
if quality.query is not None:
|
|
293
|
-
|
|
317
|
+
quality_obj.query = quality.query
|
|
294
318
|
# dialect is not supported in v3.0.0
|
|
295
319
|
if quality.mustBe is not None:
|
|
296
|
-
|
|
320
|
+
quality_obj.mustBe = quality.mustBe
|
|
297
321
|
if quality.mustNotBe is not None:
|
|
298
|
-
|
|
322
|
+
quality_obj.mustNotBe = quality.mustNotBe
|
|
299
323
|
if quality.mustBeGreaterThan is not None:
|
|
300
|
-
|
|
324
|
+
quality_obj.mustBeGreaterThan = quality.mustBeGreaterThan
|
|
301
325
|
if quality.mustBeGreaterThanOrEqualTo is not None:
|
|
302
|
-
|
|
326
|
+
quality_obj.mustBeGreaterOrEqualTo = quality.mustBeGreaterThanOrEqualTo
|
|
303
327
|
if quality.mustBeLessThan is not None:
|
|
304
|
-
|
|
328
|
+
quality_obj.mustBeLessThan = quality.mustBeLessThan
|
|
305
329
|
if quality.mustBeLessThanOrEqualTo is not None:
|
|
306
|
-
|
|
330
|
+
quality_obj.mustBeLessOrEqualTo = quality.mustBeLessThanOrEqualTo
|
|
307
331
|
if quality.mustBeBetween is not None:
|
|
308
|
-
|
|
332
|
+
quality_obj.mustBeBetween = quality.mustBeBetween
|
|
309
333
|
if quality.mustNotBeBetween is not None:
|
|
310
|
-
|
|
334
|
+
quality_obj.mustNotBeBetween = quality.mustNotBeBetween
|
|
311
335
|
if quality.engine is not None:
|
|
312
|
-
|
|
336
|
+
quality_obj.engine = quality.engine
|
|
313
337
|
if quality.implementation is not None:
|
|
314
|
-
|
|
315
|
-
|
|
338
|
+
quality_obj.implementation = quality.implementation
|
|
339
|
+
|
|
340
|
+
return quality_obj
|
|
316
341
|
|
|
317
342
|
|
|
318
343
|
def to_status(status):
|
|
@@ -175,7 +175,7 @@ def print_schema(dtype: types.DataType) -> str:
|
|
|
175
175
|
Returns:
|
|
176
176
|
str: The indented text.
|
|
177
177
|
"""
|
|
178
|
-
return "\n".join([f'
|
|
178
|
+
return "\n".join([f"{' ' * level}{line}" for line in text.split("\n")])
|
|
179
179
|
|
|
180
180
|
def repr_column(column: types.StructField) -> str:
|
|
181
181
|
"""
|
|
@@ -55,7 +55,7 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
|
|
|
55
55
|
engine="datacontract",
|
|
56
56
|
original_exception=e,
|
|
57
57
|
)
|
|
58
|
-
|
|
58
|
+
# type record is being used for both the table and the object types in data contract
|
|
59
59
|
# -> CONSTRAINT: one table per .avsc input, all nested records are interpreted as objects
|
|
60
60
|
fields = import_record_fields(avro_schema.fields)
|
|
61
61
|
|
|
@@ -92,19 +92,19 @@ def handle_config_avro_custom_properties(field: avro.schema.Field, imported_fiel
|
|
|
92
92
|
|
|
93
93
|
|
|
94
94
|
LOGICAL_TYPE_MAPPING = {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
95
|
+
"decimal": "decimal",
|
|
96
|
+
"date": "date",
|
|
97
|
+
"time-millis": "time",
|
|
98
|
+
"time-micros": "time",
|
|
99
|
+
"timestamp-millis": "timestamp_tz",
|
|
100
|
+
"timestamp-micros": "timestamp_tz",
|
|
101
|
+
"local-timestamp-micros": "timestamp_ntz",
|
|
102
|
+
"local-timestamp-millis": "timestamp_ntz",
|
|
103
|
+
"duration": "string",
|
|
104
|
+
"uuid": "string",
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
|
|
108
108
|
def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Field]:
|
|
109
109
|
"""
|
|
110
110
|
Import Avro record fields and convert them to data contract fields.
|
|
@@ -150,15 +150,15 @@ def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Fi
|
|
|
150
150
|
if not imported_field.config:
|
|
151
151
|
imported_field.config = {}
|
|
152
152
|
imported_field.config["avroType"] = "enum"
|
|
153
|
-
else:
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
153
|
+
else:
|
|
154
|
+
logical_type = field.type.get_prop("logicalType")
|
|
155
|
+
if logical_type in LOGICAL_TYPE_MAPPING:
|
|
156
|
+
imported_field.type = LOGICAL_TYPE_MAPPING[logical_type]
|
|
157
|
+
if logical_type == "decimal":
|
|
158
|
+
imported_field.precision = field.type.precision
|
|
159
|
+
imported_field.scale = field.type.scale
|
|
160
|
+
else:
|
|
161
|
+
imported_field.type = map_type_from_avro(field.type.type)
|
|
162
162
|
imported_fields[field.name] = imported_field
|
|
163
163
|
|
|
164
164
|
return imported_fields
|
|
@@ -31,10 +31,10 @@ def import_csv(
|
|
|
31
31
|
if data_contract_specification.servers is None:
|
|
32
32
|
data_contract_specification.servers = {}
|
|
33
33
|
|
|
34
|
-
delimiter = None if dialect is None else dialect[
|
|
34
|
+
delimiter = None if dialect is None else dialect["Delimiter"][0]
|
|
35
35
|
|
|
36
36
|
if dialect is not None:
|
|
37
|
-
dc_types = [map_type_from_duckdb(x["type"]) for x in dialect[
|
|
37
|
+
dc_types = [map_type_from_duckdb(x["type"]) for x in dialect["Columns"][0]]
|
|
38
38
|
else:
|
|
39
39
|
dc_types = [map_type_from_duckdb(str(x)) for x in tbl.dtypes]
|
|
40
40
|
|