datacontract-cli 0.10.23__py3-none-any.whl → 0.10.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/__init__.py +13 -0
- datacontract/api.py +3 -3
- datacontract/catalog/catalog.py +2 -2
- datacontract/cli.py +1 -1
- datacontract/data_contract.py +5 -3
- datacontract/engines/data_contract_test.py +13 -4
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +16 -3
- datacontract/engines/soda/connections/duckdb_connection.py +61 -5
- datacontract/engines/soda/connections/kafka.py +3 -2
- datacontract/export/avro_converter.py +8 -1
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/great_expectations_converter.py +49 -2
- datacontract/export/odcs_v3_exporter.py +162 -136
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/export/spark_converter.py +1 -1
- datacontract/imports/avro_importer.py +30 -5
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/excel_importer.py +850 -0
- datacontract/imports/importer.py +5 -2
- datacontract/imports/importer_factory.py +10 -0
- datacontract/imports/odcs_v3_importer.py +226 -127
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/lint/linters/description_linter.py +1 -3
- datacontract/lint/linters/field_reference_linter.py +1 -2
- datacontract/lint/linters/notice_period_linter.py +2 -2
- datacontract/lint/linters/valid_constraints_linter.py +3 -3
- datacontract/lint/resolve.py +23 -8
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/run.py +3 -0
- datacontract/output/__init__.py +0 -0
- datacontract/templates/datacontract.html +2 -1
- datacontract/templates/index.html +2 -1
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/METADATA +305 -195
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/RECORD +40 -38
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/WHEEL +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/model/data_contract_specification.py +0 -327
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info/licenses}/LICENSE +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from open_data_contract_standard.model import (
|
|
4
|
+
CustomProperty,
|
|
5
|
+
DataQuality,
|
|
6
|
+
Description,
|
|
7
|
+
OpenDataContractStandard,
|
|
8
|
+
Role,
|
|
9
|
+
SchemaObject,
|
|
10
|
+
SchemaProperty,
|
|
11
|
+
Server,
|
|
12
|
+
ServiceLevelAgreementProperty,
|
|
13
|
+
Support,
|
|
14
|
+
)
|
|
4
15
|
|
|
5
16
|
from datacontract.export.exporter import Exporter
|
|
6
17
|
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
@@ -12,154 +23,148 @@ class OdcsV3Exporter(Exporter):
|
|
|
12
23
|
|
|
13
24
|
|
|
14
25
|
def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
}
|
|
26
|
+
result = OpenDataContractStandard(
|
|
27
|
+
apiVersion="v3.0.1",
|
|
28
|
+
kind="DataContract",
|
|
29
|
+
id=data_contract_spec.id,
|
|
30
|
+
name=data_contract_spec.info.title,
|
|
31
|
+
version=data_contract_spec.info.version,
|
|
32
|
+
status=to_status(data_contract_spec.info.status),
|
|
33
|
+
)
|
|
24
34
|
|
|
25
35
|
if data_contract_spec.terms is not None:
|
|
26
|
-
|
|
27
|
-
|
|
36
|
+
result.description = Description(
|
|
37
|
+
purpose=data_contract_spec.terms.description.strip()
|
|
28
38
|
if data_contract_spec.terms.description is not None
|
|
29
39
|
else None,
|
|
30
|
-
|
|
31
|
-
|
|
40
|
+
usage=data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
|
|
41
|
+
limitations=data_contract_spec.terms.limitations.strip()
|
|
32
42
|
if data_contract_spec.terms.limitations is not None
|
|
33
43
|
else None,
|
|
34
|
-
|
|
44
|
+
)
|
|
35
45
|
|
|
36
|
-
|
|
46
|
+
result.schema_ = []
|
|
37
47
|
for model_key, model_value in data_contract_spec.models.items():
|
|
38
48
|
odcs_schema = to_odcs_schema(model_key, model_value)
|
|
39
|
-
|
|
49
|
+
result.schema_.append(odcs_schema)
|
|
40
50
|
|
|
41
51
|
if data_contract_spec.servicelevels is not None:
|
|
42
52
|
slas = []
|
|
43
53
|
if data_contract_spec.servicelevels.availability is not None:
|
|
44
54
|
slas.append(
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
}
|
|
55
|
+
ServiceLevelAgreementProperty(
|
|
56
|
+
property="generalAvailability", value=data_contract_spec.servicelevels.availability.description
|
|
57
|
+
)
|
|
49
58
|
)
|
|
50
59
|
if data_contract_spec.servicelevels.retention is not None:
|
|
51
|
-
slas.append(
|
|
60
|
+
slas.append(
|
|
61
|
+
ServiceLevelAgreementProperty(
|
|
62
|
+
property="retention", value=data_contract_spec.servicelevels.retention.period
|
|
63
|
+
)
|
|
64
|
+
)
|
|
52
65
|
|
|
53
66
|
if len(slas) > 0:
|
|
54
|
-
|
|
67
|
+
result.slaProperties = slas
|
|
55
68
|
|
|
56
69
|
if data_contract_spec.info.contact is not None:
|
|
57
70
|
support = []
|
|
58
71
|
if data_contract_spec.info.contact.email is not None:
|
|
59
|
-
support.append(
|
|
60
|
-
{
|
|
61
|
-
"channel": "email",
|
|
62
|
-
"url": "mailto:" + data_contract_spec.info.contact.email,
|
|
63
|
-
}
|
|
64
|
-
)
|
|
72
|
+
support.append(Support(channel="email", url="mailto:" + data_contract_spec.info.contact.email))
|
|
65
73
|
if data_contract_spec.info.contact.url is not None:
|
|
66
|
-
support.append(
|
|
67
|
-
{
|
|
68
|
-
"channel": "other",
|
|
69
|
-
"url": data_contract_spec.info.contact.url,
|
|
70
|
-
}
|
|
71
|
-
)
|
|
74
|
+
support.append(Support(channel="other", url=data_contract_spec.info.contact.url))
|
|
72
75
|
if len(support) > 0:
|
|
73
|
-
|
|
76
|
+
result.support = support
|
|
74
77
|
|
|
75
78
|
if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
|
|
76
79
|
servers = []
|
|
77
80
|
|
|
78
81
|
for server_key, server_value in data_contract_spec.servers.items():
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
server_dict["type"] = server_value.type
|
|
82
|
+
server = Server(server=server_key, type=server_value.type or "")
|
|
83
|
+
|
|
84
|
+
# Set all the attributes that are not None
|
|
83
85
|
if server_value.environment is not None:
|
|
84
|
-
|
|
86
|
+
server.environment = server_value.environment
|
|
85
87
|
if server_value.account is not None:
|
|
86
|
-
|
|
88
|
+
server.account = server_value.account
|
|
87
89
|
if server_value.database is not None:
|
|
88
|
-
|
|
90
|
+
server.database = server_value.database
|
|
89
91
|
if server_value.schema_ is not None:
|
|
90
|
-
|
|
92
|
+
server.schema_ = server_value.schema_
|
|
91
93
|
if server_value.format is not None:
|
|
92
|
-
|
|
94
|
+
server.format = server_value.format
|
|
93
95
|
if server_value.project is not None:
|
|
94
|
-
|
|
96
|
+
server.project = server_value.project
|
|
95
97
|
if server_value.dataset is not None:
|
|
96
|
-
|
|
98
|
+
server.dataset = server_value.dataset
|
|
97
99
|
if server_value.path is not None:
|
|
98
|
-
|
|
100
|
+
server.path = server_value.path
|
|
99
101
|
if server_value.delimiter is not None:
|
|
100
|
-
|
|
102
|
+
server.delimiter = server_value.delimiter
|
|
101
103
|
if server_value.endpointUrl is not None:
|
|
102
|
-
|
|
104
|
+
server.endpointUrl = server_value.endpointUrl
|
|
103
105
|
if server_value.location is not None:
|
|
104
|
-
|
|
106
|
+
server.location = server_value.location
|
|
105
107
|
if server_value.host is not None:
|
|
106
|
-
|
|
108
|
+
server.host = server_value.host
|
|
107
109
|
if server_value.port is not None:
|
|
108
|
-
|
|
110
|
+
server.port = server_value.port
|
|
109
111
|
if server_value.catalog is not None:
|
|
110
|
-
|
|
112
|
+
server.catalog = server_value.catalog
|
|
111
113
|
if server_value.topic is not None:
|
|
112
|
-
|
|
114
|
+
server.topic = server_value.topic
|
|
113
115
|
if server_value.http_path is not None:
|
|
114
|
-
|
|
116
|
+
server.http_path = server_value.http_path
|
|
115
117
|
if server_value.token is not None:
|
|
116
|
-
|
|
118
|
+
server.token = server_value.token
|
|
117
119
|
if server_value.driver is not None:
|
|
118
|
-
|
|
120
|
+
server.driver = server_value.driver
|
|
121
|
+
|
|
119
122
|
if server_value.roles is not None:
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
servers.append(server_dict)
|
|
123
|
+
server.roles = [Role(role=role.name, description=role.description) for role in server_value.roles]
|
|
124
|
+
|
|
125
|
+
servers.append(server)
|
|
124
126
|
|
|
125
127
|
if len(servers) > 0:
|
|
126
|
-
|
|
128
|
+
result.servers = servers
|
|
127
129
|
|
|
128
|
-
|
|
130
|
+
custom_properties = []
|
|
131
|
+
if data_contract_spec.info.owner is not None:
|
|
132
|
+
custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
|
|
129
133
|
if data_contract_spec.info.model_extra is not None:
|
|
130
134
|
for key, value in data_contract_spec.info.model_extra.items():
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
135
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
136
|
+
|
|
137
|
+
if len(custom_properties) > 0:
|
|
138
|
+
result.customProperties = custom_properties
|
|
134
139
|
|
|
135
|
-
return
|
|
140
|
+
return result.to_yaml()
|
|
136
141
|
|
|
137
142
|
|
|
138
|
-
def to_odcs_schema(model_key, model_value: Model) ->
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
"physicalType": model_value.type,
|
|
144
|
-
}
|
|
143
|
+
def to_odcs_schema(model_key, model_value: Model) -> SchemaObject:
|
|
144
|
+
schema_obj = SchemaObject(
|
|
145
|
+
name=model_key, physicalName=model_key, logicalType="object", physicalType=model_value.type
|
|
146
|
+
)
|
|
147
|
+
|
|
145
148
|
if model_value.description is not None:
|
|
146
|
-
|
|
149
|
+
schema_obj.description = model_value.description
|
|
150
|
+
|
|
147
151
|
properties = to_properties(model_value.fields)
|
|
148
152
|
if properties:
|
|
149
|
-
|
|
153
|
+
schema_obj.properties = properties
|
|
150
154
|
|
|
151
155
|
model_quality = to_odcs_quality_list(model_value.quality)
|
|
152
156
|
if len(model_quality) > 0:
|
|
153
|
-
|
|
157
|
+
schema_obj.quality = model_quality
|
|
154
158
|
|
|
155
|
-
|
|
159
|
+
custom_properties = []
|
|
156
160
|
if model_value.model_extra is not None:
|
|
157
161
|
for key, value in model_value.model_extra.items():
|
|
158
|
-
|
|
159
|
-
if len(odcs_table["customProperties"]) == 0:
|
|
160
|
-
del odcs_table["customProperties"]
|
|
162
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
161
163
|
|
|
162
|
-
|
|
164
|
+
if len(custom_properties) > 0:
|
|
165
|
+
schema_obj.customProperties = custom_properties
|
|
166
|
+
|
|
167
|
+
return schema_obj
|
|
163
168
|
|
|
164
169
|
|
|
165
170
|
def to_properties(fields: Dict[str, Field]) -> list:
|
|
@@ -203,76 +208,95 @@ def to_logical_type(type: str) -> str | None:
|
|
|
203
208
|
|
|
204
209
|
|
|
205
210
|
def to_physical_type(type: str) -> str | None:
|
|
206
|
-
# TODO: do we need to do a server mapping here?
|
|
207
211
|
return type
|
|
208
212
|
|
|
209
213
|
|
|
210
|
-
def to_property(field_name: str, field: Field) ->
|
|
211
|
-
property =
|
|
214
|
+
def to_property(field_name: str, field: Field) -> SchemaProperty:
|
|
215
|
+
property = SchemaProperty(name=field_name)
|
|
216
|
+
|
|
217
|
+
if field.fields:
|
|
218
|
+
properties = []
|
|
219
|
+
for field_name_, field_ in field.fields.items():
|
|
220
|
+
property_ = to_property(field_name_, field_)
|
|
221
|
+
properties.append(property_)
|
|
222
|
+
property.properties = properties
|
|
223
|
+
|
|
224
|
+
if field.items:
|
|
225
|
+
items = to_property(field_name, field.items)
|
|
226
|
+
items.name = None # Clear the name for items
|
|
227
|
+
property.items = items
|
|
228
|
+
|
|
212
229
|
if field.title is not None:
|
|
213
|
-
property
|
|
230
|
+
property.businessName = field.title
|
|
231
|
+
|
|
214
232
|
if field.type is not None:
|
|
215
|
-
property
|
|
216
|
-
property
|
|
233
|
+
property.logicalType = to_logical_type(field.type)
|
|
234
|
+
property.physicalType = to_physical_type(field.type)
|
|
235
|
+
|
|
217
236
|
if field.description is not None:
|
|
218
|
-
property
|
|
237
|
+
property.description = field.description
|
|
238
|
+
|
|
219
239
|
if field.required is not None:
|
|
220
|
-
property
|
|
240
|
+
property.required = field.required
|
|
241
|
+
|
|
221
242
|
if field.unique is not None:
|
|
222
|
-
property
|
|
243
|
+
property.unique = field.unique
|
|
244
|
+
|
|
223
245
|
if field.classification is not None:
|
|
224
|
-
property
|
|
246
|
+
property.classification = field.classification
|
|
247
|
+
|
|
225
248
|
if field.examples is not None:
|
|
226
|
-
property
|
|
249
|
+
property.examples = field.examples.copy()
|
|
250
|
+
|
|
227
251
|
if field.example is not None:
|
|
228
|
-
property
|
|
252
|
+
property.examples = [field.example]
|
|
253
|
+
|
|
229
254
|
if field.primaryKey is not None and field.primaryKey:
|
|
230
|
-
property
|
|
231
|
-
property
|
|
255
|
+
property.primaryKey = field.primaryKey
|
|
256
|
+
property.primaryKeyPosition = 1
|
|
257
|
+
|
|
232
258
|
if field.primary is not None and field.primary:
|
|
233
|
-
property
|
|
234
|
-
property
|
|
259
|
+
property.primaryKey = field.primary
|
|
260
|
+
property.primaryKeyPosition = 1
|
|
235
261
|
|
|
236
|
-
|
|
262
|
+
custom_properties = []
|
|
237
263
|
if field.model_extra is not None:
|
|
238
264
|
for key, value in field.model_extra.items():
|
|
239
|
-
|
|
265
|
+
custom_properties.append(CustomProperty(property=key, value=value))
|
|
266
|
+
|
|
240
267
|
if field.pii is not None:
|
|
241
|
-
|
|
242
|
-
if property.get("customProperties") is not None and len(property["customProperties"]) == 0:
|
|
243
|
-
del property["customProperties"]
|
|
268
|
+
custom_properties.append(CustomProperty(property="pii", value=field.pii))
|
|
244
269
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
property["tags"].extend(field.tags)
|
|
248
|
-
if not property["tags"]:
|
|
249
|
-
del property["tags"]
|
|
270
|
+
if len(custom_properties) > 0:
|
|
271
|
+
property.customProperties = custom_properties
|
|
250
272
|
|
|
251
|
-
|
|
273
|
+
if field.tags is not None and len(field.tags) > 0:
|
|
274
|
+
property.tags = field.tags
|
|
275
|
+
|
|
276
|
+
logical_type_options = {}
|
|
252
277
|
if field.minLength is not None:
|
|
253
|
-
|
|
278
|
+
logical_type_options["minLength"] = field.minLength
|
|
254
279
|
if field.maxLength is not None:
|
|
255
|
-
|
|
280
|
+
logical_type_options["maxLength"] = field.maxLength
|
|
256
281
|
if field.pattern is not None:
|
|
257
|
-
|
|
282
|
+
logical_type_options["pattern"] = field.pattern
|
|
258
283
|
if field.minimum is not None:
|
|
259
|
-
|
|
284
|
+
logical_type_options["minimum"] = field.minimum
|
|
260
285
|
if field.maximum is not None:
|
|
261
|
-
|
|
286
|
+
logical_type_options["maximum"] = field.maximum
|
|
262
287
|
if field.exclusiveMinimum is not None:
|
|
263
|
-
|
|
288
|
+
logical_type_options["exclusiveMinimum"] = field.exclusiveMinimum
|
|
264
289
|
if field.exclusiveMaximum is not None:
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
290
|
+
logical_type_options["exclusiveMaximum"] = field.exclusiveMaximum
|
|
291
|
+
|
|
292
|
+
if logical_type_options:
|
|
293
|
+
property.logicalTypeOptions = logical_type_options
|
|
268
294
|
|
|
269
295
|
if field.quality is not None:
|
|
270
296
|
quality_list = field.quality
|
|
271
297
|
quality_property = to_odcs_quality_list(quality_list)
|
|
272
298
|
if len(quality_property) > 0:
|
|
273
|
-
property
|
|
274
|
-
|
|
275
|
-
# todo enum
|
|
299
|
+
property.quality = quality_property
|
|
276
300
|
|
|
277
301
|
return property
|
|
278
302
|
|
|
@@ -285,33 +309,35 @@ def to_odcs_quality_list(quality_list):
|
|
|
285
309
|
|
|
286
310
|
|
|
287
311
|
def to_odcs_quality(quality):
|
|
288
|
-
|
|
312
|
+
quality_obj = DataQuality(type=quality.type)
|
|
313
|
+
|
|
289
314
|
if quality.description is not None:
|
|
290
|
-
|
|
315
|
+
quality_obj.description = quality.description
|
|
291
316
|
if quality.query is not None:
|
|
292
|
-
|
|
317
|
+
quality_obj.query = quality.query
|
|
293
318
|
# dialect is not supported in v3.0.0
|
|
294
319
|
if quality.mustBe is not None:
|
|
295
|
-
|
|
320
|
+
quality_obj.mustBe = quality.mustBe
|
|
296
321
|
if quality.mustNotBe is not None:
|
|
297
|
-
|
|
322
|
+
quality_obj.mustNotBe = quality.mustNotBe
|
|
298
323
|
if quality.mustBeGreaterThan is not None:
|
|
299
|
-
|
|
324
|
+
quality_obj.mustBeGreaterThan = quality.mustBeGreaterThan
|
|
300
325
|
if quality.mustBeGreaterThanOrEqualTo is not None:
|
|
301
|
-
|
|
326
|
+
quality_obj.mustBeGreaterOrEqualTo = quality.mustBeGreaterThanOrEqualTo
|
|
302
327
|
if quality.mustBeLessThan is not None:
|
|
303
|
-
|
|
328
|
+
quality_obj.mustBeLessThan = quality.mustBeLessThan
|
|
304
329
|
if quality.mustBeLessThanOrEqualTo is not None:
|
|
305
|
-
|
|
330
|
+
quality_obj.mustBeLessOrEqualTo = quality.mustBeLessThanOrEqualTo
|
|
306
331
|
if quality.mustBeBetween is not None:
|
|
307
|
-
|
|
332
|
+
quality_obj.mustBeBetween = quality.mustBeBetween
|
|
308
333
|
if quality.mustNotBeBetween is not None:
|
|
309
|
-
|
|
334
|
+
quality_obj.mustNotBeBetween = quality.mustNotBeBetween
|
|
310
335
|
if quality.engine is not None:
|
|
311
|
-
|
|
336
|
+
quality_obj.engine = quality.engine
|
|
312
337
|
if quality.implementation is not None:
|
|
313
|
-
|
|
314
|
-
|
|
338
|
+
quality_obj.implementation = quality.implementation
|
|
339
|
+
|
|
340
|
+
return quality_obj
|
|
315
341
|
|
|
316
342
|
|
|
317
343
|
def to_status(status):
|