datacontract-cli 0.10.23__py3-none-any.whl → 0.10.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (43)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +3 -3
  3. datacontract/catalog/catalog.py +2 -2
  4. datacontract/cli.py +1 -1
  5. datacontract/data_contract.py +5 -3
  6. datacontract/engines/data_contract_test.py +13 -4
  7. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  8. datacontract/engines/soda/check_soda_execute.py +16 -3
  9. datacontract/engines/soda/connections/duckdb_connection.py +61 -5
  10. datacontract/engines/soda/connections/kafka.py +3 -2
  11. datacontract/export/avro_converter.py +8 -1
  12. datacontract/export/bigquery_converter.py +1 -1
  13. datacontract/export/duckdb_type_converter.py +57 -0
  14. datacontract/export/great_expectations_converter.py +49 -2
  15. datacontract/export/odcs_v3_exporter.py +162 -136
  16. datacontract/export/protobuf_converter.py +163 -69
  17. datacontract/export/spark_converter.py +1 -1
  18. datacontract/imports/avro_importer.py +30 -5
  19. datacontract/imports/csv_importer.py +111 -57
  20. datacontract/imports/excel_importer.py +850 -0
  21. datacontract/imports/importer.py +5 -2
  22. datacontract/imports/importer_factory.py +10 -0
  23. datacontract/imports/odcs_v3_importer.py +226 -127
  24. datacontract/imports/protobuf_importer.py +264 -0
  25. datacontract/lint/linters/description_linter.py +1 -3
  26. datacontract/lint/linters/field_reference_linter.py +1 -2
  27. datacontract/lint/linters/notice_period_linter.py +2 -2
  28. datacontract/lint/linters/valid_constraints_linter.py +3 -3
  29. datacontract/lint/resolve.py +23 -8
  30. datacontract/model/data_contract_specification/__init__.py +1 -0
  31. datacontract/model/run.py +3 -0
  32. datacontract/output/__init__.py +0 -0
  33. datacontract/templates/datacontract.html +2 -1
  34. datacontract/templates/index.html +2 -1
  35. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/METADATA +305 -195
  36. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/RECORD +40 -38
  37. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/WHEEL +1 -1
  38. datacontract/export/csv_type_converter.py +0 -36
  39. datacontract/lint/linters/quality_schema_linter.py +0 -52
  40. datacontract/model/data_contract_specification.py +0 -327
  41. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/entry_points.txt +0 -0
  42. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info/licenses}/LICENSE +0 -0
  43. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.25.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,17 @@
1
1
  from typing import Dict
2
2
 
3
- import yaml
3
+ from open_data_contract_standard.model import (
4
+ CustomProperty,
5
+ DataQuality,
6
+ Description,
7
+ OpenDataContractStandard,
8
+ Role,
9
+ SchemaObject,
10
+ SchemaProperty,
11
+ Server,
12
+ ServiceLevelAgreementProperty,
13
+ Support,
14
+ )
4
15
 
5
16
  from datacontract.export.exporter import Exporter
6
17
  from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
@@ -12,154 +23,148 @@ class OdcsV3Exporter(Exporter):
12
23
 
13
24
 
14
25
  def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
15
- odcs = {
16
- "apiVersion": "v3.0.0",
17
- "kind": "DataContract",
18
- "id": data_contract_spec.id,
19
- "name": data_contract_spec.info.title,
20
- "version": data_contract_spec.info.version,
21
- "domain": data_contract_spec.info.owner,
22
- "status": to_status(data_contract_spec.info.status),
23
- }
26
+ result = OpenDataContractStandard(
27
+ apiVersion="v3.0.1",
28
+ kind="DataContract",
29
+ id=data_contract_spec.id,
30
+ name=data_contract_spec.info.title,
31
+ version=data_contract_spec.info.version,
32
+ status=to_status(data_contract_spec.info.status),
33
+ )
24
34
 
25
35
  if data_contract_spec.terms is not None:
26
- odcs["description"] = {
27
- "purpose": data_contract_spec.terms.description.strip()
36
+ result.description = Description(
37
+ purpose=data_contract_spec.terms.description.strip()
28
38
  if data_contract_spec.terms.description is not None
29
39
  else None,
30
- "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
31
- "limitations": data_contract_spec.terms.limitations.strip()
40
+ usage=data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
41
+ limitations=data_contract_spec.terms.limitations.strip()
32
42
  if data_contract_spec.terms.limitations is not None
33
43
  else None,
34
- }
44
+ )
35
45
 
36
- odcs["schema"] = []
46
+ result.schema_ = []
37
47
  for model_key, model_value in data_contract_spec.models.items():
38
48
  odcs_schema = to_odcs_schema(model_key, model_value)
39
- odcs["schema"].append(odcs_schema)
49
+ result.schema_.append(odcs_schema)
40
50
 
41
51
  if data_contract_spec.servicelevels is not None:
42
52
  slas = []
43
53
  if data_contract_spec.servicelevels.availability is not None:
44
54
  slas.append(
45
- {
46
- "property": "generalAvailability",
47
- "value": data_contract_spec.servicelevels.availability.description,
48
- }
55
+ ServiceLevelAgreementProperty(
56
+ property="generalAvailability", value=data_contract_spec.servicelevels.availability.description
57
+ )
49
58
  )
50
59
  if data_contract_spec.servicelevels.retention is not None:
51
- slas.append({"property": "retention", "value": data_contract_spec.servicelevels.retention.period})
60
+ slas.append(
61
+ ServiceLevelAgreementProperty(
62
+ property="retention", value=data_contract_spec.servicelevels.retention.period
63
+ )
64
+ )
52
65
 
53
66
  if len(slas) > 0:
54
- odcs["slaProperties"] = slas
67
+ result.slaProperties = slas
55
68
 
56
69
  if data_contract_spec.info.contact is not None:
57
70
  support = []
58
71
  if data_contract_spec.info.contact.email is not None:
59
- support.append(
60
- {
61
- "channel": "email",
62
- "url": "mailto:" + data_contract_spec.info.contact.email,
63
- }
64
- )
72
+ support.append(Support(channel="email", url="mailto:" + data_contract_spec.info.contact.email))
65
73
  if data_contract_spec.info.contact.url is not None:
66
- support.append(
67
- {
68
- "channel": "other",
69
- "url": data_contract_spec.info.contact.url,
70
- }
71
- )
74
+ support.append(Support(channel="other", url=data_contract_spec.info.contact.url))
72
75
  if len(support) > 0:
73
- odcs["support"] = support
76
+ result.support = support
74
77
 
75
78
  if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
76
79
  servers = []
77
80
 
78
81
  for server_key, server_value in data_contract_spec.servers.items():
79
- server_dict = {}
80
- server_dict["server"] = server_key
81
- if server_value.type is not None:
82
- server_dict["type"] = server_value.type
82
+ server = Server(server=server_key, type=server_value.type or "")
83
+
84
+ # Set all the attributes that are not None
83
85
  if server_value.environment is not None:
84
- server_dict["environment"] = server_value.environment
86
+ server.environment = server_value.environment
85
87
  if server_value.account is not None:
86
- server_dict["account"] = server_value.account
88
+ server.account = server_value.account
87
89
  if server_value.database is not None:
88
- server_dict["database"] = server_value.database
90
+ server.database = server_value.database
89
91
  if server_value.schema_ is not None:
90
- server_dict["schema"] = server_value.schema_
92
+ server.schema_ = server_value.schema_
91
93
  if server_value.format is not None:
92
- server_dict["format"] = server_value.format
94
+ server.format = server_value.format
93
95
  if server_value.project is not None:
94
- server_dict["project"] = server_value.project
96
+ server.project = server_value.project
95
97
  if server_value.dataset is not None:
96
- server_dict["dataset"] = server_value.dataset
98
+ server.dataset = server_value.dataset
97
99
  if server_value.path is not None:
98
- server_dict["path"] = server_value.path
100
+ server.path = server_value.path
99
101
  if server_value.delimiter is not None:
100
- server_dict["delimiter"] = server_value.delimiter
102
+ server.delimiter = server_value.delimiter
101
103
  if server_value.endpointUrl is not None:
102
- server_dict["endpointUrl"] = server_value.endpointUrl
104
+ server.endpointUrl = server_value.endpointUrl
103
105
  if server_value.location is not None:
104
- server_dict["location"] = server_value.location
106
+ server.location = server_value.location
105
107
  if server_value.host is not None:
106
- server_dict["host"] = server_value.host
108
+ server.host = server_value.host
107
109
  if server_value.port is not None:
108
- server_dict["port"] = server_value.port
110
+ server.port = server_value.port
109
111
  if server_value.catalog is not None:
110
- server_dict["catalog"] = server_value.catalog
112
+ server.catalog = server_value.catalog
111
113
  if server_value.topic is not None:
112
- server_dict["topic"] = server_value.topic
114
+ server.topic = server_value.topic
113
115
  if server_value.http_path is not None:
114
- server_dict["http_path"] = server_value.http_path
116
+ server.http_path = server_value.http_path
115
117
  if server_value.token is not None:
116
- server_dict["token"] = server_value.token
118
+ server.token = server_value.token
117
119
  if server_value.driver is not None:
118
- server_dict["driver"] = server_value.driver
120
+ server.driver = server_value.driver
121
+
119
122
  if server_value.roles is not None:
120
- server_dict["roles"] = [
121
- {"name": role.name, "description": role.description} for role in server_value.roles
122
- ]
123
- servers.append(server_dict)
123
+ server.roles = [Role(role=role.name, description=role.description) for role in server_value.roles]
124
+
125
+ servers.append(server)
124
126
 
125
127
  if len(servers) > 0:
126
- odcs["servers"] = servers
128
+ result.servers = servers
127
129
 
128
- odcs["customProperties"] = []
130
+ custom_properties = []
131
+ if data_contract_spec.info.owner is not None:
132
+ custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
129
133
  if data_contract_spec.info.model_extra is not None:
130
134
  for key, value in data_contract_spec.info.model_extra.items():
131
- odcs["customProperties"].append({"property": key, "value": value})
132
- if len(odcs["customProperties"]) == 0:
133
- del odcs["customProperties"]
135
+ custom_properties.append(CustomProperty(property=key, value=value))
136
+
137
+ if len(custom_properties) > 0:
138
+ result.customProperties = custom_properties
134
139
 
135
- return yaml.dump(odcs, indent=2, sort_keys=False, allow_unicode=True)
140
+ return result.to_yaml()
136
141
 
137
142
 
138
- def to_odcs_schema(model_key, model_value: Model) -> dict:
139
- odcs_table = {
140
- "name": model_key,
141
- "physicalName": model_key,
142
- "logicalType": "object",
143
- "physicalType": model_value.type,
144
- }
143
+ def to_odcs_schema(model_key, model_value: Model) -> SchemaObject:
144
+ schema_obj = SchemaObject(
145
+ name=model_key, physicalName=model_key, logicalType="object", physicalType=model_value.type
146
+ )
147
+
145
148
  if model_value.description is not None:
146
- odcs_table["description"] = model_value.description
149
+ schema_obj.description = model_value.description
150
+
147
151
  properties = to_properties(model_value.fields)
148
152
  if properties:
149
- odcs_table["properties"] = properties
153
+ schema_obj.properties = properties
150
154
 
151
155
  model_quality = to_odcs_quality_list(model_value.quality)
152
156
  if len(model_quality) > 0:
153
- odcs_table["quality"] = model_quality
157
+ schema_obj.quality = model_quality
154
158
 
155
- odcs_table["customProperties"] = []
159
+ custom_properties = []
156
160
  if model_value.model_extra is not None:
157
161
  for key, value in model_value.model_extra.items():
158
- odcs_table["customProperties"].append({"property": key, "value": value})
159
- if len(odcs_table["customProperties"]) == 0:
160
- del odcs_table["customProperties"]
162
+ custom_properties.append(CustomProperty(property=key, value=value))
161
163
 
162
- return odcs_table
164
+ if len(custom_properties) > 0:
165
+ schema_obj.customProperties = custom_properties
166
+
167
+ return schema_obj
163
168
 
164
169
 
165
170
  def to_properties(fields: Dict[str, Field]) -> list:
@@ -203,76 +208,95 @@ def to_logical_type(type: str) -> str | None:
203
208
 
204
209
 
205
210
  def to_physical_type(type: str) -> str | None:
206
- # TODO: to we need to do a server mapping here?
207
211
  return type
208
212
 
209
213
 
210
- def to_property(field_name: str, field: Field) -> dict:
211
- property = {"name": field_name}
214
+ def to_property(field_name: str, field: Field) -> SchemaProperty:
215
+ property = SchemaProperty(name=field_name)
216
+
217
+ if field.fields:
218
+ properties = []
219
+ for field_name_, field_ in field.fields.items():
220
+ property_ = to_property(field_name_, field_)
221
+ properties.append(property_)
222
+ property.properties = properties
223
+
224
+ if field.items:
225
+ items = to_property(field_name, field.items)
226
+ items.name = None # Clear the name for items
227
+ property.items = items
228
+
212
229
  if field.title is not None:
213
- property["businessName"] = field.title
230
+ property.businessName = field.title
231
+
214
232
  if field.type is not None:
215
- property["logicalType"] = to_logical_type(field.type)
216
- property["physicalType"] = to_physical_type(field.type)
233
+ property.logicalType = to_logical_type(field.type)
234
+ property.physicalType = to_physical_type(field.type)
235
+
217
236
  if field.description is not None:
218
- property["description"] = field.description
237
+ property.description = field.description
238
+
219
239
  if field.required is not None:
220
- property["nullable"] = not field.required
240
+ property.required = field.required
241
+
221
242
  if field.unique is not None:
222
- property["unique"] = field.unique
243
+ property.unique = field.unique
244
+
223
245
  if field.classification is not None:
224
- property["classification"] = field.classification
246
+ property.classification = field.classification
247
+
225
248
  if field.examples is not None:
226
- property["examples"] = field.examples
249
+ property.examples = field.examples.copy()
250
+
227
251
  if field.example is not None:
228
- property["examples"] = [field.example]
252
+ property.examples = [field.example]
253
+
229
254
  if field.primaryKey is not None and field.primaryKey:
230
- property["primaryKey"] = field.primaryKey
231
- property["primaryKeyPosition"] = 1
255
+ property.primaryKey = field.primaryKey
256
+ property.primaryKeyPosition = 1
257
+
232
258
  if field.primary is not None and field.primary:
233
- property["primaryKey"] = field.primary
234
- property["primaryKeyPosition"] = 1
259
+ property.primaryKey = field.primary
260
+ property.primaryKeyPosition = 1
235
261
 
236
- property["customProperties"] = []
262
+ custom_properties = []
237
263
  if field.model_extra is not None:
238
264
  for key, value in field.model_extra.items():
239
- property["customProperties"].append({"property": key, "value": value})
265
+ custom_properties.append(CustomProperty(property=key, value=value))
266
+
240
267
  if field.pii is not None:
241
- property["customProperties"].append({"property": "pii", "value": field.pii})
242
- if property.get("customProperties") is not None and len(property["customProperties"]) == 0:
243
- del property["customProperties"]
268
+ custom_properties.append(CustomProperty(property="pii", value=field.pii))
244
269
 
245
- property["tags"] = []
246
- if field.tags is not None:
247
- property["tags"].extend(field.tags)
248
- if not property["tags"]:
249
- del property["tags"]
270
+ if len(custom_properties) > 0:
271
+ property.customProperties = custom_properties
250
272
 
251
- property["logicalTypeOptions"] = {}
273
+ if field.tags is not None and len(field.tags) > 0:
274
+ property.tags = field.tags
275
+
276
+ logical_type_options = {}
252
277
  if field.minLength is not None:
253
- property["logicalTypeOptions"]["minLength"] = field.minLength
278
+ logical_type_options["minLength"] = field.minLength
254
279
  if field.maxLength is not None:
255
- property["logicalTypeOptions"]["maxLength"] = field.maxLength
280
+ logical_type_options["maxLength"] = field.maxLength
256
281
  if field.pattern is not None:
257
- property["logicalTypeOptions"]["pattern"] = field.pattern
282
+ logical_type_options["pattern"] = field.pattern
258
283
  if field.minimum is not None:
259
- property["logicalTypeOptions"]["minimum"] = field.minimum
284
+ logical_type_options["minimum"] = field.minimum
260
285
  if field.maximum is not None:
261
- property["logicalTypeOptions"]["maximum"] = field.maximum
286
+ logical_type_options["maximum"] = field.maximum
262
287
  if field.exclusiveMinimum is not None:
263
- property["logicalTypeOptions"]["exclusiveMinimum"] = field.exclusiveMinimum
288
+ logical_type_options["exclusiveMinimum"] = field.exclusiveMinimum
264
289
  if field.exclusiveMaximum is not None:
265
- property["logicalTypeOptions"]["exclusiveMaximum"] = field.exclusiveMaximum
266
- if property["logicalTypeOptions"] == {}:
267
- del property["logicalTypeOptions"]
290
+ logical_type_options["exclusiveMaximum"] = field.exclusiveMaximum
291
+
292
+ if logical_type_options:
293
+ property.logicalTypeOptions = logical_type_options
268
294
 
269
295
  if field.quality is not None:
270
296
  quality_list = field.quality
271
297
  quality_property = to_odcs_quality_list(quality_list)
272
298
  if len(quality_property) > 0:
273
- property["quality"] = quality_property
274
-
275
- # todo enum
299
+ property.quality = quality_property
276
300
 
277
301
  return property
278
302
 
@@ -285,33 +309,35 @@ def to_odcs_quality_list(quality_list):
285
309
 
286
310
 
287
311
  def to_odcs_quality(quality):
288
- quality_dict = {"type": quality.type}
312
+ quality_obj = DataQuality(type=quality.type)
313
+
289
314
  if quality.description is not None:
290
- quality_dict["description"] = quality.description
315
+ quality_obj.description = quality.description
291
316
  if quality.query is not None:
292
- quality_dict["query"] = quality.query
317
+ quality_obj.query = quality.query
293
318
  # dialect is not supported in v3.0.0
294
319
  if quality.mustBe is not None:
295
- quality_dict["mustBe"] = quality.mustBe
320
+ quality_obj.mustBe = quality.mustBe
296
321
  if quality.mustNotBe is not None:
297
- quality_dict["mustNotBe"] = quality.mustNotBe
322
+ quality_obj.mustNotBe = quality.mustNotBe
298
323
  if quality.mustBeGreaterThan is not None:
299
- quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
324
+ quality_obj.mustBeGreaterThan = quality.mustBeGreaterThan
300
325
  if quality.mustBeGreaterThanOrEqualTo is not None:
301
- quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
326
+ quality_obj.mustBeGreaterOrEqualTo = quality.mustBeGreaterThanOrEqualTo
302
327
  if quality.mustBeLessThan is not None:
303
- quality_dict["mustBeLessThan"] = quality.mustBeLessThan
328
+ quality_obj.mustBeLessThan = quality.mustBeLessThan
304
329
  if quality.mustBeLessThanOrEqualTo is not None:
305
- quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
330
+ quality_obj.mustBeLessOrEqualTo = quality.mustBeLessThanOrEqualTo
306
331
  if quality.mustBeBetween is not None:
307
- quality_dict["mustBeBetween"] = quality.mustBeBetween
332
+ quality_obj.mustBeBetween = quality.mustBeBetween
308
333
  if quality.mustNotBeBetween is not None:
309
- quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
334
+ quality_obj.mustNotBeBetween = quality.mustNotBeBetween
310
335
  if quality.engine is not None:
311
- quality_dict["engine"] = quality.engine
336
+ quality_obj.engine = quality.engine
312
337
  if quality.implementation is not None:
313
- quality_dict["implementation"] = quality.implementation
314
- return quality_dict
338
+ quality_obj.implementation = quality.implementation
339
+
340
+ return quality_obj
315
341
 
316
342
 
317
343
  def to_status(status):