datacontract-cli 0.10.24__py3-none-any.whl → 0.10.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/api.py +3 -3
- datacontract/cli.py +1 -1
- datacontract/engines/soda/connections/kafka.py +2 -1
- datacontract/export/great_expectations_converter.py +49 -2
- datacontract/export/odcs_v3_exporter.py +183 -140
- datacontract/export/spark_converter.py +1 -1
- datacontract/export/sql_converter.py +4 -0
- datacontract/export/sql_type_converter.py +2 -0
- datacontract/imports/avro_importer.py +23 -23
- datacontract/imports/csv_importer.py +2 -2
- datacontract/imports/excel_importer.py +850 -0
- datacontract/imports/importer.py +4 -2
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_v3_importer.py +202 -145
- datacontract/imports/protobuf_importer.py +0 -2
- datacontract/imports/spark_importer.py +2 -0
- datacontract/lint/linters/description_linter.py +1 -3
- datacontract/lint/linters/field_reference_linter.py +1 -2
- datacontract/lint/linters/notice_period_linter.py +2 -2
- datacontract/lint/linters/valid_constraints_linter.py +3 -3
- datacontract/model/data_contract_specification/__init__.py +1 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/METADATA +59 -18
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/RECORD +27 -26
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/WHEEL +1 -1
- datacontract/model/data_contract_specification.py +0 -327
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.24.dist-info → datacontract_cli-0.10.26.dist-info}/top_level.txt +0 -0
datacontract/imports/importer.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from enum import Enum
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from datacontract_specification.model import DataContractSpecification
|
|
5
|
+
from open_data_contract_standard.model import OpenDataContractStandard
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class Importer(ABC):
|
|
@@ -14,7 +15,7 @@ class Importer(ABC):
|
|
|
14
15
|
data_contract_specification: DataContractSpecification,
|
|
15
16
|
source: str,
|
|
16
17
|
import_args: dict,
|
|
17
|
-
) -> DataContractSpecification:
|
|
18
|
+
) -> DataContractSpecification | OpenDataContractStandard:
|
|
18
19
|
pass
|
|
19
20
|
|
|
20
21
|
|
|
@@ -33,6 +34,7 @@ class ImportFormat(str, Enum):
|
|
|
33
34
|
parquet = "parquet"
|
|
34
35
|
csv = "csv"
|
|
35
36
|
protobuf = "protobuf"
|
|
37
|
+
excel = "excel"
|
|
36
38
|
|
|
37
39
|
@classmethod
|
|
38
40
|
def get_supported_formats(cls):
|
|
@@ -114,3 +114,8 @@ importer_factory.register_lazy_importer(
|
|
|
114
114
|
module_path="datacontract.imports.protobuf_importer",
|
|
115
115
|
class_name="ProtoBufImporter",
|
|
116
116
|
)
|
|
117
|
+
importer_factory.register_lazy_importer(
|
|
118
|
+
name=ImportFormat.excel,
|
|
119
|
+
module_path="datacontract.imports.excel_importer",
|
|
120
|
+
class_name="ExcelImporter",
|
|
121
|
+
)
|
|
@@ -4,7 +4,8 @@ import re
|
|
|
4
4
|
from typing import Any, Dict, List
|
|
5
5
|
from venv import logger
|
|
6
6
|
|
|
7
|
-
import
|
|
7
|
+
from datacontract_specification.model import Quality
|
|
8
|
+
from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard, SchemaProperty
|
|
8
9
|
|
|
9
10
|
from datacontract.imports.importer import Importer
|
|
10
11
|
from datacontract.lint.resources import read_resource
|
|
@@ -15,7 +16,6 @@ from datacontract.model.data_contract_specification import (
|
|
|
15
16
|
Field,
|
|
16
17
|
Info,
|
|
17
18
|
Model,
|
|
18
|
-
Quality,
|
|
19
19
|
Retention,
|
|
20
20
|
Server,
|
|
21
21
|
ServerRole,
|
|
@@ -41,7 +41,7 @@ def import_odcs_v3_from_str(
|
|
|
41
41
|
data_contract_specification: DataContractSpecification, source_str: str
|
|
42
42
|
) -> DataContractSpecification:
|
|
43
43
|
try:
|
|
44
|
-
|
|
44
|
+
odcs = OpenDataContractStandard.from_string(source_str)
|
|
45
45
|
except Exception as e:
|
|
46
46
|
raise DataContractException(
|
|
47
47
|
type="schema",
|
|
@@ -51,41 +51,44 @@ def import_odcs_v3_from_str(
|
|
|
51
51
|
original_exception=e,
|
|
52
52
|
)
|
|
53
53
|
|
|
54
|
-
data_contract_specification
|
|
55
|
-
data_contract_specification.info = import_info(odcs_contract)
|
|
56
|
-
data_contract_specification.servers = import_servers(odcs_contract)
|
|
57
|
-
data_contract_specification.terms = import_terms(odcs_contract)
|
|
58
|
-
data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
|
|
59
|
-
data_contract_specification.models = import_models(odcs_contract)
|
|
60
|
-
data_contract_specification.tags = import_tags(odcs_contract)
|
|
54
|
+
return import_from_odcs_model(data_contract_specification, odcs)
|
|
61
55
|
|
|
56
|
+
|
|
57
|
+
def import_from_odcs_model(data_contract_specification, odcs):
|
|
58
|
+
data_contract_specification.id = odcs.id
|
|
59
|
+
data_contract_specification.info = import_info(odcs)
|
|
60
|
+
data_contract_specification.servers = import_servers(odcs)
|
|
61
|
+
data_contract_specification.terms = import_terms(odcs)
|
|
62
|
+
data_contract_specification.servicelevels = import_servicelevels(odcs)
|
|
63
|
+
data_contract_specification.models = import_models(odcs)
|
|
64
|
+
data_contract_specification.tags = import_tags(odcs)
|
|
62
65
|
return data_contract_specification
|
|
63
66
|
|
|
64
67
|
|
|
65
|
-
def import_info(
|
|
68
|
+
def import_info(odcs: Any) -> Info:
|
|
66
69
|
info = Info()
|
|
67
70
|
|
|
68
|
-
info.title =
|
|
71
|
+
info.title = odcs.name if odcs.name is not None else ""
|
|
69
72
|
|
|
70
|
-
if
|
|
71
|
-
info.version =
|
|
73
|
+
if odcs.version is not None:
|
|
74
|
+
info.version = odcs.version
|
|
72
75
|
|
|
73
76
|
# odcs.description.purpose => datacontract.description
|
|
74
|
-
if
|
|
75
|
-
info.description =
|
|
77
|
+
if odcs.description is not None and odcs.description.purpose is not None:
|
|
78
|
+
info.description = odcs.description.purpose
|
|
76
79
|
|
|
77
80
|
# odcs.domain => datacontract.owner
|
|
78
|
-
owner = get_owner(
|
|
81
|
+
owner = get_owner(odcs.customProperties)
|
|
79
82
|
if owner is not None:
|
|
80
83
|
info.owner = owner
|
|
81
84
|
|
|
82
85
|
# add dataProduct as custom property
|
|
83
|
-
if
|
|
84
|
-
info.dataProduct =
|
|
86
|
+
if odcs.dataProduct is not None:
|
|
87
|
+
info.dataProduct = odcs.dataProduct
|
|
85
88
|
|
|
86
89
|
# add tenant as custom property
|
|
87
|
-
if
|
|
88
|
-
info.tenant =
|
|
90
|
+
if odcs.tenant is not None:
|
|
91
|
+
info.tenant = odcs.tenant
|
|
89
92
|
|
|
90
93
|
return info
|
|
91
94
|
|
|
@@ -96,96 +99,92 @@ def import_server_roles(roles: List[Dict]) -> List[ServerRole] | None:
|
|
|
96
99
|
result = []
|
|
97
100
|
for role in roles:
|
|
98
101
|
server_role = ServerRole()
|
|
99
|
-
server_role.name = role.
|
|
100
|
-
server_role.description = role.
|
|
102
|
+
server_role.name = role.role
|
|
103
|
+
server_role.description = role.description
|
|
101
104
|
result.append(server_role)
|
|
102
105
|
|
|
103
106
|
|
|
104
|
-
def import_servers(
|
|
105
|
-
if
|
|
107
|
+
def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
|
|
108
|
+
if odcs.servers is None:
|
|
106
109
|
return None
|
|
107
110
|
servers = {}
|
|
108
|
-
for odcs_server in
|
|
109
|
-
server_name = odcs_server.
|
|
111
|
+
for odcs_server in odcs.servers:
|
|
112
|
+
server_name = odcs_server.server
|
|
110
113
|
if server_name is None:
|
|
111
114
|
logger.warning("Server name is missing, skipping server")
|
|
112
115
|
continue
|
|
113
116
|
|
|
114
117
|
server = Server()
|
|
115
|
-
server.type = odcs_server.
|
|
116
|
-
server.description = odcs_server.
|
|
117
|
-
server.environment = odcs_server.
|
|
118
|
-
server.format = odcs_server.
|
|
119
|
-
server.project = odcs_server.
|
|
120
|
-
server.dataset = odcs_server.
|
|
121
|
-
server.path = odcs_server.
|
|
122
|
-
server.delimiter = odcs_server.
|
|
123
|
-
server.endpointUrl = odcs_server.
|
|
124
|
-
server.location = odcs_server.
|
|
125
|
-
server.account = odcs_server.
|
|
126
|
-
server.database = odcs_server.
|
|
127
|
-
server.schema_ = odcs_server.
|
|
128
|
-
server.host = odcs_server.
|
|
129
|
-
server.port = odcs_server.
|
|
130
|
-
server.catalog = odcs_server.
|
|
131
|
-
server.topic = odcs_server
|
|
132
|
-
server.http_path = odcs_server
|
|
133
|
-
server.token = odcs_server
|
|
134
|
-
server.
|
|
135
|
-
server.
|
|
136
|
-
server.
|
|
137
|
-
|
|
138
|
-
|
|
118
|
+
server.type = odcs_server.type
|
|
119
|
+
server.description = odcs_server.description
|
|
120
|
+
server.environment = odcs_server.environment
|
|
121
|
+
server.format = odcs_server.format
|
|
122
|
+
server.project = odcs_server.project
|
|
123
|
+
server.dataset = odcs_server.dataset
|
|
124
|
+
server.path = odcs_server.path
|
|
125
|
+
server.delimiter = odcs_server.delimiter
|
|
126
|
+
server.endpointUrl = odcs_server.endpointUrl
|
|
127
|
+
server.location = odcs_server.location
|
|
128
|
+
server.account = odcs_server.account
|
|
129
|
+
server.database = odcs_server.database
|
|
130
|
+
server.schema_ = odcs_server.schema_
|
|
131
|
+
server.host = odcs_server.host
|
|
132
|
+
server.port = odcs_server.port
|
|
133
|
+
server.catalog = odcs_server.catalog
|
|
134
|
+
server.topic = getattr(odcs_server, "topic", None)
|
|
135
|
+
server.http_path = getattr(odcs_server, "http_path", None)
|
|
136
|
+
server.token = getattr(odcs_server, "token", None)
|
|
137
|
+
server.driver = getattr(odcs_server, "driver", None)
|
|
138
|
+
server.roles = import_server_roles(odcs_server.roles)
|
|
139
|
+
server.storageAccount = (
|
|
140
|
+
re.search(r"(?:@|://)([^.]+)\.", odcs_server.location, re.IGNORECASE) if server.type == "azure" else None
|
|
141
|
+
)
|
|
139
142
|
servers[server_name] = server
|
|
140
143
|
return servers
|
|
141
144
|
|
|
142
145
|
|
|
143
|
-
def import_terms(
|
|
144
|
-
if
|
|
146
|
+
def import_terms(odcs: Any) -> Terms | None:
|
|
147
|
+
if odcs.description is None:
|
|
145
148
|
return None
|
|
146
|
-
if
|
|
147
|
-
odcs_contract.get("description").get("usage") is not None
|
|
148
|
-
or odcs_contract.get("description").get("limitations") is not None
|
|
149
|
-
or odcs_contract.get("price") is not None
|
|
150
|
-
):
|
|
149
|
+
if odcs.description.usage is not None or odcs.description.limitations is not None or odcs.price is not None:
|
|
151
150
|
terms = Terms()
|
|
152
|
-
if
|
|
153
|
-
terms.usage =
|
|
154
|
-
if
|
|
155
|
-
terms.limitations =
|
|
156
|
-
if
|
|
157
|
-
terms.billing = f"{
|
|
151
|
+
if odcs.description.usage is not None:
|
|
152
|
+
terms.usage = odcs.description.usage
|
|
153
|
+
if odcs.description.limitations is not None:
|
|
154
|
+
terms.limitations = odcs.description.limitations
|
|
155
|
+
if odcs.price is not None:
|
|
156
|
+
terms.billing = f"{odcs.price.priceAmount} {odcs.price.priceCurrency} / {odcs.price.priceUnit}"
|
|
158
157
|
|
|
159
158
|
return terms
|
|
160
159
|
else:
|
|
161
160
|
return None
|
|
162
161
|
|
|
163
162
|
|
|
164
|
-
def import_servicelevels(
|
|
163
|
+
def import_servicelevels(odcs: Any) -> ServiceLevel:
|
|
165
164
|
# find the two properties we can map (based on the examples)
|
|
166
|
-
sla_properties =
|
|
167
|
-
availability = next((p for p in sla_properties if p
|
|
168
|
-
retention = next((p for p in sla_properties if p
|
|
165
|
+
sla_properties = odcs.slaProperties if odcs.slaProperties is not None else []
|
|
166
|
+
availability = next((p for p in sla_properties if p.property == "generalAvailability"), None)
|
|
167
|
+
retention = next((p for p in sla_properties if p.property == "retention"), None)
|
|
169
168
|
|
|
170
169
|
if availability is not None or retention is not None:
|
|
171
170
|
servicelevel = ServiceLevel()
|
|
172
171
|
|
|
173
172
|
if availability is not None:
|
|
174
|
-
value = availability.
|
|
173
|
+
value = availability.value
|
|
175
174
|
if isinstance(value, datetime.datetime):
|
|
176
175
|
value = value.isoformat()
|
|
177
176
|
servicelevel.availability = Availability(description=value)
|
|
178
177
|
|
|
179
178
|
if retention is not None:
|
|
180
|
-
servicelevel.retention = Retention(period=f"{retention.
|
|
179
|
+
servicelevel.retention = Retention(period=f"{retention.value}{retention.unit}")
|
|
181
180
|
|
|
182
181
|
return servicelevel
|
|
183
182
|
else:
|
|
184
183
|
return None
|
|
185
184
|
|
|
186
185
|
|
|
187
|
-
def get_server_type(
|
|
188
|
-
servers = import_servers(
|
|
186
|
+
def get_server_type(odcs: OpenDataContractStandard) -> str | None:
|
|
187
|
+
servers = import_servers(odcs)
|
|
189
188
|
if servers is None or len(servers) == 0:
|
|
190
189
|
return None
|
|
191
190
|
# get first server from map
|
|
@@ -193,49 +192,106 @@ def get_server_type(odcs_contract: Dict[str, Any]) -> str | None:
|
|
|
193
192
|
return server.type
|
|
194
193
|
|
|
195
194
|
|
|
196
|
-
def import_models(
|
|
197
|
-
custom_type_mappings = get_custom_type_mappings(
|
|
195
|
+
def import_models(odcs: Any) -> Dict[str, Model]:
|
|
196
|
+
custom_type_mappings = get_custom_type_mappings(odcs.customProperties)
|
|
198
197
|
|
|
199
|
-
odcs_schemas =
|
|
198
|
+
odcs_schemas = odcs.schema_ if odcs.schema_ is not None else []
|
|
200
199
|
result = {}
|
|
201
200
|
|
|
202
201
|
for odcs_schema in odcs_schemas:
|
|
203
|
-
schema_name = odcs_schema.
|
|
204
|
-
schema_physical_name = odcs_schema.
|
|
205
|
-
schema_description = odcs_schema.
|
|
202
|
+
schema_name = odcs_schema.name
|
|
203
|
+
schema_physical_name = odcs_schema.physicalName
|
|
204
|
+
schema_description = odcs_schema.description if odcs_schema.description is not None else ""
|
|
206
205
|
model_name = schema_physical_name if schema_physical_name is not None else schema_name
|
|
207
|
-
model = Model(description=" ".join(schema_description.splitlines()), type="table")
|
|
208
|
-
model.fields = import_fields(
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
if odcs_schema.get("quality") is not None:
|
|
212
|
-
# convert dict to pydantic model
|
|
213
|
-
|
|
214
|
-
model.quality = [Quality.model_validate(q) for q in odcs_schema.get("quality")]
|
|
206
|
+
model = Model(description=" ".join(schema_description.splitlines()) if schema_description else "", type="table")
|
|
207
|
+
model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
|
|
208
|
+
if odcs_schema.quality is not None:
|
|
209
|
+
model.quality = convert_quality_list(odcs_schema.quality)
|
|
215
210
|
model.title = schema_name
|
|
216
|
-
if odcs_schema.
|
|
217
|
-
model.config = {"dataGranularityDescription": odcs_schema.
|
|
211
|
+
if odcs_schema.dataGranularityDescription is not None:
|
|
212
|
+
model.config = {"dataGranularityDescription": odcs_schema.dataGranularityDescription}
|
|
218
213
|
result[model_name] = model
|
|
219
214
|
|
|
220
215
|
return result
|
|
221
216
|
|
|
222
217
|
|
|
223
|
-
def
|
|
218
|
+
def convert_quality_list(odcs_quality_list):
|
|
219
|
+
"""Convert a list of ODCS DataQuality objects to datacontract Quality objects"""
|
|
220
|
+
quality_list = []
|
|
221
|
+
|
|
222
|
+
if odcs_quality_list is not None:
|
|
223
|
+
for odcs_quality in odcs_quality_list:
|
|
224
|
+
quality = Quality(type=odcs_quality.type)
|
|
225
|
+
|
|
226
|
+
if odcs_quality.description is not None:
|
|
227
|
+
quality.description = odcs_quality.description
|
|
228
|
+
if odcs_quality.query is not None:
|
|
229
|
+
quality.query = odcs_quality.query
|
|
230
|
+
if odcs_quality.mustBe is not None:
|
|
231
|
+
quality.mustBe = odcs_quality.mustBe
|
|
232
|
+
if odcs_quality.mustNotBe is not None:
|
|
233
|
+
quality.mustNotBe = odcs_quality.mustNotBe
|
|
234
|
+
if odcs_quality.mustBeGreaterThan is not None:
|
|
235
|
+
quality.mustBeGreaterThan = odcs_quality.mustBeGreaterThan
|
|
236
|
+
if odcs_quality.mustBeGreaterOrEqualTo is not None:
|
|
237
|
+
quality.mustBeGreaterThanOrEqualTo = odcs_quality.mustBeGreaterOrEqualTo
|
|
238
|
+
if odcs_quality.mustBeLessThan is not None:
|
|
239
|
+
quality.mustBeLessThan = odcs_quality.mustBeLessThan
|
|
240
|
+
if odcs_quality.mustBeLessOrEqualTo is not None:
|
|
241
|
+
quality.mustBeLessThanOrEqualTo = odcs_quality.mustBeLessOrEqualTo
|
|
242
|
+
if odcs_quality.mustBeBetween is not None:
|
|
243
|
+
quality.mustBeBetween = odcs_quality.mustBeBetween
|
|
244
|
+
if odcs_quality.mustNotBeBetween is not None:
|
|
245
|
+
quality.mustNotBeBetween = odcs_quality.mustNotBeBetween
|
|
246
|
+
if odcs_quality.engine is not None:
|
|
247
|
+
quality.engine = odcs_quality.engine
|
|
248
|
+
if odcs_quality.implementation is not None:
|
|
249
|
+
quality.implementation = odcs_quality.implementation
|
|
250
|
+
if odcs_quality.businessImpact is not None:
|
|
251
|
+
quality.model_extra["businessImpact"] = odcs_quality.businessImpact
|
|
252
|
+
if odcs_quality.dimension is not None:
|
|
253
|
+
quality.model_extra["dimension"] = odcs_quality.dimension
|
|
254
|
+
if odcs_quality.rule is not None:
|
|
255
|
+
quality.model_extra["rule"] = odcs_quality.rule
|
|
256
|
+
if odcs_quality.schedule is not None:
|
|
257
|
+
quality.model_extra["schedule"] = odcs_quality.schedule
|
|
258
|
+
if odcs_quality.scheduler is not None:
|
|
259
|
+
quality.model_extra["scheduler"] = odcs_quality.scheduler
|
|
260
|
+
if odcs_quality.severity is not None:
|
|
261
|
+
quality.model_extra["severity"] = odcs_quality.severity
|
|
262
|
+
if odcs_quality.method is not None:
|
|
263
|
+
quality.model_extra["method"] = odcs_quality.method
|
|
264
|
+
if odcs_quality.customProperties is not None:
|
|
265
|
+
quality.model_extra["customProperties"] = []
|
|
266
|
+
for item in odcs_quality.customProperties:
|
|
267
|
+
quality.model_extra["customProperties"].append(
|
|
268
|
+
{
|
|
269
|
+
"property": item.property,
|
|
270
|
+
"value": item.value,
|
|
271
|
+
}
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
quality_list.append(quality)
|
|
275
|
+
|
|
276
|
+
return quality_list
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def import_field_config(odcs_property: SchemaProperty, server_type=None) -> Dict[str, Any]:
|
|
224
280
|
config = {}
|
|
225
|
-
if odcs_property.
|
|
226
|
-
config["criticalDataElement"] = odcs_property.
|
|
227
|
-
if odcs_property.
|
|
228
|
-
config["encryptedName"] = odcs_property.
|
|
229
|
-
if odcs_property.
|
|
230
|
-
config["partitionKeyPosition"] = odcs_property.
|
|
231
|
-
if odcs_property.
|
|
232
|
-
config["partitioned"] = odcs_property.
|
|
233
|
-
|
|
234
|
-
if odcs_property.
|
|
235
|
-
for item in odcs_property.
|
|
236
|
-
config[item
|
|
237
|
-
|
|
238
|
-
physical_type = odcs_property.
|
|
281
|
+
if odcs_property.criticalDataElement is not None:
|
|
282
|
+
config["criticalDataElement"] = odcs_property.criticalDataElement
|
|
283
|
+
if odcs_property.encryptedName is not None:
|
|
284
|
+
config["encryptedName"] = odcs_property.encryptedName
|
|
285
|
+
if odcs_property.partitionKeyPosition is not None:
|
|
286
|
+
config["partitionKeyPosition"] = odcs_property.partitionKeyPosition
|
|
287
|
+
if odcs_property.partitioned is not None:
|
|
288
|
+
config["partitioned"] = odcs_property.partitioned
|
|
289
|
+
|
|
290
|
+
if odcs_property.customProperties is not None:
|
|
291
|
+
for item in odcs_property.customProperties:
|
|
292
|
+
config[item.property] = item.value
|
|
293
|
+
|
|
294
|
+
physical_type = odcs_property.physicalType
|
|
239
295
|
if physical_type is not None:
|
|
240
296
|
if server_type == "postgres" or server_type == "postgresql":
|
|
241
297
|
config["postgresType"] = physical_type
|
|
@@ -255,13 +311,13 @@ def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict
|
|
|
255
311
|
return config
|
|
256
312
|
|
|
257
313
|
|
|
258
|
-
def has_composite_primary_key(odcs_properties) -> bool:
|
|
259
|
-
primary_keys = [prop for prop in odcs_properties if prop.
|
|
314
|
+
def has_composite_primary_key(odcs_properties: List[SchemaProperty]) -> bool:
|
|
315
|
+
primary_keys = [prop for prop in odcs_properties if prop.primaryKey is not None and prop.primaryKey]
|
|
260
316
|
return len(primary_keys) > 1
|
|
261
317
|
|
|
262
318
|
|
|
263
319
|
def import_fields(
|
|
264
|
-
odcs_properties:
|
|
320
|
+
odcs_properties: List[SchemaProperty], custom_type_mappings: Dict[str, str], server_type
|
|
265
321
|
) -> Dict[str, Field]:
|
|
266
322
|
logger = logging.getLogger(__name__)
|
|
267
323
|
result = {}
|
|
@@ -270,50 +326,51 @@ def import_fields(
|
|
|
270
326
|
return result
|
|
271
327
|
|
|
272
328
|
for odcs_property in odcs_properties:
|
|
273
|
-
mapped_type = map_type(odcs_property.
|
|
329
|
+
mapped_type = map_type(odcs_property.logicalType, custom_type_mappings)
|
|
274
330
|
if mapped_type is not None:
|
|
275
|
-
property_name = odcs_property
|
|
276
|
-
description = odcs_property.
|
|
331
|
+
property_name = odcs_property.name
|
|
332
|
+
description = odcs_property.description if odcs_property.description is not None else None
|
|
277
333
|
field = Field(
|
|
278
334
|
description=" ".join(description.splitlines()) if description is not None else None,
|
|
279
335
|
type=mapped_type,
|
|
280
|
-
title=odcs_property.
|
|
281
|
-
required=odcs_property.
|
|
282
|
-
primaryKey=odcs_property.
|
|
283
|
-
if not has_composite_primary_key(odcs_properties) and odcs_property.
|
|
336
|
+
title=odcs_property.businessName,
|
|
337
|
+
required=odcs_property.required if odcs_property.required is not None else None,
|
|
338
|
+
primaryKey=odcs_property.primaryKey
|
|
339
|
+
if not has_composite_primary_key(odcs_properties) and odcs_property.primaryKey is not None
|
|
284
340
|
else False,
|
|
285
|
-
unique=odcs_property.
|
|
286
|
-
examples=odcs_property.
|
|
287
|
-
classification=odcs_property.
|
|
288
|
-
if odcs_property.
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
if odcs_property.get("properties") is not None else {},
|
|
341
|
+
unique=odcs_property.unique if odcs_property.unique else None,
|
|
342
|
+
examples=odcs_property.examples if odcs_property.examples is not None else None,
|
|
343
|
+
classification=odcs_property.classification if odcs_property.classification is not None else None,
|
|
344
|
+
tags=odcs_property.tags if odcs_property.tags is not None else None,
|
|
345
|
+
quality=convert_quality_list(odcs_property.quality),
|
|
346
|
+
fields=import_fields(odcs_property.properties, custom_type_mappings, server_type)
|
|
347
|
+
if odcs_property.properties is not None
|
|
348
|
+
else {},
|
|
294
349
|
config=import_field_config(odcs_property, server_type),
|
|
295
|
-
format=odcs_property
|
|
350
|
+
format=getattr(odcs_property, "format", None),
|
|
296
351
|
)
|
|
297
|
-
#mapped_type is array
|
|
298
|
-
if field.type == "array" and odcs_property.
|
|
299
|
-
#nested array object
|
|
300
|
-
if odcs_property.
|
|
301
|
-
field.items= Field(
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
352
|
+
# mapped_type is array
|
|
353
|
+
if field.type == "array" and odcs_property.items is not None:
|
|
354
|
+
# nested array object
|
|
355
|
+
if odcs_property.items.logicalType == "object":
|
|
356
|
+
field.items = Field(
|
|
357
|
+
type="object",
|
|
358
|
+
fields=import_fields(odcs_property.items.properties, custom_type_mappings, server_type),
|
|
359
|
+
)
|
|
360
|
+
# array of simple type
|
|
361
|
+
elif odcs_property.items.logicalType is not None:
|
|
362
|
+
field.items = Field(type=odcs_property.items.logicalType)
|
|
363
|
+
|
|
307
364
|
# enum from quality validValues as enum
|
|
308
365
|
if field.type == "string":
|
|
309
366
|
for q in field.quality:
|
|
310
|
-
if hasattr(q,"validValues"):
|
|
367
|
+
if hasattr(q, "validValues"):
|
|
311
368
|
field.enum = q.validValues
|
|
312
369
|
|
|
313
370
|
result[property_name] = field
|
|
314
371
|
else:
|
|
315
372
|
logger.info(
|
|
316
|
-
f"Can't map {odcs_property.
|
|
373
|
+
f"Can't map {odcs_property.name} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{odcs_property.logicalType}' that defines your expected type as the 'value'"
|
|
317
374
|
)
|
|
318
375
|
|
|
319
376
|
return result
|
|
@@ -331,28 +388,28 @@ def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
|
|
|
331
388
|
return None
|
|
332
389
|
|
|
333
390
|
|
|
334
|
-
def get_custom_type_mappings(odcs_custom_properties: List[
|
|
391
|
+
def get_custom_type_mappings(odcs_custom_properties: List[CustomProperty]) -> Dict[str, str]:
|
|
335
392
|
result = {}
|
|
336
393
|
if odcs_custom_properties is not None:
|
|
337
394
|
for prop in odcs_custom_properties:
|
|
338
|
-
if prop
|
|
339
|
-
odcs_type_name = prop[
|
|
340
|
-
datacontract_type = prop
|
|
395
|
+
if prop.property.startswith("dc_mapping_"):
|
|
396
|
+
odcs_type_name = prop.property[11:] # Changed substring to slice
|
|
397
|
+
datacontract_type = prop.value
|
|
341
398
|
result[odcs_type_name] = datacontract_type
|
|
342
399
|
|
|
343
400
|
return result
|
|
344
401
|
|
|
345
402
|
|
|
346
|
-
def get_owner(odcs_custom_properties: List[
|
|
403
|
+
def get_owner(odcs_custom_properties: List[CustomProperty]) -> str | None:
|
|
347
404
|
if odcs_custom_properties is not None:
|
|
348
405
|
for prop in odcs_custom_properties:
|
|
349
|
-
if prop
|
|
350
|
-
return prop
|
|
406
|
+
if prop.property == "owner":
|
|
407
|
+
return prop.value
|
|
351
408
|
|
|
352
409
|
return None
|
|
353
410
|
|
|
354
411
|
|
|
355
|
-
def import_tags(
|
|
356
|
-
if
|
|
412
|
+
def import_tags(odcs: OpenDataContractStandard) -> List[str] | None:
|
|
413
|
+
if odcs.tags is None:
|
|
357
414
|
return None
|
|
358
|
-
return
|
|
415
|
+
return odcs.tags
|
|
@@ -238,7 +238,6 @@ def import_protobuf(
|
|
|
238
238
|
os.remove(descriptor_file)
|
|
239
239
|
|
|
240
240
|
|
|
241
|
-
|
|
242
241
|
class ProtoBufImporter(Importer):
|
|
243
242
|
def __init__(self, name):
|
|
244
243
|
# 'name' is passed by the importer factory.
|
|
@@ -263,4 +262,3 @@ class ProtoBufImporter(Importer):
|
|
|
263
262
|
"""
|
|
264
263
|
# Wrap the source in a list because import_protobuf expects a list of sources.
|
|
265
264
|
return import_protobuf(data_contract_specification, [source], import_args)
|
|
266
|
-
|
|
@@ -154,5 +154,7 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
|
|
|
154
154
|
return "null"
|
|
155
155
|
elif isinstance(spark_type, types.VarcharType):
|
|
156
156
|
return "varchar"
|
|
157
|
+
elif isinstance(spark_type, types.VariantType):
|
|
158
|
+
return "variant"
|
|
157
159
|
else:
|
|
158
160
|
raise ValueError(f"Unsupported Spark type: {spark_type}")
|
|
@@ -23,9 +23,7 @@ class DescriptionLinter(Linter):
|
|
|
23
23
|
result = result.with_error(f"Model '{model_name}' has empty description.")
|
|
24
24
|
for field_name, field in model.fields.items():
|
|
25
25
|
if not field.description:
|
|
26
|
-
result = result.with_error(
|
|
27
|
-
f"Field '{field_name}' in model '{model_name}'" f" has empty description."
|
|
28
|
-
)
|
|
26
|
+
result = result.with_error(f"Field '{field_name}' in model '{model_name}' has empty description.")
|
|
29
27
|
for definition_name, definition in contract.definitions.items():
|
|
30
28
|
if not definition.description:
|
|
31
29
|
result = result.with_error(f"Definition '{definition_name}' has empty description.")
|
|
@@ -34,8 +34,7 @@ class FieldReferenceLinter(Linter):
|
|
|
34
34
|
|
|
35
35
|
if ref_model not in contract.models:
|
|
36
36
|
result = result.with_error(
|
|
37
|
-
f"Field '{field_name}' in model '{model_name}'"
|
|
38
|
-
f" references non-existing model '{ref_model}'."
|
|
37
|
+
f"Field '{field_name}' in model '{model_name}' references non-existing model '{ref_model}'."
|
|
39
38
|
)
|
|
40
39
|
else:
|
|
41
40
|
ref_model_obj = contract.models[ref_model]
|
|
@@ -41,10 +41,10 @@ class NoticePeriodLinter(Linter):
|
|
|
41
41
|
if not period:
|
|
42
42
|
return LinterResult.cautious("No notice period defined.")
|
|
43
43
|
if not period.startswith("P"):
|
|
44
|
-
return LinterResult.erroneous(f"Notice period '{period}' is not a valid
|
|
44
|
+
return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.")
|
|
45
45
|
if period == "P":
|
|
46
46
|
return LinterResult.erroneous(
|
|
47
|
-
"Notice period 'P' is not a valid
|
|
47
|
+
"Notice period 'P' is not a valid ISO8601 duration, requires at least one duration to be specified."
|
|
48
48
|
)
|
|
49
49
|
if (
|
|
50
50
|
not self.simple.fullmatch(period)
|
|
@@ -40,7 +40,7 @@ class ValidFieldConstraintsLinter(Linter):
|
|
|
40
40
|
):
|
|
41
41
|
case (True, True, _, _) if min > max:
|
|
42
42
|
return LinterResult.erroneous(
|
|
43
|
-
f"Minimum {min} is greater than maximum {max} on
|
|
43
|
+
f"Minimum {min} is greater than maximum {max} on field '{field_name}' in model '{model_name}'."
|
|
44
44
|
)
|
|
45
45
|
case (_, _, True, True) if xmin >= xmax:
|
|
46
46
|
return LinterResult.erroneous(
|
|
@@ -68,11 +68,11 @@ class ValidFieldConstraintsLinter(Linter):
|
|
|
68
68
|
result = LinterResult()
|
|
69
69
|
if field.minLength and field.maxLength and field.minLength > field.maxLength:
|
|
70
70
|
result = result.with_error(
|
|
71
|
-
f"Minimum length is greater that maximum length on
|
|
71
|
+
f"Minimum length is greater that maximum length on field '{field_name}' in model '{model_name}'."
|
|
72
72
|
)
|
|
73
73
|
if field.pattern and field.format:
|
|
74
74
|
result = result.with_error(
|
|
75
|
-
f"Both a pattern and a format are defined for field
|
|
75
|
+
f"Both a pattern and a format are defined for field '{field_name}' in model '{model_name}'."
|
|
76
76
|
)
|
|
77
77
|
return result
|
|
78
78
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from datacontract_specification.model import *
|