datacontract-cli 0.10.23__py3-none-any.whl → 0.10.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +12 -5
- datacontract/catalog/catalog.py +5 -3
- datacontract/cli.py +119 -13
- datacontract/data_contract.py +145 -67
- datacontract/engines/data_contract_checks.py +366 -60
- datacontract/engines/data_contract_test.py +50 -4
- datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +27 -3
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/duckdb_connection.py +65 -6
- datacontract/engines/soda/connections/kafka.py +4 -2
- datacontract/engines/soda/connections/oracle.py +50 -0
- datacontract/export/avro_converter.py +20 -3
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/dbt_converter.py +36 -7
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +3 -0
- datacontract/export/exporter_factory.py +17 -1
- datacontract/export/great_expectations_converter.py +55 -5
- datacontract/export/{html_export.py → html_exporter.py} +31 -20
- datacontract/export/markdown_converter.py +134 -5
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +193 -149
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/export/rdf_converter.py +2 -2
- datacontract/export/sodacl_converter.py +9 -1
- datacontract/export/spark_converter.py +31 -4
- datacontract/export/sql_converter.py +6 -2
- datacontract/export/sql_type_converter.py +124 -8
- datacontract/imports/avro_importer.py +63 -12
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/excel_importer.py +1112 -0
- datacontract/imports/importer.py +16 -3
- datacontract/imports/importer_factory.py +17 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/odcs_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +367 -151
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +117 -13
- datacontract/imports/sql_importer.py +32 -16
- datacontract/imports/unity_importer.py +84 -38
- datacontract/init/init_template.py +1 -1
- datacontract/integration/entropy_data.py +126 -0
- datacontract/lint/resolve.py +112 -23
- datacontract/lint/schema.py +24 -15
- datacontract/lint/urls.py +17 -3
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/odcs.py +13 -0
- datacontract/model/run.py +3 -0
- datacontract/output/junit_test_results.py +3 -3
- datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/schemas/odcs-3.1.0.schema.json +2809 -0
- datacontract/templates/datacontract.html +54 -3
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +5 -2
- datacontract/templates/partials/server.html +2 -0
- datacontract/templates/style/output.css +319 -145
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/METADATA +711 -433
- datacontract_cli-0.10.40.dist-info/RECORD +121 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info/licenses}/LICENSE +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/integration/datamesh_manager.py +0 -72
- datacontract/lint/lint.py +0 -142
- datacontract/lint/linters/description_linter.py +0 -35
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -48
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -100
- datacontract/model/data_contract_specification.py +0 -327
- datacontract_cli-0.10.23.dist-info/RECORD +0 -113
- /datacontract/{lint/linters → output}/__init__.py +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/top_level.txt +0 -0
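The diff below appears to be datacontract/imports/odcs_v3_importer.py (the +367 -151 entry above), rewritten to parse ODCS documents through the open_data_contract_standard model instead of plain dictionaries. The removed (-) lines were truncated when this diff was captured and are reproduced as-is. As a minimal sketch, the new entry points visible in the diff might be called directly like this; the import paths and the default-constructed DataContractSpecification are assumptions, not taken from this diff:

    # Hedged sketch: function names come from the diff below; the module paths and
    # the default-constructed DataContractSpecification are assumptions.
    from datacontract.imports.odcs_v3_importer import import_odcs_v3_as_dcs, parse_odcs_v3_from_str
    from datacontract.model.data_contract_specification import DataContractSpecification

    # Parse an ODCS v3 YAML string into the OpenDataContractStandard model ...
    odcs = parse_odcs_v3_from_str(open("odcs.yaml").read())

    # ... or read a source file and map it onto a DataContractSpecification in one step.
    dcs = import_odcs_v3_as_dcs(DataContractSpecification(), "odcs.yaml")
    print(dcs.id, list(dcs.models))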
@@ -1,9 +1,11 @@
 import datetime
 import logging
+import re
 from typing import Any, Dict, List
 from venv import logger

-import
+from datacontract_specification.model import Quality
+from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard, SchemaProperty

 from datacontract.imports.importer import Importer
 from datacontract.lint.resources import read_resource
@@ -14,9 +16,9 @@ from datacontract.model.data_contract_specification import (
     Field,
     Info,
     Model,
-    Quality,
     Retention,
     Server,
+    ServerRole,
     ServiceLevel,
     Terms,
 )
@@ -27,19 +29,20 @@ class OdcsImporter(Importer):
     def import_source(
         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
     ) -> DataContractSpecification:
-        return
+        return import_odcs_v3_as_dcs(data_contract_specification, source)


-def
+def import_odcs_v3_as_dcs(
+    data_contract_specification: DataContractSpecification, source: str
+) -> DataContractSpecification:
     source_str = read_resource(source)
-
+    odcs = parse_odcs_v3_from_str(source_str)
+    return import_from_odcs(data_contract_specification, odcs)


-def
-    data_contract_specification: DataContractSpecification, source_str: str
-) -> DataContractSpecification:
+def parse_odcs_v3_from_str(source_str):
     try:
-
+        odcs = OpenDataContractStandard.from_string(source_str)
     except Exception as e:
         raise DataContractException(
             type="schema",
@@ -48,130 +51,145 @@ def import_odcs_v3_from_str(
             engine="datacontract",
             original_exception=e,
         )
+    return odcs

-    data_contract_specification.id = odcs_contract["id"]
-    data_contract_specification.info = import_info(odcs_contract)
-    data_contract_specification.servers = import_servers(odcs_contract)
-    data_contract_specification.terms = import_terms(odcs_contract)
-    data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
-    data_contract_specification.models = import_models(odcs_contract)
-    data_contract_specification.tags = import_tags(odcs_contract)

+def import_from_odcs(data_contract_specification: DataContractSpecification, odcs: OpenDataContractStandard):
+    data_contract_specification.id = odcs.id
+    data_contract_specification.info = import_info(odcs)
+    data_contract_specification.servers = import_servers(odcs)
+    data_contract_specification.terms = import_terms(odcs)
+    data_contract_specification.servicelevels = import_servicelevels(odcs)
+    data_contract_specification.models = import_models(odcs)
+    data_contract_specification.tags = import_tags(odcs)
     return data_contract_specification


-def import_info(
+def import_info(odcs: Any) -> Info:
     info = Info()

-    info.title =
+    info.title = odcs.name if odcs.name is not None else ""

-    if
-        info.version =
+    if odcs.version is not None:
+        info.version = odcs.version

     # odcs.description.purpose => datacontract.description
-    if
-        info.description =
+    if odcs.description is not None and odcs.description.purpose is not None:
+        info.description = odcs.description.purpose

     # odcs.domain => datacontract.owner
-
-
+    owner = get_owner(odcs.customProperties)
+    if owner is not None:
+        info.owner = owner

     # add dataProduct as custom property
-    if
-        info.dataProduct =
+    if odcs.dataProduct is not None:
+        info.dataProduct = odcs.dataProduct

     # add tenant as custom property
-    if
-        info.tenant =
+    if odcs.tenant is not None:
+        info.tenant = odcs.tenant

     return info


-def
-    if
+def import_server_roles(roles: List[Dict]) -> List[ServerRole] | None:
+    if roles is None:
+        return None
+    result = []
+    for role in roles:
+        server_role = ServerRole()
+        server_role.name = role.role
+        server_role.description = role.description
+        result.append(server_role)
+
+
+def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
+    if odcs.servers is None:
         return None
     servers = {}
-    for odcs_server in
-        server_name = odcs_server.
+    for odcs_server in odcs.servers:
+        server_name = odcs_server.server
         if server_name is None:
             logger.warning("Server name is missing, skipping server")
             continue

         server = Server()
-        server.type = odcs_server.
-        server.description = odcs_server.
-        server.environment = odcs_server.
-        server.format = odcs_server.
-        server.project = odcs_server.
-        server.dataset = odcs_server.
-        server.path = odcs_server.
-        server.delimiter = odcs_server.
-        server.endpointUrl = odcs_server.
-        server.location = odcs_server.
-        server.account = odcs_server.
-        server.database = odcs_server.
-        server.schema_ = odcs_server.
-        server.
-        server.
-        server.
-        server.
-        server.
-        server.
-        server.
-        server.
-        server.driver = odcs_server
-        server.roles = odcs_server.
+        server.type = odcs_server.type
+        server.description = odcs_server.description
+        server.environment = odcs_server.environment
+        server.format = odcs_server.format
+        server.project = odcs_server.project
+        server.dataset = odcs_server.dataset
+        server.path = odcs_server.path
+        server.delimiter = odcs_server.delimiter
+        server.endpointUrl = odcs_server.endpointUrl
+        server.location = odcs_server.location
+        server.account = odcs_server.account
+        server.database = odcs_server.database
+        server.schema_ = odcs_server.schema_
+        server.service_name = odcs_server.serviceName
+        server.host = odcs_server.host
+        server.port = odcs_server.port
+        server.catalog = odcs_server.catalog
+        server.stagingDir = odcs_server.stagingDir
+        server.topic = getattr(odcs_server, "topic", None)
+        server.http_path = getattr(odcs_server, "http_path", None)
+        server.token = getattr(odcs_server, "token", None)
+        server.driver = getattr(odcs_server, "driver", None)
+        server.roles = import_server_roles(odcs_server.roles)
+        server.storageAccount = (
+            to_azure_storage_account(odcs_server.location)
+            if server.type == "azure" and "://" in server.location
+            else None
+        )

         servers[server_name] = server
     return servers


-def import_terms(
-    if
+def import_terms(odcs: Any) -> Terms | None:
+    if odcs.description is None:
         return None
-    if
-        odcs_contract.get("description").get("usage") is not None
-        or odcs_contract.get("description").get("limitations") is not None
-        or odcs_contract.get("price") is not None
-    ):
+    if odcs.description.usage is not None or odcs.description.limitations is not None or odcs.price is not None:
         terms = Terms()
-        if
-            terms.usage =
-        if
-            terms.limitations =
-        if
-            terms.billing = f"{
+        if odcs.description.usage is not None:
+            terms.usage = odcs.description.usage
+        if odcs.description.limitations is not None:
+            terms.limitations = odcs.description.limitations
+        if odcs.price is not None:
+            terms.billing = f"{odcs.price.priceAmount} {odcs.price.priceCurrency} / {odcs.price.priceUnit}"

         return terms
     else:
         return None


-def import_servicelevels(
+def import_servicelevels(odcs: Any) -> ServiceLevel:
     # find the two properties we can map (based on the examples)
-    sla_properties =
-    availability = next((p for p in sla_properties if p
-    retention = next((p for p in sla_properties if p
+    sla_properties = odcs.slaProperties if odcs.slaProperties is not None else []
+    availability = next((p for p in sla_properties if p.property == "generalAvailability"), None)
+    retention = next((p for p in sla_properties if p.property == "retention"), None)

     if availability is not None or retention is not None:
         servicelevel = ServiceLevel()

         if availability is not None:
-            value = availability.
+            value = availability.value
             if isinstance(value, datetime.datetime):
                 value = value.isoformat()
             servicelevel.availability = Availability(description=value)

         if retention is not None:
-            servicelevel.retention = Retention(period=f"{retention.
+            servicelevel.retention = Retention(period=f"{retention.value}{retention.unit}")

         return servicelevel
     else:
         return None


-def get_server_type(
-    servers = import_servers(
+def get_server_type(odcs: OpenDataContractStandard) -> str | None:
+    servers = import_servers(odcs)
     if servers is None or len(servers) == 0:
         return None
     # get first server from map
@@ -179,49 +197,123 @@ def get_server_type(odcs_contract: Dict[str, Any]) -> str | None:
     return server.type


-def
-
+def get_composite_primary_keys(properties: List[SchemaProperty]) -> list[str]:
+    primary_keys = [
+        (property.name, property.primaryKeyPosition)
+        for property in properties
+        if property.name and property.primaryKey is not None and property.primaryKey
+    ]
+
+    primary_keys.sort(key=lambda x: x[1] or -1)
+    return [name for name, _ in primary_keys]
+

-
+def import_models(odcs: Any) -> Dict[str, Model]:
+    custom_type_mappings = get_custom_type_mappings(odcs.customProperties)
+
+    odcs_schemas = odcs.schema_ if odcs.schema_ is not None else []
     result = {}

     for odcs_schema in odcs_schemas:
-        schema_name = odcs_schema.
-        schema_physical_name = odcs_schema.
-        schema_description = odcs_schema.
+        schema_name = odcs_schema.name
+        schema_physical_name = odcs_schema.physicalName
+        schema_description = odcs_schema.description if odcs_schema.description is not None else ""
         model_name = schema_physical_name if schema_physical_name is not None else schema_name
-        model = Model(
-
-
+        model = Model(
+            description=" ".join(schema_description.splitlines()) if schema_description else "",
+            type="table",
+            tags=odcs_schema.tags if odcs_schema.tags is not None else None,
         )
-
-
-
-
+        model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
+        if has_composite_primary_key(odcs_properties=odcs_schema.properties):
+            model.primaryKey = get_composite_primary_keys(odcs_schema.properties)
+        if odcs_schema.quality is not None:
+            model.quality = convert_quality_list(odcs_schema.quality)
         model.title = schema_name
-        if odcs_schema.
-            model.config = {"dataGranularityDescription": odcs_schema.
+        if odcs_schema.dataGranularityDescription is not None:
+            model.config = {"dataGranularityDescription": odcs_schema.dataGranularityDescription}
         result[model_name] = model

     return result


-def
+def convert_quality_list(odcs_quality_list):
+    """Convert a list of ODCS DataQuality objects to datacontract Quality objects"""
+    quality_list = []
+
+    if odcs_quality_list is not None:
+        for odcs_quality in odcs_quality_list:
+            quality = Quality(type=odcs_quality.type)
+
+            if odcs_quality.description is not None:
+                quality.description = odcs_quality.description
+            if odcs_quality.query is not None:
+                quality.query = odcs_quality.query
+            if odcs_quality.rule is not None:
+                quality.metric = odcs_quality.rule
+            if odcs_quality.mustBe is not None:
+                quality.mustBe = odcs_quality.mustBe
+            if odcs_quality.mustNotBe is not None:
+                quality.mustNotBe = odcs_quality.mustNotBe
+            if odcs_quality.mustBeGreaterThan is not None:
+                quality.mustBeGreaterThan = odcs_quality.mustBeGreaterThan
+            if odcs_quality.mustBeGreaterOrEqualTo is not None:
+                quality.mustBeGreaterOrEqualTo = odcs_quality.mustBeGreaterOrEqualTo
+            if odcs_quality.mustBeLessThan is not None:
+                quality.mustBeLessThan = odcs_quality.mustBeLessThan
+            if odcs_quality.mustBeLessOrEqualTo is not None:
+                quality.mustBeLessOrEqualTo = odcs_quality.mustBeLessOrEqualTo
+            if odcs_quality.mustBeBetween is not None:
+                quality.mustBeBetween = odcs_quality.mustBeBetween
+            if odcs_quality.mustNotBeBetween is not None:
+                quality.mustNotBeBetween = odcs_quality.mustNotBeBetween
+            if odcs_quality.engine is not None:
+                quality.engine = odcs_quality.engine
+            if odcs_quality.implementation is not None:
+                quality.implementation = odcs_quality.implementation
+            if odcs_quality.businessImpact is not None:
+                quality.model_extra["businessImpact"] = odcs_quality.businessImpact
+            if odcs_quality.dimension is not None:
+                quality.model_extra["dimension"] = odcs_quality.dimension
+            if odcs_quality.schedule is not None:
+                quality.model_extra["schedule"] = odcs_quality.schedule
+            if odcs_quality.scheduler is not None:
+                quality.model_extra["scheduler"] = odcs_quality.scheduler
+            if odcs_quality.severity is not None:
+                quality.model_extra["severity"] = odcs_quality.severity
+            if odcs_quality.method is not None:
+                quality.model_extra["method"] = odcs_quality.method
+            if odcs_quality.customProperties is not None:
+                quality.model_extra["customProperties"] = []
+                for item in odcs_quality.customProperties:
+                    quality.model_extra["customProperties"].append(
+                        {
+                            "property": item.property,
+                            "value": item.value,
+                        }
+                    )
+
+            quality_list.append(quality)
+
+    return quality_list
+
+
+def import_field_config(odcs_property: SchemaProperty, server_type=None) -> dict[Any, Any] | None:
     config = {}
-    if odcs_property.
-        config["criticalDataElement"] = odcs_property.
-    if odcs_property.
-        config["encryptedName"] = odcs_property.
-    if odcs_property.
-        config["partitionKeyPosition"] = odcs_property.
-    if odcs_property.
-        config["partitioned"] = odcs_property.
-
-    if odcs_property.
-        for item in odcs_property.
-            config[item
-
-    physical_type = odcs_property.
+    if odcs_property.criticalDataElement is not None:
+        config["criticalDataElement"] = odcs_property.criticalDataElement
+    if odcs_property.encryptedName is not None:
+        config["encryptedName"] = odcs_property.encryptedName
+    if odcs_property.partitionKeyPosition is not None:
+        config["partitionKeyPosition"] = odcs_property.partitionKeyPosition
+    if odcs_property.partitioned is not None:
+        config["partitioned"] = odcs_property.partitioned
+
+    if odcs_property.customProperties is not None:
+        for item in odcs_property.customProperties:
+            config[item.property] = item.value
+
+    physical_type = odcs_property.physicalType
     if physical_type is not None:
         if server_type == "postgres" or server_type == "postgresql":
             config["postgresType"] = physical_type
@@ -235,82 +327,206 @@ def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict
             config["sqlserverType"] = physical_type
         elif server_type == "databricks":
             config["databricksType"] = physical_type
+        elif server_type == "oracle":
+            config["oracleType"] = physical_type
         else:
             config["physicalType"] = physical_type

+    if len(config) == 0:
+        return None
+
     return config


-def has_composite_primary_key(odcs_properties) -> bool:
-    primary_keys = [prop for prop in odcs_properties if prop.
+def has_composite_primary_key(odcs_properties: List[SchemaProperty]) -> bool:
+    primary_keys = [prop for prop in odcs_properties if prop.primaryKey is not None and prop.primaryKey]
     return len(primary_keys) > 1


 def import_fields(
-    odcs_properties:
+    odcs_properties: List[SchemaProperty], custom_type_mappings: Dict[str, str], server_type
 ) -> Dict[str, Field]:
-    logger = logging.getLogger(__name__)
     result = {}

     if odcs_properties is None:
         return result

     for odcs_property in odcs_properties:
-
-        if
-
-            description = odcs_property.get("description") if odcs_property.get("description") is not None else None
-            field = Field(
-                description=" ".join(description.splitlines()) if description is not None else None,
-                type=mapped_type,
-                title=odcs_property.get("businessName"),
-                required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
-                primaryKey=odcs_property.get("primaryKey")
-                if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
-                else False,
-                unique=odcs_property.get("unique"),
-                examples=odcs_property.get("examples") if odcs_property.get("examples") is not None else None,
-                classification=odcs_property.get("classification")
-                if odcs_property.get("classification") is not None
-                else "",
-                tags=odcs_property.get("tags") if odcs_property.get("tags") is not None else None,
-                quality=odcs_property.get("quality") if odcs_property.get("quality") is not None else [],
-                config=import_field_config(odcs_property, server_type),
-            )
-            result[property_name] = field
-        else:
-            logger.info(
-                f"Can't map {odcs_property.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{odcs_property.get('logicalName')}' that defines your expected type as the 'value'"
-            )
+        field = import_field(odcs_property, odcs_properties, custom_type_mappings, server_type)
+        if field is not None:
+            result[odcs_property.name] = field

     return result


-def
-
+def import_field(
+    odcs_property: SchemaProperty,
+    odcs_properties: List[SchemaProperty],
+    custom_type_mappings: Dict[str, str],
+    server_type: str,
+) -> Field | None:
+    """
+    Import a single ODCS property as a datacontract Field.
+    Returns None if the property cannot be mapped.
+    """
+    logger = logging.getLogger(__name__)
+
+    mapped_type = map_type(odcs_property.logicalType, custom_type_mappings, odcs_property.physicalType)
+
+    if mapped_type is None:
+        type_info = f"logicalType={odcs_property.logicalType}, physicalType={odcs_property.physicalType}"
+        logger.warning(
+            f"Can't map field '{odcs_property.name}' ({type_info}) to the datacontract mapping types. "
+            f"Both logicalType and physicalType are missing or unmappable. "
+            f"Consider introducing a customProperty 'dc_mapping_<type>' that defines your expected type as the 'value'"
+        )
         return None
-
-    if
-
-
-
-
+
+    description = odcs_property.description if odcs_property.description is not None else None
+    field = Field(
+        description=" ".join(description.splitlines()) if description is not None else None,
+        type=mapped_type,
+        title=odcs_property.businessName,
+        required=odcs_property.required if odcs_property.required is not None else None,
+        primaryKey=to_primary_key(odcs_property, odcs_properties),
+        unique=odcs_property.unique if odcs_property.unique else None,
+        examples=odcs_property.examples if odcs_property.examples is not None else None,
+        classification=odcs_property.classification if odcs_property.classification is not None else None,
+        tags=odcs_property.tags if odcs_property.tags is not None else None,
+        quality=convert_quality_list(odcs_property.quality),
+        fields=import_fields(odcs_property.properties, custom_type_mappings, server_type)
+        if odcs_property.properties is not None
+        else {},
+        config=import_field_config(odcs_property, server_type),
+        format=getattr(odcs_property, "format", None),
+    )
+
+    # mapped_type is array
+    if field.type == "array" and odcs_property.items is not None:
+        field.items = import_field(odcs_property.items, [], custom_type_mappings, server_type)
+
+    # enum from quality validValues as enum
+    if field.type == "string":
+        for q in field.quality:
+            if hasattr(q, "validValues"):
+                field.enum = q.validValues
+
+    return field
+
+
+def to_primary_key(odcs_property: SchemaProperty, odcs_properties: list[SchemaProperty]) -> bool | None:
+    if odcs_property.primaryKey is None:
         return None
+    if has_composite_primary_key(odcs_properties):
+        return None
+    return odcs_property.primaryKey
+
+
+def map_type(odcs_logical_type: str, custom_mappings: Dict[str, str], physical_type: str = None) -> str | None:
+    # Try to map logicalType first
+    if odcs_logical_type is not None:
+        t = odcs_logical_type.lower()
+        if t in DATACONTRACT_TYPES:
+            return t
+        elif custom_mappings.get(t) is not None:
+            return custom_mappings.get(t)
+
+    # Fallback to physicalType if logicalType is not mapped
+    if physical_type is not None:
+        pt = physical_type.lower()
+        # Remove parameters from physical type (e.g., VARCHAR(50) -> varchar, DECIMAL(10,2) -> decimal)
+        pt_base = pt.split("(")[0].strip()
+
+        # Try direct mapping of physical type
+        if pt in DATACONTRACT_TYPES:
+            return pt
+        elif pt_base in DATACONTRACT_TYPES:
+            return pt_base
+        elif custom_mappings.get(pt) is not None:
+            return custom_mappings.get(pt)
+        elif custom_mappings.get(pt_base) is not None:
+            return custom_mappings.get(pt_base)
+        # Common physical type mappings
+        elif pt_base in ["varchar", "char", "nvarchar", "nchar", "text", "ntext", "string", "character varying"]:
+            return "string"
+        elif pt_base in ["int", "integer", "smallint", "tinyint", "mediumint", "int2", "int4", "int8"]:
+            return "int"
+        elif pt_base in ["bigint", "long", "int64"]:
+            return "long"
+        elif pt_base in ["float", "real", "float4", "float8"]:
+            return "float"
+        elif pt_base in ["double", "double precision"]:
+            return "double"
+        elif pt_base in ["decimal", "numeric", "number"]:
+            return "decimal"
+        elif pt_base in ["boolean", "bool", "bit"]:
+            return "boolean"
+        elif pt_base in ["timestamp", "datetime", "datetime2", "timestamptz", "timestamp with time zone"]:
+            return "timestamp"
+        elif pt_base in ["date"]:
+            return "date"
+        elif pt_base in ["time"]:
+            return "time"
+        elif pt_base in ["json", "jsonb"]:
+            return "json"
+        elif pt_base in ["array"]:
+            return "array"
+        elif pt_base in ["object", "struct", "record"]:
+            return "object"
+        elif pt_base in ["bytes", "binary", "varbinary", "blob", "bytea"]:
+            return "bytes"
+        else:
+            return None
+    return None


-def get_custom_type_mappings(odcs_custom_properties: List[
+def get_custom_type_mappings(odcs_custom_properties: List[CustomProperty]) -> Dict[str, str]:
     result = {}
     if odcs_custom_properties is not None:
         for prop in odcs_custom_properties:
-            if prop
-                odcs_type_name = prop[
-                datacontract_type = prop
+            if prop.property.startswith("dc_mapping_"):
+                odcs_type_name = prop.property[11:]  # Changed substring to slice
+                datacontract_type = prop.value
                 result[odcs_type_name] = datacontract_type

     return result


-def
-    if
+def get_owner(odcs_custom_properties: List[CustomProperty]) -> str | None:
+    if odcs_custom_properties is not None:
+        for prop in odcs_custom_properties:
+            if prop.property == "owner":
+                return prop.value
+
+    return None
+
+
+def import_tags(odcs: OpenDataContractStandard) -> List[str] | None:
+    if odcs.tags is None:
         return None
-    return
+    return odcs.tags
+
+
+def to_azure_storage_account(location: str) -> str | None:
+    """
+    Converts a storage location string to extract the storage account name.
+    ODCS v3.0 has no explicit field for the storage account. It uses the location field, which is a URI.
+
+    This function parses a storage location string to identify and return the
+    storage account name. It handles two primary patterns:
+    1. Protocol://containerName@storageAccountName
+    2. Protocol://storageAccountName
+
+    :param location: The storage location string to parse, typically following
+        the format protocol://containerName@storageAccountName. or
+        protocol://storageAccountName.
+    :return: The extracted storage account name if found, otherwise None
+    """
+    # to catch protocol://containerName@storageAccountName. pattern from location
+    match = re.search(r"(?<=@)([^.]*)", location, re.IGNORECASE)
+    if match:
+        return match.group()
+    else:
+        # to catch protocol://storageAccountName. pattern from location
+        match = re.search(r"(?<=//)(?!@)([^.]*)", location, re.IGNORECASE)
+        return match.group() if match else None