datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +12 -5
- datacontract/catalog/catalog.py +5 -3
- datacontract/cli.py +116 -10
- datacontract/data_contract.py +143 -65
- datacontract/engines/data_contract_checks.py +366 -60
- datacontract/engines/data_contract_test.py +50 -4
- datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +22 -3
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/duckdb_connection.py +65 -6
- datacontract/engines/soda/connections/kafka.py +4 -2
- datacontract/export/avro_converter.py +20 -3
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/dbt_converter.py +36 -7
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +3 -0
- datacontract/export/exporter_factory.py +17 -1
- datacontract/export/great_expectations_converter.py +55 -5
- datacontract/export/{html_export.py → html_exporter.py} +31 -20
- datacontract/export/markdown_converter.py +134 -5
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +187 -145
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/export/rdf_converter.py +2 -2
- datacontract/export/sodacl_converter.py +9 -1
- datacontract/export/spark_converter.py +31 -4
- datacontract/export/sql_converter.py +6 -2
- datacontract/export/sql_type_converter.py +20 -8
- datacontract/imports/avro_importer.py +63 -12
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/importer.py +16 -3
- datacontract/imports/importer_factory.py +17 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/odcs_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +351 -151
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +117 -13
- datacontract/imports/sql_importer.py +32 -16
- datacontract/imports/unity_importer.py +84 -38
- datacontract/init/init_template.py +1 -1
- datacontract/integration/datamesh_manager.py +16 -2
- datacontract/lint/resolve.py +112 -23
- datacontract/lint/schema.py +24 -15
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/odcs.py +13 -0
- datacontract/model/run.py +3 -0
- datacontract/output/junit_test_results.py +3 -3
- datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +54 -3
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +5 -2
- datacontract/templates/partials/server.html +2 -0
- datacontract/templates/style/output.css +319 -145
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/lint/lint.py +0 -142
- datacontract/lint/linters/description_linter.py +0 -35
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -48
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -100
- datacontract/model/data_contract_specification.py +0 -327
- datacontract_cli-0.10.23.dist-info/RECORD +0 -113
- /datacontract/{lint/linters → output}/__init__.py +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/imports/odcs_v3_importer.py

@@ -1,9 +1,11 @@
 import datetime
 import logging
+import re
 from typing import Any, Dict, List
 from venv import logger

-import
+from datacontract_specification.model import Quality
+from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard, SchemaProperty

 from datacontract.imports.importer import Importer
 from datacontract.lint.resources import read_resource
@@ -14,9 +16,9 @@ from datacontract.model.data_contract_specification import (
     Field,
     Info,
     Model,
-    Quality,
     Retention,
     Server,
+    ServerRole,
     ServiceLevel,
     Terms,
 )
@@ -27,19 +29,20 @@ class OdcsImporter(Importer):
     def import_source(
         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
     ) -> DataContractSpecification:
-        return
+        return import_odcs_v3_as_dcs(data_contract_specification, source)


-def
+def import_odcs_v3_as_dcs(
+    data_contract_specification: DataContractSpecification, source: str
+) -> DataContractSpecification:
     source_str = read_resource(source)
-
+    odcs = parse_odcs_v3_from_str(source_str)
+    return import_from_odcs(data_contract_specification, odcs)


-def
-    data_contract_specification: DataContractSpecification, source_str: str
-) -> DataContractSpecification:
+def parse_odcs_v3_from_str(source_str):
     try:
-
+        odcs = OpenDataContractStandard.from_string(source_str)
     except Exception as e:
         raise DataContractException(
             type="schema",
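For orientation only: the hunk above splits the former one-step import into a parse step (`parse_odcs_v3_from_str`) and a mapping step (`import_from_odcs`). A minimal sketch of calling the new entry points directly follows; the module path is taken from the file list above, while the `odcs.yaml` file name and the assumption that `DataContractSpecification()` can be constructed with defaults are illustrative and not taken from this diff.

    from datacontract.imports.odcs_v3_importer import import_from_odcs, parse_odcs_v3_from_str
    from datacontract.model.data_contract_specification import DataContractSpecification

    # hypothetical ODCS v3 document read from disk
    odcs_yaml = open("odcs.yaml").read()

    # parse into an OpenDataContractStandard model; raises DataContractException on parse errors
    odcs = parse_odcs_v3_from_str(odcs_yaml)

    # map id, info, servers, terms, service levels, models, and tags onto the specification
    spec = import_from_odcs(DataContractSpecification(), odcs)
    print(spec.id, list(spec.models.keys()))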
@@ -48,130 +51,144 @@ def import_odcs_v3_from_str(
             engine="datacontract",
             original_exception=e,
         )
+    return odcs

-    data_contract_specification.id = odcs_contract["id"]
-    data_contract_specification.info = import_info(odcs_contract)
-    data_contract_specification.servers = import_servers(odcs_contract)
-    data_contract_specification.terms = import_terms(odcs_contract)
-    data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
-    data_contract_specification.models = import_models(odcs_contract)
-    data_contract_specification.tags = import_tags(odcs_contract)

+def import_from_odcs(data_contract_specification: DataContractSpecification, odcs: OpenDataContractStandard):
+    data_contract_specification.id = odcs.id
+    data_contract_specification.info = import_info(odcs)
+    data_contract_specification.servers = import_servers(odcs)
+    data_contract_specification.terms = import_terms(odcs)
+    data_contract_specification.servicelevels = import_servicelevels(odcs)
+    data_contract_specification.models = import_models(odcs)
+    data_contract_specification.tags = import_tags(odcs)
     return data_contract_specification


-def import_info(
+def import_info(odcs: Any) -> Info:
     info = Info()

-    info.title =
+    info.title = odcs.name if odcs.name is not None else ""

-    if
-        info.version =
+    if odcs.version is not None:
+        info.version = odcs.version

     # odcs.description.purpose => datacontract.description
-    if
-        info.description =
+    if odcs.description is not None and odcs.description.purpose is not None:
+        info.description = odcs.description.purpose

     # odcs.domain => datacontract.owner
-
-
+    owner = get_owner(odcs.customProperties)
+    if owner is not None:
+        info.owner = owner

     # add dataProduct as custom property
-    if
-        info.dataProduct =
+    if odcs.dataProduct is not None:
+        info.dataProduct = odcs.dataProduct

     # add tenant as custom property
-    if
-        info.tenant =
+    if odcs.tenant is not None:
+        info.tenant = odcs.tenant

     return info


-def
-    if
+def import_server_roles(roles: List[Dict]) -> List[ServerRole] | None:
+    if roles is None:
+        return None
+    result = []
+    for role in roles:
+        server_role = ServerRole()
+        server_role.name = role.role
+        server_role.description = role.description
+        result.append(server_role)
+
+
+def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
+    if odcs.servers is None:
         return None
     servers = {}
-    for odcs_server in
-        server_name = odcs_server.
+    for odcs_server in odcs.servers:
+        server_name = odcs_server.server
         if server_name is None:
             logger.warning("Server name is missing, skipping server")
             continue

         server = Server()
-        server.type = odcs_server.
-        server.description = odcs_server.
-        server.environment = odcs_server.
-        server.format = odcs_server.
-        server.project = odcs_server.
-        server.dataset = odcs_server.
-        server.path = odcs_server.
-        server.delimiter = odcs_server.
-        server.endpointUrl = odcs_server.
-        server.location = odcs_server.
-        server.account = odcs_server.
-        server.database = odcs_server.
-        server.schema_ = odcs_server.
-        server.host = odcs_server.
-        server.port = odcs_server.
-        server.catalog = odcs_server.
-        server.
-        server.
-        server.
-        server.
-        server.
-        server.
-        server.
+        server.type = odcs_server.type
+        server.description = odcs_server.description
+        server.environment = odcs_server.environment
+        server.format = odcs_server.format
+        server.project = odcs_server.project
+        server.dataset = odcs_server.dataset
+        server.path = odcs_server.path
+        server.delimiter = odcs_server.delimiter
+        server.endpointUrl = odcs_server.endpointUrl
+        server.location = odcs_server.location
+        server.account = odcs_server.account
+        server.database = odcs_server.database
+        server.schema_ = odcs_server.schema_
+        server.host = odcs_server.host
+        server.port = odcs_server.port
+        server.catalog = odcs_server.catalog
+        server.stagingDir = odcs_server.stagingDir
+        server.topic = getattr(odcs_server, "topic", None)
+        server.http_path = getattr(odcs_server, "http_path", None)
+        server.token = getattr(odcs_server, "token", None)
+        server.driver = getattr(odcs_server, "driver", None)
+        server.roles = import_server_roles(odcs_server.roles)
+        server.storageAccount = (
+            to_azure_storage_account(odcs_server.location)
+            if server.type == "azure" and "://" in server.location
+            else None
+        )

         servers[server_name] = server
     return servers


-def import_terms(
-    if
+def import_terms(odcs: Any) -> Terms | None:
+    if odcs.description is None:
         return None
-    if
-        odcs_contract.get("description").get("usage") is not None
-        or odcs_contract.get("description").get("limitations") is not None
-        or odcs_contract.get("price") is not None
-    ):
+    if odcs.description.usage is not None or odcs.description.limitations is not None or odcs.price is not None:
         terms = Terms()
-        if
-            terms.usage =
-        if
-            terms.limitations =
-        if
-            terms.billing = f"{
+        if odcs.description.usage is not None:
+            terms.usage = odcs.description.usage
+        if odcs.description.limitations is not None:
+            terms.limitations = odcs.description.limitations
+        if odcs.price is not None:
+            terms.billing = f"{odcs.price.priceAmount} {odcs.price.priceCurrency} / {odcs.price.priceUnit}"

         return terms
     else:
         return None


-def import_servicelevels(
+def import_servicelevels(odcs: Any) -> ServiceLevel:
     # find the two properties we can map (based on the examples)
-    sla_properties =
-    availability = next((p for p in sla_properties if p
-    retention = next((p for p in sla_properties if p
+    sla_properties = odcs.slaProperties if odcs.slaProperties is not None else []
+    availability = next((p for p in sla_properties if p.property == "generalAvailability"), None)
+    retention = next((p for p in sla_properties if p.property == "retention"), None)

     if availability is not None or retention is not None:
         servicelevel = ServiceLevel()

         if availability is not None:
-            value = availability.
+            value = availability.value
             if isinstance(value, datetime.datetime):
                 value = value.isoformat()
             servicelevel.availability = Availability(description=value)

         if retention is not None:
-            servicelevel.retention = Retention(period=f"{retention.
+            servicelevel.retention = Retention(period=f"{retention.value}{retention.unit}")

         return servicelevel
     else:
         return None


-def get_server_type(
-    servers = import_servers(
+def get_server_type(odcs: OpenDataContractStandard) -> str | None:
+    servers = import_servers(odcs)
     if servers is None or len(servers) == 0:
         return None
     # get first server from map
@@ -179,49 +196,110 @@ def get_server_type(odcs_contract: Dict[str, Any]) -> str | None:
     return server.type


-def import_models(
-    custom_type_mappings = get_custom_type_mappings(
+def import_models(odcs: Any) -> Dict[str, Model]:
+    custom_type_mappings = get_custom_type_mappings(odcs.customProperties)

-    odcs_schemas =
+    odcs_schemas = odcs.schema_ if odcs.schema_ is not None else []
     result = {}

     for odcs_schema in odcs_schemas:
-        schema_name = odcs_schema.
-        schema_physical_name = odcs_schema.
-        schema_description = odcs_schema.
+        schema_name = odcs_schema.name
+        schema_physical_name = odcs_schema.physicalName
+        schema_description = odcs_schema.description if odcs_schema.description is not None else ""
         model_name = schema_physical_name if schema_physical_name is not None else schema_name
-        model = Model(
-
-
+        model = Model(
+            description=" ".join(schema_description.splitlines()) if schema_description else "",
+            type="table",
+            tags=odcs_schema.tags if odcs_schema.tags is not None else None,
         )
-
-
-
-        model.quality = [Quality.model_validate(q) for q in odcs_schema.get("quality")]
+        model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
+        if odcs_schema.quality is not None:
+            model.quality = convert_quality_list(odcs_schema.quality)
         model.title = schema_name
-        if odcs_schema.
-            model.config = {"dataGranularityDescription": odcs_schema.
+        if odcs_schema.dataGranularityDescription is not None:
+            model.config = {"dataGranularityDescription": odcs_schema.dataGranularityDescription}
         result[model_name] = model

     return result


-def
+def convert_quality_list(odcs_quality_list):
+    """Convert a list of ODCS DataQuality objects to datacontract Quality objects"""
+    quality_list = []
+
+    if odcs_quality_list is not None:
+        for odcs_quality in odcs_quality_list:
+            quality = Quality(type=odcs_quality.type)
+
+            if odcs_quality.description is not None:
+                quality.description = odcs_quality.description
+            if odcs_quality.query is not None:
+                quality.query = odcs_quality.query
+            if odcs_quality.rule is not None:
+                quality.metric = odcs_quality.rule
+            if odcs_quality.mustBe is not None:
+                quality.mustBe = odcs_quality.mustBe
+            if odcs_quality.mustNotBe is not None:
+                quality.mustNotBe = odcs_quality.mustNotBe
+            if odcs_quality.mustBeGreaterThan is not None:
+                quality.mustBeGreaterThan = odcs_quality.mustBeGreaterThan
+            if odcs_quality.mustBeGreaterOrEqualTo is not None:
+                quality.mustBeGreaterOrEqualTo = odcs_quality.mustBeGreaterOrEqualTo
+            if odcs_quality.mustBeLessThan is not None:
+                quality.mustBeLessThan = odcs_quality.mustBeLessThan
+            if odcs_quality.mustBeLessOrEqualTo is not None:
+                quality.mustBeLessOrEqualTo = odcs_quality.mustBeLessOrEqualTo
+            if odcs_quality.mustBeBetween is not None:
+                quality.mustBeBetween = odcs_quality.mustBeBetween
+            if odcs_quality.mustNotBeBetween is not None:
+                quality.mustNotBeBetween = odcs_quality.mustNotBeBetween
+            if odcs_quality.engine is not None:
+                quality.engine = odcs_quality.engine
+            if odcs_quality.implementation is not None:
+                quality.implementation = odcs_quality.implementation
+            if odcs_quality.businessImpact is not None:
+                quality.model_extra["businessImpact"] = odcs_quality.businessImpact
+            if odcs_quality.dimension is not None:
+                quality.model_extra["dimension"] = odcs_quality.dimension
+            if odcs_quality.schedule is not None:
+                quality.model_extra["schedule"] = odcs_quality.schedule
+            if odcs_quality.scheduler is not None:
+                quality.model_extra["scheduler"] = odcs_quality.scheduler
+            if odcs_quality.severity is not None:
+                quality.model_extra["severity"] = odcs_quality.severity
+            if odcs_quality.method is not None:
+                quality.model_extra["method"] = odcs_quality.method
+            if odcs_quality.customProperties is not None:
+                quality.model_extra["customProperties"] = []
+                for item in odcs_quality.customProperties:
+                    quality.model_extra["customProperties"].append(
+                        {
+                            "property": item.property,
+                            "value": item.value,
+                        }
+                    )
+
+            quality_list.append(quality)
+
+    return quality_list
+
+
+def import_field_config(odcs_property: SchemaProperty, server_type=None) -> dict[Any, Any] | None:
     config = {}
-    if odcs_property.
-        config["criticalDataElement"] = odcs_property.
-    if odcs_property.
-        config["encryptedName"] = odcs_property.
-    if odcs_property.
-        config["partitionKeyPosition"] = odcs_property.
-    if odcs_property.
-        config["partitioned"] = odcs_property.
-
-    if odcs_property.
-        for item in odcs_property.
-            config[item
-
-    physical_type = odcs_property.
+    if odcs_property.criticalDataElement is not None:
+        config["criticalDataElement"] = odcs_property.criticalDataElement
+    if odcs_property.encryptedName is not None:
+        config["encryptedName"] = odcs_property.encryptedName
+    if odcs_property.partitionKeyPosition is not None:
+        config["partitionKeyPosition"] = odcs_property.partitionKeyPosition
+    if odcs_property.partitioned is not None:
+        config["partitioned"] = odcs_property.partitioned
+
+    if odcs_property.customProperties is not None:
+        for item in odcs_property.customProperties:
+            config[item.property] = item.value
+
+    physical_type = odcs_property.physicalType
     if physical_type is not None:
         if server_type == "postgres" or server_type == "postgresql":
             config["postgresType"] = physical_type
@@ -238,79 +316,201 @@ def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict
         else:
             config["physicalType"] = physical_type

+    if len(config) == 0:
+        return None
+
     return config


-def has_composite_primary_key(odcs_properties) -> bool:
-    primary_keys = [prop for prop in odcs_properties if prop.
+def has_composite_primary_key(odcs_properties: List[SchemaProperty]) -> bool:
+    primary_keys = [prop for prop in odcs_properties if prop.primaryKey is not None and prop.primaryKey]
     return len(primary_keys) > 1


 def import_fields(
-    odcs_properties:
+    odcs_properties: List[SchemaProperty], custom_type_mappings: Dict[str, str], server_type
 ) -> Dict[str, Field]:
-    logger = logging.getLogger(__name__)
     result = {}

     if odcs_properties is None:
         return result

     for odcs_property in odcs_properties:
-
-        if
-
-            description = odcs_property.get("description") if odcs_property.get("description") is not None else None
-            field = Field(
-                description=" ".join(description.splitlines()) if description is not None else None,
-                type=mapped_type,
-                title=odcs_property.get("businessName"),
-                required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
-                primaryKey=odcs_property.get("primaryKey")
-                if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
-                else False,
-                unique=odcs_property.get("unique"),
-                examples=odcs_property.get("examples") if odcs_property.get("examples") is not None else None,
-                classification=odcs_property.get("classification")
-                if odcs_property.get("classification") is not None
-                else "",
-                tags=odcs_property.get("tags") if odcs_property.get("tags") is not None else None,
-                quality=odcs_property.get("quality") if odcs_property.get("quality") is not None else [],
-                config=import_field_config(odcs_property, server_type),
-            )
-            result[property_name] = field
-        else:
-            logger.info(
-                f"Can't map {odcs_property.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{odcs_property.get('logicalName')}' that defines your expected type as the 'value'"
-            )
+        field = import_field(odcs_property, odcs_properties, custom_type_mappings, server_type)
+        if field is not None:
+            result[odcs_property.name] = field

     return result


-def
-
+def import_field(
+    odcs_property: SchemaProperty,
+    odcs_properties: List[SchemaProperty],
+    custom_type_mappings: Dict[str, str],
+    server_type: str,
+) -> Field | None:
+    """
+    Import a single ODCS property as a datacontract Field.
+    Returns None if the property cannot be mapped.
+    """
+    logger = logging.getLogger(__name__)
+
+    mapped_type = map_type(odcs_property.logicalType, custom_type_mappings, odcs_property.physicalType)
+
+    if mapped_type is None:
+        type_info = f"logicalType={odcs_property.logicalType}, physicalType={odcs_property.physicalType}"
+        logger.warning(
+            f"Can't map field '{odcs_property.name}' ({type_info}) to the datacontract mapping types. "
+            f"Both logicalType and physicalType are missing or unmappable. "
+            f"Consider introducing a customProperty 'dc_mapping_<type>' that defines your expected type as the 'value'"
+        )
         return None
-
-    if
-
-
-
-
+
+    description = odcs_property.description if odcs_property.description is not None else None
+    field = Field(
+        description=" ".join(description.splitlines()) if description is not None else None,
+        type=mapped_type,
+        title=odcs_property.businessName,
+        required=odcs_property.required if odcs_property.required is not None else None,
+        primaryKey=to_primary_key(odcs_property, odcs_properties),
+        unique=odcs_property.unique if odcs_property.unique else None,
+        examples=odcs_property.examples if odcs_property.examples is not None else None,
+        classification=odcs_property.classification if odcs_property.classification is not None else None,
+        tags=odcs_property.tags if odcs_property.tags is not None else None,
+        quality=convert_quality_list(odcs_property.quality),
+        fields=import_fields(odcs_property.properties, custom_type_mappings, server_type)
+        if odcs_property.properties is not None
+        else {},
+        config=import_field_config(odcs_property, server_type),
+        format=getattr(odcs_property, "format", None),
+    )
+
+    # mapped_type is array
+    if field.type == "array" and odcs_property.items is not None:
+        field.items = import_field(odcs_property.items, [], custom_type_mappings, server_type)
+
+    # enum from quality validValues as enum
+    if field.type == "string":
+        for q in field.quality:
+            if hasattr(q, "validValues"):
+                field.enum = q.validValues
+
+    return field
+
+
+def to_primary_key(odcs_property: SchemaProperty, odcs_properties: list[SchemaProperty]) -> bool | None:
+    if odcs_property.primaryKey is None:
         return None
+    if has_composite_primary_key(odcs_properties):
+        return None
+    return odcs_property.primaryKey
+
+
+def map_type(odcs_logical_type: str, custom_mappings: Dict[str, str], physical_type: str = None) -> str | None:
+    # Try to map logicalType first
+    if odcs_logical_type is not None:
+        t = odcs_logical_type.lower()
+        if t in DATACONTRACT_TYPES:
+            return t
+        elif custom_mappings.get(t) is not None:
+            return custom_mappings.get(t)
+
+    # Fallback to physicalType if logicalType is not mapped
+    if physical_type is not None:
+        pt = physical_type.lower()
+        # Remove parameters from physical type (e.g., VARCHAR(50) -> varchar, DECIMAL(10,2) -> decimal)
+        pt_base = pt.split("(")[0].strip()
+
+        # Try direct mapping of physical type
+        if pt in DATACONTRACT_TYPES:
+            return pt
+        elif pt_base in DATACONTRACT_TYPES:
+            return pt_base
+        elif custom_mappings.get(pt) is not None:
+            return custom_mappings.get(pt)
+        elif custom_mappings.get(pt_base) is not None:
+            return custom_mappings.get(pt_base)
+        # Common physical type mappings
+        elif pt_base in ["varchar", "char", "nvarchar", "nchar", "text", "ntext", "string", "character varying"]:
+            return "string"
+        elif pt_base in ["int", "integer", "smallint", "tinyint", "mediumint", "int2", "int4", "int8"]:
+            return "int"
+        elif pt_base in ["bigint", "long", "int64"]:
+            return "long"
+        elif pt_base in ["float", "real", "float4", "float8"]:
+            return "float"
+        elif pt_base in ["double", "double precision"]:
+            return "double"
+        elif pt_base in ["decimal", "numeric", "number"]:
+            return "decimal"
+        elif pt_base in ["boolean", "bool", "bit"]:
+            return "boolean"
+        elif pt_base in ["timestamp", "datetime", "datetime2", "timestamptz", "timestamp with time zone"]:
+            return "timestamp"
+        elif pt_base in ["date"]:
+            return "date"
+        elif pt_base in ["time"]:
+            return "time"
+        elif pt_base in ["json", "jsonb"]:
+            return "json"
+        elif pt_base in ["array"]:
+            return "array"
+        elif pt_base in ["object", "struct", "record"]:
+            return "object"
+        elif pt_base in ["bytes", "binary", "varbinary", "blob", "bytea"]:
+            return "bytes"
+        else:
+            return None
+    return None


-def get_custom_type_mappings(odcs_custom_properties: List[
+def get_custom_type_mappings(odcs_custom_properties: List[CustomProperty]) -> Dict[str, str]:
     result = {}
     if odcs_custom_properties is not None:
         for prop in odcs_custom_properties:
-            if prop
-                odcs_type_name = prop[
-                datacontract_type = prop
+            if prop.property.startswith("dc_mapping_"):
+                odcs_type_name = prop.property[11:]  # Changed substring to slice
+                datacontract_type = prop.value
                 result[odcs_type_name] = datacontract_type

     return result


-def
-    if
+def get_owner(odcs_custom_properties: List[CustomProperty]) -> str | None:
+    if odcs_custom_properties is not None:
+        for prop in odcs_custom_properties:
+            if prop.property == "owner":
+                return prop.value
+
+    return None
+
+
+def import_tags(odcs: OpenDataContractStandard) -> List[str] | None:
+    if odcs.tags is None:
         return None
-    return
+    return odcs.tags
+
+
+def to_azure_storage_account(location: str) -> str | None:
+    """
+    Converts a storage location string to extract the storage account name.
+    ODCS v3.0 has no explicit field for the storage account. It uses the location field, which is a URI.
+
+    This function parses a storage location string to identify and return the
+    storage account name. It handles two primary patterns:
+    1. Protocol://containerName@storageAccountName
+    2. Protocol://storageAccountName
+
+    :param location: The storage location string to parse, typically following
+        the format protocol://containerName@storageAccountName. or
+        protocol://storageAccountName.
+    :return: The extracted storage account name if found, otherwise None
+    """
+    # to catch protocol://containerName@storageAccountName. pattern from location
+    match = re.search(r"(?<=@)([^.]*)", location, re.IGNORECASE)
+    if match:
+        return match.group()
+    else:
+        # to catch protocol://storageAccountName. pattern from location
+        match = re.search(r"(?<=//)(?!@)([^.]*)", location, re.IGNORECASE)
+        return match.group() if match else None
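The two location patterns described in the `to_azure_storage_account` docstring can be exercised in isolation with the same regular expressions. A small standalone sketch follows; the helper name `storage_account` and the Azure-style URLs are made-up example values, not taken from this diff.

    import re

    def storage_account(location: str) -> str | None:
        # protocol://containerName@storageAccountName. pattern
        match = re.search(r"(?<=@)([^.]*)", location, re.IGNORECASE)
        if match:
            return match.group()
        # protocol://storageAccountName. pattern
        match = re.search(r"(?<=//)(?!@)([^.]*)", location, re.IGNORECASE)
        return match.group() if match else None

    # hypothetical Azure locations used purely for illustration
    assert storage_account("abfss://container@myaccount.dfs.core.windows.net/data") == "myaccount"
    assert storage_account("https://myaccount.blob.core.windows.net/container") == "myaccount"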