datacontract-cli 0.10.13__py3-none-any.whl → 0.10.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/cli.py +5 -0
- datacontract/data_contract.py +9 -1
- datacontract/engines/soda/connections/kafka.py +26 -5
- datacontract/export/avro_converter.py +8 -1
- datacontract/export/avro_idl_converter.py +1 -0
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/exporter.py +4 -1
- datacontract/export/exporter_factory.py +13 -1
- datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +4 -4
- datacontract/export/odcs_v3_exporter.py +294 -0
- datacontract/export/sodacl_converter.py +82 -2
- datacontract/export/spark_converter.py +3 -1
- datacontract/export/sql_type_converter.py +55 -11
- datacontract/imports/iceberg_importer.py +162 -0
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_importer.py +25 -168
- datacontract/imports/odcs_v2_importer.py +177 -0
- datacontract/imports/odcs_v3_importer.py +309 -0
- datacontract/integration/datamesh_manager.py +1 -1
- datacontract/lint/resolve.py +14 -9
- datacontract/lint/resources.py +21 -0
- datacontract/lint/urls.py +4 -2
- datacontract/model/data_contract_specification.py +72 -8
- datacontract/model/odcs.py +11 -0
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA +89 -51
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/RECORD +31 -25
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/WHEEL +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
from venv import logger
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
from datacontract.imports.importer import Importer
|
|
9
|
+
from datacontract.lint.resources import read_resource
|
|
10
|
+
from datacontract.model.data_contract_specification import (
|
|
11
|
+
Availability,
|
|
12
|
+
DataContractSpecification,
|
|
13
|
+
Info,
|
|
14
|
+
Model,
|
|
15
|
+
Field,
|
|
16
|
+
Retention,
|
|
17
|
+
Server,
|
|
18
|
+
ServiceLevel,
|
|
19
|
+
Terms,
|
|
20
|
+
DATACONTRACT_TYPES,
|
|
21
|
+
)
|
|
22
|
+
from datacontract.model.exceptions import DataContractException
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class OdcsImporter(Importer):
    """Importer that fills a DataContractSpecification from an ODCS v3 document."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Import the ODCS v3 resource at ``source`` into ``data_contract_specification``.

        ``import_args`` is accepted as part of the signature but is not read here.
        """
        imported = import_odcs_v3(data_contract_specification, source)
        return imported
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def import_odcs_v3(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
    """Read the resource at ``source`` and import its ODCS v3 content into the given specification."""
    return import_odcs_v3_from_str(data_contract_specification, read_resource(source))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def import_odcs_v3_from_str(
    data_contract_specification: DataContractSpecification, source_str: str
) -> DataContractSpecification:
    """Parse an ODCS v3 YAML document and copy its content into the given specification.

    Args:
        data_contract_specification: The specification object to populate (mutated in place).
        source_str: The ODCS contract as YAML text.

    Returns:
        The same ``data_contract_specification`` instance, populated.

    Raises:
        DataContractException: If the text cannot be parsed as YAML, or if the parsed
            document is not a mapping (for example an empty document).
        KeyError: If the document has no ``id`` key.
    """
    try:
        odcs_contract = yaml.safe_load(source_str)
    except Exception as e:
        raise DataContractException(
            type="schema",
            name="Parse ODCS contract",
            reason=f"Failed to parse odcs contract from {source_str}",
            engine="datacontract",
            original_exception=e,
        ) from e

    # An empty document parses to None and a scalar/list document is not a contract;
    # fail with a descriptive error instead of a TypeError on the subscript below.
    if not isinstance(odcs_contract, dict):
        raise DataContractException(
            type="schema",
            name="Parse ODCS contract",
            reason=f"Expected the ODCS contract to be a YAML mapping, got {type(odcs_contract).__name__}",
            engine="datacontract",
            original_exception=None,
        )

    data_contract_specification.id = odcs_contract["id"]
    data_contract_specification.info = import_info(odcs_contract)
    data_contract_specification.servers = import_servers(odcs_contract)
    data_contract_specification.terms = import_terms(odcs_contract)
    data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
    data_contract_specification.models = import_models(odcs_contract)
    data_contract_specification.tags = import_tags(odcs_contract)

    return data_contract_specification
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def import_info(odcs_contract: Dict[str, Any]) -> Info:
    """Build the ``Info`` section from the top-level fields of an ODCS contract.

    ``title`` is always set (empty string when ``name`` is absent); every other
    attribute is only assigned when the corresponding ODCS value is present.
    """
    info = Info()

    name = odcs_contract.get("name")
    info.title = "" if name is None else name

    version = odcs_contract.get("version")
    if version is not None:
        info.version = version

    # odcs.description.purpose => datacontract.description
    description = odcs_contract.get("description")
    if description is not None:
        purpose = description.get("purpose")
        if purpose is not None:
            info.description = purpose

    # odcs.domain => datacontract.owner; dataProduct and tenant are added as custom properties.
    for odcs_key, info_attribute in (("domain", "owner"), ("dataProduct", "dataProduct"), ("tenant", "tenant")):
        value = odcs_contract.get(odcs_key)
        if value is not None:
            setattr(info, info_attribute, value)

    return info
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def import_servers(odcs_contract: Dict[str, Any]) -> Dict[str, Server] | None:
    """Convert the ODCS ``servers`` list into a mapping of server name to ``Server``.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        ``None`` when the contract has no ``servers`` key; otherwise a dict keyed by
        each entry's ``server`` value. Entries without a ``server`` name are skipped
        with a warning.
    """
    # Log under this module's name. The module-level ``logger`` is imported from the
    # stdlib ``venv`` package, so warnings would otherwise be attributed to ``venv``.
    log = logging.getLogger(__name__)

    odcs_servers = odcs_contract.get("servers")
    if odcs_servers is None:
        return None

    # Keys that are copied 1:1 from the ODCS server entry onto the Server object.
    copied_keys = (
        "type",
        "description",
        "environment",
        "format",
        "project",
        "dataset",
        "path",
        "delimiter",
        "endpointUrl",
        "location",
        "account",
        "database",
        "host",
        "port",
        "catalog",
        "topic",
        "http_path",
        "token",
        "dataProductId",
        "outputPortId",
        "driver",
        "roles",
    )

    servers = {}
    for odcs_server in odcs_servers:
        server_name = odcs_server.get("server")
        if server_name is None:
            log.warning("Server name is missing, skipping server")
            continue

        server = Server()
        for key in copied_keys:
            setattr(server, key, odcs_server.get(key))
        # The ODCS key is "schema"; the Server attribute is named schema_.
        server.schema_ = odcs_server.get("schema")

        servers[server_name] = server
    return servers
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
    """Build the ``Terms`` section from ODCS ``description.usage``, ``description.limitations`` and ``price``.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        A ``Terms`` object when at least one of the three sources is present,
        otherwise ``None``.
    """
    # A contract may define a price without a description block; treat a missing
    # description as empty so the price is not silently dropped.
    description = odcs_contract.get("description")
    if description is None:
        description = {}

    usage = description.get("usage")
    limitations = description.get("limitations")
    price = odcs_contract.get("price")

    if usage is None and limitations is None and price is None:
        return None

    terms = Terms()
    if usage is not None:
        terms.usage = usage
    if limitations is not None:
        terms.limitations = limitations
    if price is not None:
        terms.billing = f"{price.get('priceAmount')} {price.get('priceCurrency')} / {price.get('priceUnit')}"

    return terms
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel | None:
    """Map the ODCS ``slaProperties`` entries that have an equivalent service level.

    Only the ``generalAvailability`` and ``retention`` properties are mapped.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        A ``ServiceLevel`` when at least one of the two properties is present,
        otherwise ``None``.
    """
    sla_properties = odcs_contract.get("slaProperties")
    if sla_properties is None:
        sla_properties = []

    # Use .get so an entry without a "property" key is skipped instead of raising KeyError.
    availability = next((p for p in sla_properties if p.get("property") == "generalAvailability"), None)
    retention = next((p for p in sla_properties if p.get("property") == "retention"), None)

    if availability is None and retention is None:
        return None

    servicelevel = ServiceLevel()

    if availability is not None:
        value = availability.get("value")
        # YAML date-only scalars load as datetime.date and timestamps as datetime.datetime
        # (a subclass of date); both are converted to their ISO string form.
        if isinstance(value, datetime.date):
            value = value.isoformat()
        servicelevel.availability = Availability(description=value)

    if retention is not None:
        unit = retention.get("unit")
        # Without a unit, emit just the value rather than appending the text "None".
        period = f"{retention.get('value')}" if unit is None else f"{retention.get('value')}{unit}"
        servicelevel.retention = Retention(period=period)

    return servicelevel
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def get_server_type(odcs_contract: Dict[str, Any]) -> str | None:
    """Return the ``type`` of the first imported server, or ``None`` when no server is available."""
    servers = import_servers(odcs_contract)
    if not servers:
        return None
    # The first entry of the mapping decides the server type.
    first_server = next(iter(servers.values()))
    return first_server.type
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
    """Convert the ODCS ``schema`` list into data contract models.

    Each schema becomes a ``Model`` of type ``"table"``, keyed by its
    ``physicalName`` when present and by its ``name`` otherwise.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        A dict of model name to ``Model``; empty when the contract has no schemas.
    """
    odcs_schemas = odcs_contract.get("schema")
    if not odcs_schemas:
        return {}

    custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))
    # The server type is the same for every schema; resolve it once instead of
    # re-importing all servers (and repeating their warnings) per schema.
    server_type = get_server_type(odcs_contract)

    result = {}
    for odcs_schema in odcs_schemas:
        schema_name = odcs_schema.get("name")
        schema_physical_name = odcs_schema.get("physicalName")
        schema_description = odcs_schema.get("description")
        if schema_description is None:
            schema_description = ""
        model_name = schema_physical_name if schema_physical_name is not None else schema_name

        # Collapse multi-line descriptions into a single line.
        model = Model(description=" ".join(schema_description.splitlines()), type="table")
        model.fields = import_fields(odcs_schema.get("properties"), custom_type_mappings, server_type=server_type)
        model.title = schema_name

        granularity = odcs_schema.get("dataGranularityDescription")
        if granularity is not None:
            model.config = {"dataGranularityDescription": granularity}

        result[model_name] = model

    return result
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict[str, Any]:
    """Collect the field-level ``config`` entries for one ODCS property.

    Args:
        odcs_property: A single entry of an ODCS schema's ``properties`` list.
        server_type: The server type of the contract; selects the config key under
            which ``physicalType`` is stored.

    Returns:
        A dict containing, when present: ``criticalDataElement``, ``encryptedName``,
        ``partitionKeyPosition``, ``partitioned``, every item of a list-valued
        ``customProperties`` (as ``property`` -> ``value``), and the physical type
        under a server-specific key (``physicalType`` for unknown server types).
    """
    config = {}

    for key in ("criticalDataElement", "encryptedName", "partitionKeyPosition", "partitioned"):
        value = odcs_property.get(key)
        if value is not None:
            config[key] = value

    custom_properties = odcs_property.get("customProperties")
    if isinstance(custom_properties, list):
        for item in custom_properties:
            config[item["property"]] = item["value"]

    physical_type = odcs_property.get("physicalType")
    if physical_type is not None:
        # Server type -> config key. The databricks entry is keyed by the server type
        # "databricks"; comparing against "databricksType" could never match a server.
        physical_type_keys = {
            "postgres": "postgresType",
            "postgresql": "postgresType",
            "bigquery": "bigqueryType",
            "snowflake": "snowflakeType",
            "redshift": "redshiftType",
            "sqlserver": "sqlserverType",
            "databricks": "databricksType",
        }
        config[physical_type_keys.get(server_type, "physicalType")] = physical_type

    return config
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def has_composite_primary_key(odcs_properties) -> bool:
    """Return True when more than one property has a truthy ``primaryKey`` value."""
    flagged = 0
    for prop in odcs_properties:
        if prop.get("primaryKey"):
            flagged += 1
    return flagged > 1
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def import_fields(
    odcs_properties: List[Dict[str, Any]], custom_type_mappings: Dict[str, str], server_type
) -> Dict[str, Field]:
    """Convert the ``properties`` list of an ODCS schema into data contract fields.

    Properties whose ``logicalType`` cannot be mapped (or is missing) are skipped
    and reported at INFO level.

    Args:
        odcs_properties: The list of ODCS property dicts, or ``None``.
        custom_type_mappings: Extra type mappings from ``dc_mapping_*`` custom properties.
        server_type: The contract's server type, forwarded to ``import_field_config``.

    Returns:
        A dict of property name to ``Field``; empty when ``odcs_properties`` is ``None``.
    """
    logger = logging.getLogger(__name__)
    result = {}

    if odcs_properties is None:
        return result

    # Whether the key is composite is a property of the whole list; compute it once
    # rather than rescanning the list for every property.
    composite_primary_key = has_composite_primary_key(odcs_properties)

    for odcs_property in odcs_properties:
        logical_type = odcs_property.get("logicalType")
        # A property without a logicalType cannot be mapped; treat it like an unknown type.
        mapped_type = map_type(logical_type, custom_type_mappings) if logical_type is not None else None

        if mapped_type is None:
            # Report the keys this importer actually reads ("name" / "logicalType").
            logger.info(
                f"Can't map {odcs_property.get('name')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{logical_type}' that defines your expected type as the 'value'"
            )
            continue

        description = odcs_property.get("description")
        nullable = odcs_property.get("nullable")
        primary_key = odcs_property.get("primaryKey")
        classification = odcs_property.get("classification")
        quality = odcs_property.get("quality")

        result[odcs_property["name"]] = Field(
            # Collapse multi-line descriptions into a single line.
            description=" ".join(description.splitlines()) if description is not None else None,
            type=mapped_type,
            title=odcs_property.get("businessName"),
            required=not nullable if nullable is not None else False,
            # Field-level "primary" is only set for single-column keys.
            primary=primary_key if not composite_primary_key and primary_key is not None else False,
            unique=odcs_property.get("unique"),
            examples=odcs_property.get("examples"),
            classification=classification if classification is not None else "",
            tags=odcs_property.get("tags"),
            quality=quality if quality is not None else [],
            config=import_field_config(odcs_property, server_type),
        )

    return result
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
|
|
285
|
+
t = odcs_type.lower()
|
|
286
|
+
if t in DATACONTRACT_TYPES:
|
|
287
|
+
return t
|
|
288
|
+
elif custom_mappings.get(t) is not None:
|
|
289
|
+
return custom_mappings.get(t)
|
|
290
|
+
else:
|
|
291
|
+
return None
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
    """Extract custom type mappings from the contract-level ``customProperties``.

    A custom property named ``dc_mapping_<type>`` maps ``<type>`` to the data
    contract type given as its ``value``.

    Args:
        odcs_custom_properties: The list of custom property dicts (each with
            ``property`` and ``value``), or ``None``.

    Returns:
        A dict of type name (the part after ``dc_mapping_``) to the mapped type;
        empty when there are no such properties.
    """
    prefix = "dc_mapping_"
    result = {}
    if odcs_custom_properties is None:
        return result

    for prop in odcs_custom_properties:
        property_name = prop["property"]
        if property_name.startswith(prefix):
            # Python strings have no .substring(); slice the prefix off instead.
            result[property_name[len(prefix):]] = prop["value"]

    return result
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def import_tags(odcs_contract) -> List[str] | None:
|
|
307
|
+
if odcs_contract.get("tags") is None:
|
|
308
|
+
return None
|
|
309
|
+
return odcs_contract.get("tags")
|
|
@@ -23,7 +23,7 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
|
|
|
23
23
|
)
|
|
24
24
|
|
|
25
25
|
if run.dataContractId is None:
|
|
26
|
-
raise Exception("Cannot publish run results
|
|
26
|
+
raise Exception("Cannot publish run results for unknown data contract ID")
|
|
27
27
|
|
|
28
28
|
headers = {"Content-Type": "application/json", "x-api-key": api_key}
|
|
29
29
|
request_body = run.model_dump_json()
|
datacontract/lint/resolve.py
CHANGED
|
@@ -5,11 +5,13 @@ import fastjsonschema
|
|
|
5
5
|
import yaml
|
|
6
6
|
from fastjsonschema import JsonSchemaValueException
|
|
7
7
|
|
|
8
|
-
from datacontract.
|
|
8
|
+
from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
|
|
9
|
+
from datacontract.lint.resources import read_resource
|
|
9
10
|
from datacontract.lint.schema import fetch_schema
|
|
10
11
|
from datacontract.lint.urls import fetch_resource
|
|
11
12
|
from datacontract.model.data_contract_specification import DataContractSpecification, Definition, Quality
|
|
12
13
|
from datacontract.model.exceptions import DataContractException
|
|
14
|
+
from datacontract.model.odcs import is_open_data_contract_standard
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
def resolve_data_contract(
|
|
@@ -41,10 +43,7 @@ def resolve_data_contract(
|
|
|
41
43
|
def resolve_data_contract_from_location(
|
|
42
44
|
location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
|
|
43
45
|
) -> DataContractSpecification:
|
|
44
|
-
|
|
45
|
-
data_contract_str = fetch_resource(location)
|
|
46
|
-
else:
|
|
47
|
-
data_contract_str = read_file(location)
|
|
46
|
+
data_contract_str = read_resource(location)
|
|
48
47
|
return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
|
|
49
48
|
|
|
50
49
|
|
|
@@ -196,10 +195,16 @@ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
|
196
195
|
def _resolve_data_contract_from_str(
|
|
197
196
|
data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
|
|
198
197
|
) -> DataContractSpecification:
|
|
199
|
-
|
|
200
|
-
|
|
198
|
+
yaml_dict = _to_yaml(data_contract_str)
|
|
199
|
+
|
|
200
|
+
if is_open_data_contract_standard(yaml_dict):
|
|
201
|
+
# if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
|
|
202
|
+
data_contract_specification = DataContractSpecification(dataContractSpecification="0.9.3")
|
|
203
|
+
return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
|
|
201
204
|
|
|
202
|
-
|
|
205
|
+
_validate_data_contract_specification_schema(yaml_dict, schema_location)
|
|
206
|
+
data_contract_specification = yaml_dict
|
|
207
|
+
spec = DataContractSpecification(**data_contract_specification)
|
|
203
208
|
|
|
204
209
|
if inline_definitions:
|
|
205
210
|
inline_definitions_into_data_contract(spec)
|
|
@@ -224,7 +229,7 @@ def _to_yaml(data_contract_str):
|
|
|
224
229
|
)
|
|
225
230
|
|
|
226
231
|
|
|
227
|
-
def
|
|
232
|
+
def _validate_data_contract_specification_schema(data_contract_yaml, schema_location: str = None):
|
|
228
233
|
schema = fetch_schema(schema_location)
|
|
229
234
|
try:
|
|
230
235
|
fastjsonschema.validate(schema, data_contract_yaml)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from datacontract.lint.files import read_file
|
|
2
|
+
from datacontract.lint.urls import fetch_resource
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def read_resource(location: str) -> str:
    """
    Load the content found at ``location``.

    Locations starting with ``http://`` or ``https://`` are fetched from the web
    (API-Keys are supported); anything else is read as a local file.

    Args:
        location (str): Either a URL or a file path.

    Returns:
        str: The content of the resource.
    """
    is_url = location.startswith(("http://", "https://"))
    if not is_url:
        return read_file(location)
    return fetch_resource(location)
|
datacontract/lint/urls.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from urllib.parse import urlparse
|
|
2
3
|
|
|
3
4
|
import requests
|
|
4
5
|
|
|
@@ -25,7 +26,8 @@ def fetch_resource(url: str):
|
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
def _set_api_key(headers, url):
|
|
28
|
-
|
|
29
|
+
hostname = urlparse(url).hostname
|
|
30
|
+
if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
|
|
29
31
|
datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
30
32
|
if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
|
|
31
33
|
print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
|
|
@@ -37,7 +39,7 @@ def _set_api_key(headers, url):
|
|
|
37
39
|
result="error",
|
|
38
40
|
)
|
|
39
41
|
headers["x-api-key"] = datamesh_manager_api_key
|
|
40
|
-
elif "
|
|
42
|
+
elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
|
|
41
43
|
datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
|
|
42
44
|
if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
|
|
43
45
|
print("Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
|
|
@@ -4,6 +4,32 @@ from typing import List, Dict, Optional, Any
|
|
|
4
4
|
import pydantic as pyd
|
|
5
5
|
import yaml
|
|
6
6
|
|
|
7
|
+
# Type names accepted as data contract field types, grouped by family.
# The order of the entries is unchanged.
DATACONTRACT_TYPES = [
    # text
    "string", "text", "varchar",
    # exact numerics
    "number", "decimal", "numeric",
    # integers
    "int", "integer", "long", "bigint",
    # floating point
    "float", "double",
    "boolean",
    # temporal
    "timestamp", "timestamp_tz", "timestamp_ntz",
    "date",
    "array",
    "bytes",
    # structured
    "object", "record", "struct",
    "null",
]
|
|
32
|
+
|
|
7
33
|
|
|
8
34
|
class Contact(pyd.BaseModel):
|
|
9
35
|
name: str = None
|
|
@@ -15,6 +41,14 @@ class Contact(pyd.BaseModel):
|
|
|
15
41
|
)
|
|
16
42
|
|
|
17
43
|
|
|
44
|
+
class ServerRole(pyd.BaseModel):
    """A role entry listed under a server's ``roles``."""

    # Keys beyond the declared fields are accepted.
    model_config = pyd.ConfigDict(extra="allow")

    name: str = None
    description: str = None
|
|
50
|
+
|
|
51
|
+
|
|
18
52
|
class Server(pyd.BaseModel):
|
|
19
53
|
type: str = None
|
|
20
54
|
description: str = None
|
|
@@ -38,6 +72,7 @@ class Server(pyd.BaseModel):
|
|
|
38
72
|
dataProductId: str = None
|
|
39
73
|
outputPortId: str = None
|
|
40
74
|
driver: str = None
|
|
75
|
+
roles: List[ServerRole] = None
|
|
41
76
|
|
|
42
77
|
model_config = pyd.ConfigDict(
|
|
43
78
|
extra="allow",
|
|
@@ -83,19 +118,40 @@ class Definition(pyd.BaseModel):
|
|
|
83
118
|
)
|
|
84
119
|
|
|
85
120
|
|
|
121
|
+
class Quality(pyd.BaseModel):
    """A quality definition as attached to fields and models via their ``quality`` lists."""

    # Keys beyond the declared fields are accepted.
    model_config = pyd.ConfigDict(extra="allow")

    type: str = None
    description: str = None
    query: str = None
    dialect: str = None

    # Comparison operands.
    mustBe: int = None
    mustNotBe: int = None
    mustBeGreaterThan: int = None
    mustBeGreaterThanOrEqualTo: int = None
    mustBeLessThan: int = None
    mustBeLessThanOrEqualTo: int = None
    mustBeBetween: List[int] = None
    mustNotBeBetween: List[int] = None

    engine: str = None
    implementation: str | Dict[str, Any] = None
|
|
140
|
+
|
|
141
|
+
|
|
86
142
|
class Field(pyd.BaseModel):
|
|
87
143
|
ref: str = pyd.Field(default=None, alias="$ref")
|
|
88
144
|
ref_obj: Definition = pyd.Field(default=None, exclude=True)
|
|
89
|
-
title: str = None
|
|
145
|
+
title: str | None = None
|
|
90
146
|
type: str = None
|
|
91
147
|
format: str = None
|
|
92
148
|
required: bool = None
|
|
93
149
|
primary: bool = None
|
|
94
|
-
unique: bool = None
|
|
150
|
+
unique: bool | None = None
|
|
95
151
|
references: str = None
|
|
96
|
-
description: str = None
|
|
97
|
-
pii: bool = None
|
|
98
|
-
classification: str = None
|
|
152
|
+
description: str | None = None
|
|
153
|
+
pii: bool | None = None
|
|
154
|
+
classification: str | None = None
|
|
99
155
|
pattern: str = None
|
|
100
156
|
minLength: int = None
|
|
101
157
|
maxLength: int = None
|
|
@@ -103,8 +159,8 @@ class Field(pyd.BaseModel):
|
|
|
103
159
|
exclusiveMinimum: int = None
|
|
104
160
|
maximum: int = None
|
|
105
161
|
exclusiveMaximum: int = None
|
|
106
|
-
enum: List[str] = []
|
|
107
|
-
tags: List[str] = []
|
|
162
|
+
enum: List[str] | None = []
|
|
163
|
+
tags: List[str] | None = []
|
|
108
164
|
links: Dict[str, str] = {}
|
|
109
165
|
fields: Dict[str, "Field"] = {}
|
|
110
166
|
items: "Field" = None
|
|
@@ -113,7 +169,9 @@ class Field(pyd.BaseModel):
|
|
|
113
169
|
precision: int = None
|
|
114
170
|
scale: int = None
|
|
115
171
|
example: str = None
|
|
116
|
-
|
|
172
|
+
examples: List[Any] | None = None
|
|
173
|
+
quality: List[Quality] | None = []
|
|
174
|
+
config: Dict[str, Any] | None = None
|
|
117
175
|
|
|
118
176
|
model_config = pyd.ConfigDict(
|
|
119
177
|
extra="allow",
|
|
@@ -126,7 +184,13 @@ class Model(pyd.BaseModel):
|
|
|
126
184
|
namespace: Optional[str] = None
|
|
127
185
|
title: Optional[str] = None
|
|
128
186
|
fields: Dict[str, Field] = {}
|
|
187
|
+
quality: List[Quality] | None = []
|
|
129
188
|
config: Dict[str, Any] = None
|
|
189
|
+
tags: List[str] | None = None
|
|
190
|
+
|
|
191
|
+
model_config = pyd.ConfigDict(
|
|
192
|
+
extra="allow",
|
|
193
|
+
)
|
|
130
194
|
|
|
131
195
|
|
|
132
196
|
class Info(pyd.BaseModel):
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
def is_open_data_contract_standard(odcs: dict) -> bool:
    """
    Check if the given dictionary is an OpenDataContractStandard.

    A document qualifies when its ``kind`` is ``"DataContract"`` and its
    ``apiVersion`` is a string starting with ``"v3"``.

    Args:
        odcs (dict): The dictionary to check. Non-dict values (for example the
            ``None`` produced by an empty YAML document) are never ODCS.

    Returns:
        bool: True if the dictionary is an OpenDataContractStandard, False otherwise.
    """
    if not isinstance(odcs, dict):
        return False

    # apiVersion may be present but null or non-string; a default passed to .get()
    # would not cover that, so check the type explicitly.
    api_version = odcs.get("apiVersion")
    return odcs.get("kind") == "DataContract" and isinstance(api_version, str) and api_version.startswith("v3")
|