datacontract-cli 0.10.12__py3-none-any.whl → 0.10.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of datacontract-cli has been flagged as potentially problematic.
- datacontract/cli.py +5 -0
- datacontract/data_contract.py +9 -1
- datacontract/engines/soda/connections/kafka.py +28 -6
- datacontract/export/avro_converter.py +8 -1
- datacontract/export/avro_idl_converter.py +1 -0
- datacontract/export/bigquery_converter.py +30 -23
- datacontract/export/data_caterer_converter.py +148 -0
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/exporter.py +5 -1
- datacontract/export/exporter_factory.py +19 -1
- datacontract/export/jsonschema_converter.py +13 -2
- datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +4 -4
- datacontract/export/odcs_v3_exporter.py +294 -0
- datacontract/export/sodacl_converter.py +82 -2
- datacontract/export/spark_converter.py +3 -1
- datacontract/export/sql_type_converter.py +56 -21
- datacontract/imports/iceberg_importer.py +162 -0
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_importer.py +25 -168
- datacontract/imports/odcs_v2_importer.py +177 -0
- datacontract/imports/odcs_v3_importer.py +309 -0
- datacontract/imports/spark_importer.py +5 -1
- datacontract/imports/unity_importer.py +105 -84
- datacontract/integration/datamesh_manager.py +1 -1
- datacontract/lint/resolve.py +24 -10
- datacontract/lint/resources.py +21 -0
- datacontract/lint/urls.py +29 -13
- datacontract/model/data_contract_specification.py +72 -8
- datacontract/model/odcs.py +11 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA +106 -52
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/RECORD +36 -29
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/WHEEL +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.14.dist-info}/top_level.txt +0 -0
datacontract/imports/iceberg_importer.py ADDED

@@ -0,0 +1,162 @@
+from typing import Dict, Any
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+
+from pyiceberg.schema import Schema
+from pyiceberg import types as iceberg_types
+from pydantic import ValidationError
+
+from datacontract.model.exceptions import DataContractException
+
+
+class IcebergImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        schema = load_and_validate_iceberg_schema(source)
+        return import_iceberg(
+            data_contract_specification,
+            schema,
+            import_args.get("iceberg_table"),
+        )
+
+
+def load_and_validate_iceberg_schema(source: str) -> Schema:
+    with open(source, "r") as file:
+        try:
+            return Schema.model_validate_json(file.read())
+        except ValidationError as e:
+            raise DataContractException(
+                type="schema",
+                name="Parse iceberg schema",
+                reason=f"Failed to validate iceberg schema from {source}: {e}",
+                engine="datacontract",
+            )
+
+
+def import_iceberg(
+    data_contract_specification: DataContractSpecification, schema: Schema, table_name: str
+) -> DataContractSpecification:
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    model = Model(type="table", title=table_name)
+
+    for field in schema.fields:
+        model.fields[field.name] = _field_from_nested_field(field)
+
+    data_contract_specification.models[table_name] = model
+    return data_contract_specification
+
+
+def _field_from_nested_field(nested_field: iceberg_types.NestedField) -> Field:
+    """
+    Converts an Iceberg NestedField into a Field object for the data contract.
+
+    Args:
+        nested_field: The Iceberg NestedField to convert.
+
+    Returns:
+        Field: The generated Field object.
+    """
+    field = Field(
+        title=nested_field.name,
+        required=nested_field.required,
+        config=build_field_config(nested_field),
+    )
+
+    if nested_field.doc is not None:
+        field.description = nested_field.doc
+
+    return _type_from_iceberg_type(field, nested_field.field_type)
+
+
+def _type_from_iceberg_type(field: Field, iceberg_type: iceberg_types.IcebergType) -> Field:
+    """
+    Maps Iceberg data types to the Data Contract type system and updates the field.
+
+    Args:
+        field: The Field object to update.
+        iceberg_type: The Iceberg data type to map.
+
+    Returns:
+        Field: The updated Field object.
+    """
+    field.type = _data_type_from_iceberg(iceberg_type)
+
+    if field.type == "array":
+        field.items = _type_from_iceberg_type(Field(required=iceberg_type.element_required), iceberg_type.element_type)
+
+    elif field.type == "map":
+        field.keys = _type_from_iceberg_type(Field(required=True), iceberg_type.key_type)
+        field.values = _type_from_iceberg_type(Field(required=iceberg_type.value_required), iceberg_type.value_type)
+
+    elif field.type == "object":
+        field.fields = {nf.name: _field_from_nested_field(nf) for nf in iceberg_type.fields}
+
+    return field
+
+
+def build_field_config(iceberg_field: iceberg_types.NestedField) -> Dict[str, Any]:
+    config = {}
+
+    if iceberg_field.field_id > 0:
+        config["icebergFieldId"] = iceberg_field.field_id
+
+    if iceberg_field.initial_default is not None:
+        config["icebergInitialDefault"] = iceberg_field.initial_default
+
+    if iceberg_field.write_default is not None:
+        config["icebergWriteDefault"] = iceberg_field.write_default
+
+    return config
+
+
+def _data_type_from_iceberg(type: iceberg_types.IcebergType) -> str:
+    """
+    Convert an Iceberg field type to a datacontract field type
+
+    Args:
+        type: The Iceberg field type
+
+    Returns:
+        str: The datacontract field type
+    """
+    if isinstance(type, iceberg_types.BooleanType):
+        return "boolean"
+    if isinstance(type, iceberg_types.IntegerType):
+        return "integer"
+    if isinstance(type, iceberg_types.LongType):
+        return "long"
+    if isinstance(type, iceberg_types.FloatType):
+        return "float"
+    if isinstance(type, iceberg_types.DoubleType):
+        return "double"
+    if isinstance(type, iceberg_types.DecimalType):
+        return "decimal"
+    if isinstance(type, iceberg_types.DateType):
+        return "date"
+    if isinstance(type, iceberg_types.TimeType):
+        # there isn't a great mapping for the iceberg type "time", just map to string for now
+        return "string"
+    if isinstance(type, iceberg_types.TimestampType):
+        return "timestamp_ntz"
+    if isinstance(type, iceberg_types.TimestamptzType):
+        return "timestamp_tz"
+    if isinstance(type, iceberg_types.StringType):
+        return "string"
+    if isinstance(type, iceberg_types.UUIDType):
+        return "string"
+    if isinstance(type, iceberg_types.BinaryType):
+        return "bytes"
+    if isinstance(type, iceberg_types.FixedType):
+        return "bytes"
+    if isinstance(type, iceberg_types.MapType):
+        return "map"
+    if isinstance(type, iceberg_types.ListType):
+        return "array"
+    if isinstance(type, iceberg_types.StructType):
+        return "object"
+
+    raise ValueError(f"Unknown Iceberg type: {type}")
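As a rough usage sketch (not part of the package diff), the new importer can be driven directly from Python once datacontract-cli 0.10.14 and pyiceberg are installed; the schema, field names, and table name below are invented for illustration.

from pyiceberg.schema import Schema
from pyiceberg.types import LongType, NestedField, StringType

from datacontract.imports.iceberg_importer import import_iceberg
from datacontract.model.data_contract_specification import DataContractSpecification

# Two-column Iceberg schema; "orders" is a made-up table name.
schema = Schema(
    NestedField(field_id=1, name="order_id", field_type=LongType(), required=True),
    NestedField(field_id=2, name="note", field_type=StringType(), required=False, doc="free-text note"),
)

spec = import_iceberg(DataContractSpecification(), schema, "orders")
print(spec.models["orders"].fields["order_id"].type)    # long
print(spec.models["orders"].fields["note"].description)  # free-text note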
datacontract/imports/importer_factory.py CHANGED

@@ -93,3 +93,8 @@ importer_factory.register_lazy_importer(
     module_path="datacontract.imports.dbml_importer",
     class_name="DBMLImporter",
 )
+importer_factory.register_lazy_importer(
+    name=ImportFormat.iceberg,
+    module_path="datacontract.imports.iceberg_importer",
+    class_name="IcebergImporter",
+)
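The iceberg format is registered lazily, so pyiceberg only has to be installed when the format is actually used. A minimal sketch of the lazy-loading idea follows; it is an illustration, not the library's actual importer_factory implementation.

import importlib


class LazyImporterFactory:
    """Illustrative stand-in: remembers module path and class name,
    and only imports the module when the format is first requested."""

    def __init__(self):
        self._lazy = {}

    def register_lazy_importer(self, name: str, module_path: str, class_name: str) -> None:
        self._lazy[name] = (module_path, class_name)

    def create(self, name: str):
        module_path, class_name = self._lazy[name]
        module = importlib.import_module(module_path)  # deferred import
        return getattr(module, class_name)()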
datacontract/imports/odcs_importer.py CHANGED

@@ -1,47 +1,12 @@
-import datetime
-import logging
-from typing import Any, Dict, List
 import yaml
+
 from datacontract.imports.importer import Importer
+from datacontract.lint.resources import read_resource
 from datacontract.model.data_contract_specification import (
-    Availability,
-    Contact,
     DataContractSpecification,
-    Info,
-    Model,
-    Field,
-    Retention,
-    ServiceLevel,
-    Terms,
 )
 from datacontract.model.exceptions import DataContractException
 
-DATACONTRACT_TYPES = [
-    "string",
-    "text",
-    "varchar",
-    "number",
-    "decimal",
-    "numeric",
-    "int",
-    "integer",
-    "long",
-    "bigint",
-    "float",
-    "double",
-    "boolean",
-    "timestamp",
-    "timestamp_tz",
-    "timestamp_ntz",
-    "date",
-    "array",
-    "bytes",
-    "object",
-    "record",
-    "struct",
-    "null",
-]
-
 
 class OdcsImporter(Importer):
     def import_source(
@@ -52,8 +17,7 @@ class OdcsImporter(Importer):
 
 def import_odcs(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
     try:
-        with open(source, "r") as file:
-            odcs_contract = yaml.safe_load(file.read())
+        odcs_contract = yaml.safe_load(read_resource(source))
 
     except Exception as e:
         raise DataContractException(
@@ -64,137 +28,30 @@ def import_odcs(data_contract_specification: DataContractSpecification, source:
             original_exception=e,
         )
 
-    data_contract_specification.id = odcs_contract["uuid"]
-    data_contract_specification.info = import_info(odcs_contract)
-    data_contract_specification.terms = import_terms(odcs_contract)
-    data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
-    data_contract_specification.models = import_models(odcs_contract)
-
-    return data_contract_specification
-
-
-def import_info(odcs_contract: Dict[str, Any]) -> Info:
-    info = Info(title=odcs_contract.get("quantumName"), version=odcs_contract.get("version"))
-
-    if odcs_contract.get("description").get("purpose") is not None:
-        info.description = odcs_contract.get("description").get("purpose")
-
-    if odcs_contract.get("datasetDomain") is not None:
-        info.owner = odcs_contract.get("datasetDomain")
-
-    if odcs_contract.get("productDl") is not None or odcs_contract.get("productFeedbackUrl") is not None:
-        contact = Contact()
-        if odcs_contract.get("productDl") is not None:
-            contact.name = odcs_contract.get("productDl")
-        if odcs_contract.get("productFeedbackUrl") is not None:
-            contact.url = odcs_contract.get("productFeedbackUrl")
-
-        info.contact = contact
-
-    return info
+    odcs_kind = odcs_contract.get("kind")
+    odcs_api_version = odcs_contract.get("apiVersion")
 
+    # if odcs_kind is not DataContract throw exception
+    if odcs_kind != "DataContract":
+        raise DataContractException(
+            type="schema",
+            name="Importing ODCS contract",
+            reason=f"Unsupported ODCS kind: {odcs_kind}. Is this a valid ODCS data contract?",
+            engine="datacontract",
+        )
 
-def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
-    if (
-        odcs_contract.get("description").get("usage") is not None
-        or odcs_contract.get("description").get("limitations") is not None
-        or odcs_contract.get("price") is not None
-    ):
-        terms = Terms()
-        if odcs_contract.get("description").get("usage") is not None:
-            terms.usage = odcs_contract.get("description").get("usage")
-        if odcs_contract.get("description").get("limitations") is not None:
-            terms.limitations = odcs_contract.get("description").get("limitations")
-        if odcs_contract.get("price") is not None:
-            terms.billing = f"{odcs_contract.get('price').get('priceAmount')} {odcs_contract.get('price').get('priceCurrency')} / {odcs_contract.get('price').get('priceUnit')}"
-
-        return terms
-    else:
-        return None
-
-
-def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel:
-    # find the two properties we can map (based on the examples)
-    sla_properties = odcs_contract.get("slaProperties") if odcs_contract.get("slaProperties") is not None else []
-    availability = next((p for p in sla_properties if p["property"] == "generalAvailability"), None)
-    retention = next((p for p in sla_properties if p["property"] == "retention"), None)
-
-    if availability is not None or retention is not None:
-        servicelevel = ServiceLevel()
-
-        if availability is not None:
-            value = availability.get("value")
-            if isinstance(value, datetime.datetime):
-                value = value.isoformat()
-            servicelevel.availability = Availability(description=value)
-
-        if retention is not None:
-            servicelevel.retention = Retention(period=f"{retention.get('value')}{retention.get('unit')}")
-
-        return servicelevel
-    else:
-        return None
-
-
-def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
-    custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))
-
-    odcs_tables = odcs_contract.get("dataset") if odcs_contract.get("dataset") is not None else []
-    result = {}
-
-    for table in odcs_tables:
-        description = table.get("description") if table.get("description") is not None else ""
-        model = Model(description=" ".join(description.splitlines()), type="table")
-        model.fields = import_fields(table.get("columns"), custom_type_mappings)
-        result[table.get("table")] = model
-
-    return result
-
-
-def import_fields(odcs_columns: Dict[str, Any], custom_type_mappings: Dict[str, str]) -> Dict[str, Field]:
-    logger = logging.getLogger(__name__)
-    result = {}
-
-    for column in odcs_columns:
-        mapped_type = map_type(column.get("logicalType"), custom_type_mappings)
-        if mapped_type is not None:
-            description = column.get("description") if column.get("description") is not None else ""
-            field = Field(
-                description=" ".join(description.splitlines()),
-                type=mapped_type,
-                title=column.get("businessName") if column.get("businessName") is not None else "",
-                required=not column.get("isNullable") if column.get("isNullable") is not None else False,
-                primary=column.get("isPrimary") if column.get("isPrimary") is not None else False,
-                unique=column.get("isUnique") if column.get("isUnique") is not None else False,
-                classification=column.get("classification") if column.get("classification") is not None else "",
-                tags=column.get("tags") if column.get("tags") is not None else [],
-            )
-            result[column["column"]] = field
-        else:
-            logger.info(
-                f"Can't properly map {column.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{column.get('logicalName')}' that defines your expected type as the 'value'"
-            )
-
-    return result
+    if odcs_api_version.startswith("v2."):
+        from datacontract.imports.odcs_v2_importer import import_odcs_v2
 
+        return import_odcs_v2(data_contract_specification, source)
+    elif odcs_api_version.startswith("v3."):
+        from datacontract.imports.odcs_v3_importer import import_odcs_v3
 
-def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
-    t = odcs_type.lower()
-    if t in DATACONTRACT_TYPES:
-        return t
-    elif custom_mappings.get(t) is not None:
-        return custom_mappings.get(t)
+        return import_odcs_v3(data_contract_specification, source)
     else:
-        return None
-
-
-def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
-    result = {}
-    if odcs_custom_properties is not None:
-        for prop in odcs_custom_properties:
-            if prop["property"].startswith("dc_mapping_"):
-                odcs_type_name = prop["property"].substring(11)
-                datacontract_type = prop["value"]
-                result[odcs_type_name] = datacontract_type
-
-    return result
+        raise DataContractException(
+            type="schema",
+            name="Importing ODCS contract",
+            reason=f"Unsupported ODCS API version: {odcs_api_version}",
+            engine="datacontract",
+        )
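A short sketch of how the new dispatch behaves (assuming datacontract-cli 0.10.14 is installed; the file name is arbitrary): a contract whose apiVersion is neither v2.x nor v3.x is rejected before any models are imported.

import yaml

from datacontract.imports.odcs_importer import import_odcs
from datacontract.model.data_contract_specification import DataContractSpecification
from datacontract.model.exceptions import DataContractException

# Minimal document that passes the kind check but not the version check.
with open("contract.odcs.yaml", "w") as f:
    yaml.safe_dump({"kind": "DataContract", "apiVersion": "v1.0.0"}, f)

try:
    import_odcs(DataContractSpecification(), "contract.odcs.yaml")
except DataContractException as e:
    # assuming the exception exposes its 'reason' argument as an attribute
    print(e.reason)  # Unsupported ODCS API version: v1.0.0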
datacontract/imports/odcs_v2_importer.py ADDED

@@ -0,0 +1,177 @@
+import datetime
+import logging
+from typing import Any, Dict, List
+
+import yaml
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import (
+    Availability,
+    Contact,
+    DataContractSpecification,
+    Info,
+    Model,
+    Field,
+    Retention,
+    ServiceLevel,
+    Terms,
+    DATACONTRACT_TYPES,
+)
+from datacontract.model.exceptions import DataContractException
+
+
+class OdcsImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_odcs_v2(data_contract_specification, source)
+
+
+def import_odcs_v2(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    try:
+        with open(source, "r") as file:
+            odcs_contract = yaml.safe_load(file.read())
+
+    except Exception as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse ODCS contract",
+            reason=f"Failed to parse odcs contract from {source}",
+            engine="datacontract",
+            original_exception=e,
+        )
+
+    data_contract_specification.id = odcs_contract["uuid"]
+    data_contract_specification.info = import_info(odcs_contract)
+    data_contract_specification.terms = import_terms(odcs_contract)
+    data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
+    data_contract_specification.models = import_models(odcs_contract)
+
+    return data_contract_specification
+
+
+def import_info(odcs_contract: Dict[str, Any]) -> Info:
+    info = Info(title=odcs_contract.get("quantumName"), version=odcs_contract.get("version"))
+
+    if odcs_contract.get("description").get("purpose") is not None:
+        info.description = odcs_contract.get("description").get("purpose")
+
+    if odcs_contract.get("datasetDomain") is not None:
+        info.owner = odcs_contract.get("datasetDomain")
+
+    if odcs_contract.get("productDl") is not None or odcs_contract.get("productFeedbackUrl") is not None:
+        contact = Contact()
+        if odcs_contract.get("productDl") is not None:
+            contact.name = odcs_contract.get("productDl")
+        if odcs_contract.get("productFeedbackUrl") is not None:
+            contact.url = odcs_contract.get("productFeedbackUrl")
+
+        info.contact = contact
+
+    return info
+
+
+def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
+    if (
+        odcs_contract.get("description").get("usage") is not None
+        or odcs_contract.get("description").get("limitations") is not None
+        or odcs_contract.get("price") is not None
+    ):
+        terms = Terms()
+        if odcs_contract.get("description").get("usage") is not None:
+            terms.usage = odcs_contract.get("description").get("usage")
+        if odcs_contract.get("description").get("limitations") is not None:
+            terms.limitations = odcs_contract.get("description").get("limitations")
+        if odcs_contract.get("price") is not None:
+            terms.billing = f"{odcs_contract.get('price').get('priceAmount')} {odcs_contract.get('price').get('priceCurrency')} / {odcs_contract.get('price').get('priceUnit')}"
+
+        return terms
+    else:
+        return None
+
+
+def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel:
+    # find the two properties we can map (based on the examples)
+    sla_properties = odcs_contract.get("slaProperties") if odcs_contract.get("slaProperties") is not None else []
+    availability = next((p for p in sla_properties if p["property"] == "generalAvailability"), None)
+    retention = next((p for p in sla_properties if p["property"] == "retention"), None)
+
+    if availability is not None or retention is not None:
+        servicelevel = ServiceLevel()
+
+        if availability is not None:
+            value = availability.get("value")
+            if isinstance(value, datetime.datetime):
+                value = value.isoformat()
+            servicelevel.availability = Availability(description=value)
+
+        if retention is not None:
+            servicelevel.retention = Retention(period=f"{retention.get('value')}{retention.get('unit')}")
+
+        return servicelevel
+    else:
+        return None
+
+
+def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
+    custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))
+
+    odcs_tables = odcs_contract.get("dataset") if odcs_contract.get("dataset") is not None else []
+    result = {}
+
+    for table in odcs_tables:
+        description = table.get("description") if table.get("description") is not None else ""
+        model = Model(description=" ".join(description.splitlines()), type="table")
+        model.fields = import_fields(table.get("columns"), custom_type_mappings)
+        result[table.get("table")] = model
+
+    return result
+
+
+def import_fields(odcs_columns: Dict[str, Any], custom_type_mappings: Dict[str, str]) -> Dict[str, Field]:
+    logger = logging.getLogger(__name__)
+    result = {}
+
+    for column in odcs_columns:
+        mapped_type = map_type(column.get("logicalType"), custom_type_mappings)
+        if mapped_type is not None:
+            description = column.get("description") if column.get("description") is not None else ""
+            field = Field(
+                description=" ".join(description.splitlines()),
+                type=mapped_type,
+                title=column.get("businessName") if column.get("businessName") is not None else "",
+                required=not column.get("isNullable") if column.get("isNullable") is not None else False,
+                primary=column.get("isPrimary") if column.get("isPrimary") is not None else False,
+                unique=column.get("isUnique") if column.get("isUnique") is not None else False,
+                classification=column.get("classification") if column.get("classification") is not None else "",
+                tags=column.get("tags") if column.get("tags") is not None else [],
+            )
+            result[column["column"]] = field
+        else:
+            logger.info(
+                f"Can't properly map {column.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{column.get('logicalName')}' that defines your expected type as the 'value'"
+            )
+
+    return result
+
+
+def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
+    t = odcs_type.lower()
+    if t in DATACONTRACT_TYPES:
+        return t
+    elif custom_mappings.get(t) is not None:
+        return custom_mappings.get(t)
+    else:
+        return None
+
+
+def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
+    result = {}
+    if odcs_custom_properties is not None:
+        for prop in odcs_custom_properties:
+            if prop["property"].startswith("dc_mapping_"):
+                odcs_type_name = prop["property"].substring(11)
+                datacontract_type = prop["value"]
+                result[odcs_type_name] = datacontract_type
+
+    return result
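For the column mapping above, a small sketch of the expected behaviour (not part of the package; the column definitions are invented, and only built-in logical types are used, so no dc_mapping_ custom properties are needed):

from datacontract.imports.odcs_v2_importer import import_fields

columns = [
    {"column": "order_id", "logicalType": "string", "isNullable": False, "isPrimary": True},
    {"column": "amount", "logicalType": "number", "description": "Order total\nin EUR"},
]

fields = import_fields(columns, custom_type_mappings={})
print(fields["order_id"].type)       # string
print(fields["order_id"].required)   # True  (isNullable: False)
print(fields["amount"].description)  # Order total in EUR  (newlines collapsed)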