datacontract-cli 0.10.13__py3-none-any.whl → 0.10.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (32)
  1. datacontract/cli.py +5 -0
  2. datacontract/data_contract.py +9 -1
  3. datacontract/engines/soda/connections/kafka.py +26 -5
  4. datacontract/export/avro_converter.py +8 -1
  5. datacontract/export/avro_idl_converter.py +1 -0
  6. datacontract/export/dcs_exporter.py +6 -0
  7. datacontract/export/exporter.py +4 -1
  8. datacontract/export/exporter_factory.py +13 -1
  9. datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +4 -4
  10. datacontract/export/odcs_v3_exporter.py +294 -0
  11. datacontract/export/sodacl_converter.py +82 -2
  12. datacontract/export/spark_converter.py +3 -1
  13. datacontract/export/sql_type_converter.py +55 -11
  14. datacontract/imports/iceberg_importer.py +162 -0
  15. datacontract/imports/importer.py +1 -0
  16. datacontract/imports/importer_factory.py +5 -0
  17. datacontract/imports/odcs_importer.py +25 -168
  18. datacontract/imports/odcs_v2_importer.py +177 -0
  19. datacontract/imports/odcs_v3_importer.py +309 -0
  20. datacontract/integration/datamesh_manager.py +1 -1
  21. datacontract/lint/resolve.py +14 -9
  22. datacontract/lint/resources.py +21 -0
  23. datacontract/lint/urls.py +4 -2
  24. datacontract/model/data_contract_specification.py +72 -8
  25. datacontract/model/odcs.py +11 -0
  26. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA +89 -51
  27. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/RECORD +31 -25
  28. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/WHEEL +1 -1
  29. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  30. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/LICENSE +0 -0
  31. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/entry_points.txt +0 -0
  32. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
7
7
  return convert_to_snowflake(field)
8
8
  elif server_type == "postgres":
9
9
  return convert_type_to_postgres(field)
10
+ elif server_type == "dataframe":
11
+ return convert_to_dataframe(field)
10
12
  elif server_type == "databricks":
11
13
  return convert_to_databricks(field)
12
14
  elif server_type == "local" or server_type == "s3":
@@ -108,6 +110,46 @@ def convert_type_to_postgres(field: Field) -> None | str:
108
110
  return None
109
111
 
110
112
 
113
+ # dataframe data types:
114
+ # https://spark.apache.org/docs/latest/sql-ref-datatypes.html
115
def convert_to_dataframe(field: Field) -> None | str:
    """Translate a data contract field type into a Spark DataFrame SQL type name.

    An explicit ``dataframeType`` entry in the field's config takes precedence
    over the built-in mapping.

    Args:
        field: The data contract field to convert.

    Returns:
        The DataFrame type name, or ``None`` when the field has no type or the
        type has no entry in the mapping.
    """
    config = field.config
    if config and "dataframeType" in config:
        return config["dataframeType"]

    declared_type = field.type
    if declared_type is None:
        return None

    dataframe_types = {
        "string": "STRING",
        "varchar": "STRING",
        "text": "STRING",
        "timestamp": "TIMESTAMP",
        "timestamp_tz": "TIMESTAMP",
        "timestamp_ntz": "TIMESTAMP_NTZ",
        "date": "DATE",
        "time": "STRING",
        # precision and scale not supported by data contract
        "number": "DECIMAL",
        "decimal": "DECIMAL",
        "numeric": "DECIMAL",
        "float": "FLOAT",
        "double": "DOUBLE",
        "integer": "INT",
        "int": "INT",
        "long": "BIGINT",
        "bigint": "BIGINT",
        "boolean": "BOOLEAN",
        "object": "STRUCT",
        "record": "STRUCT",
        "struct": "STRUCT",
        "bytes": "BINARY",
        "array": "ARRAY",
    }
    # Matching is case-insensitive; unmapped types yield None.
    return dataframe_types.get(declared_type.lower())
151
+
152
+
111
153
  # databricks data types:
112
154
  # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
113
155
  def convert_to_databricks(field: Field) -> None | str:
@@ -186,7 +228,7 @@ def convert_to_duckdb(field: Field) -> None | str:
186
228
  "time": "TIME",
187
229
  "timestamp": "TIMESTAMP WITH TIME ZONE",
188
230
  "timestamp_tz": "TIMESTAMP WITH TIME ZONE",
189
- "timestamp_ntz": "DATETIME",
231
+ "timestamp_ntz": "TIMESTAMP",
190
232
  }
191
233
 
192
234
  # Convert simple mappings
@@ -281,25 +323,27 @@ def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
281
323
 
282
324
  def convert_type_to_trino(field: Field) -> None | str:
283
325
  """Convert from supported datacontract types to equivalent trino types"""
284
- field_type = field.type
326
+ if field.config and "trinoType" in field.config:
327
+ return field.config["trinoType"]
285
328
 
286
- if field_type.lower() in ["string", "text", "varchar"]:
329
+ field_type = field.type.lower()
330
+ if field_type in ["string", "text", "varchar"]:
287
331
  return "varchar"
288
332
  # tinyint, smallint not supported by data contract
289
- if field_type.lower() in ["number", "decimal", "numeric"]:
333
+ if field_type in ["number", "decimal", "numeric"]:
290
334
  # precision and scale not supported by data contract
291
335
  return "decimal"
292
- if field_type.lower() in ["int", "integer"]:
336
+ if field_type in ["int", "integer"]:
293
337
  return "integer"
294
- if field_type.lower() in ["long", "bigint"]:
338
+ if field_type in ["long", "bigint"]:
295
339
  return "bigint"
296
- if field_type.lower() in ["float"]:
340
+ if field_type in ["float"]:
297
341
  return "real"
298
- if field_type.lower() in ["timestamp", "timestamp_tz"]:
342
+ if field_type in ["timestamp", "timestamp_tz"]:
299
343
  return "timestamp(3) with time zone"
300
- if field_type.lower() in ["timestamp_ntz"]:
344
+ if field_type in ["timestamp_ntz"]:
301
345
  return "timestamp(3)"
302
- if field_type.lower() in ["bytes"]:
346
+ if field_type in ["bytes"]:
303
347
  return "varbinary"
304
- if field_type.lower() in ["object", "record", "struct"]:
348
+ if field_type in ["object", "record", "struct"]:
305
349
  return "json"
@@ -0,0 +1,162 @@
1
+ from typing import Dict, Any
2
+
3
+ from datacontract.imports.importer import Importer
4
+ from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
5
+
6
+ from pyiceberg.schema import Schema
7
+ from pyiceberg import types as iceberg_types
8
+ from pydantic import ValidationError
9
+
10
+ from datacontract.model.exceptions import DataContractException
11
+
12
+
13
class IcebergImporter(Importer):
    """Importer that turns an Iceberg schema file into a data contract model."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Load the Iceberg schema at ``source`` and add it to the specification.

        Args:
            data_contract_specification: The specification to extend.
            source: Location of the Iceberg schema file.
            import_args: Import options; the ``iceberg_table`` entry is used as
                the name of the model that is created.

        Returns:
            DataContractSpecification: The specification passed in, with the model added.
        """
        iceberg_schema = load_and_validate_iceberg_schema(source)
        table_name = import_args.get("iceberg_table")
        return import_iceberg(data_contract_specification, iceberg_schema, table_name)
23
+
24
+
25
def load_and_validate_iceberg_schema(source: str) -> Schema:
    """Read a JSON-serialized Iceberg schema from a file and validate it.

    Args:
        source: Path of the file that holds the Iceberg schema as JSON.

    Returns:
        Schema: The parsed and validated Iceberg schema.

    Raises:
        DataContractException: If the file content does not validate as an Iceberg schema.
        OSError: If the file cannot be opened or read (propagated unchanged).
    """
    # JSON interchange is UTF-8; do not depend on the platform's default encoding.
    with open(source, "r", encoding="utf-8") as file:
        raw_schema = file.read()

    try:
        return Schema.model_validate_json(raw_schema)
    except ValidationError as e:
        # Keep the validation error attached, both as the explicit cause and via
        # original_exception, as the ODCS importers in this package do.
        raise DataContractException(
            type="schema",
            name="Parse iceberg schema",
            reason=f"Failed to validate iceberg schema from {source}: {e}",
            engine="datacontract",
            original_exception=e,
        ) from e
36
+
37
+
38
def import_iceberg(
    data_contract_specification: DataContractSpecification, schema: Schema, table_name: str
) -> DataContractSpecification:
    """Add a table model built from an Iceberg schema to the specification.

    Args:
        data_contract_specification: The specification to extend; its ``models``
            mapping is created when it is ``None``.
        schema: The Iceberg schema whose top-level fields become model fields.
        table_name: Used both as the model's title and as its key in ``models``.

    Returns:
        DataContractSpecification: The same specification object, updated in place.
    """
    if data_contract_specification.models is None:
        data_contract_specification.models = {}

    table_model = Model(type="table", title=table_name)
    for column in schema.fields:
        converted = _field_from_nested_field(column)
        table_model.fields[column.name] = converted

    data_contract_specification.models[table_name] = table_model
    return data_contract_specification
51
+
52
+
53
def _field_from_nested_field(nested_field: iceberg_types.NestedField) -> Field:
    """
    Build a data contract Field from an Iceberg NestedField.

    The title, required flag and config are copied first, then the description
    (only when the Iceberg field has a doc string), and finally the type
    information is filled in from the Iceberg field type.

    Args:
        nested_field: The Iceberg NestedField to convert.

    Returns:
        Field: The generated Field object.
    """
    contract_field = Field(
        title=nested_field.name,
        required=nested_field.required,
        config=build_field_config(nested_field),
    )

    doc = nested_field.doc
    if doc is not None:
        contract_field.description = doc

    return _type_from_iceberg_type(contract_field, nested_field.field_type)
73
+
74
+
75
def _type_from_iceberg_type(field: Field, iceberg_type: iceberg_types.IcebergType) -> Field:
    """
    Set the data contract type on a field, recursing into container types.

    Arrays get an ``items`` field, maps get ``keys`` and ``values`` fields, and
    objects get one nested field per Iceberg struct member.

    Args:
        field: The Field object to update.
        iceberg_type: The Iceberg data type to map.

    Returns:
        Field: The updated Field object.
    """
    field.type = _data_type_from_iceberg(iceberg_type)
    kind = field.type

    if kind == "array":
        element = Field(required=iceberg_type.element_required)
        field.items = _type_from_iceberg_type(element, iceberg_type.element_type)
        return field

    if kind == "map":
        # Map keys are always created as required fields.
        key = Field(required=True)
        field.keys = _type_from_iceberg_type(key, iceberg_type.key_type)
        value = Field(required=iceberg_type.value_required)
        field.values = _type_from_iceberg_type(value, iceberg_type.value_type)
        return field

    if kind == "object":
        nested = {}
        for member in iceberg_type.fields:
            nested[member.name] = _field_from_nested_field(member)
        field.fields = nested

    return field
99
+
100
+
101
def build_field_config(iceberg_field: iceberg_types.NestedField) -> Dict[str, Any]:
    """
    Collect Iceberg-specific attributes of a field into a config dictionary.

    Args:
        iceberg_field: The Iceberg NestedField to read from.

    Returns:
        Dict[str, Any]: ``icebergFieldId`` when the field id is positive, plus
        ``icebergInitialDefault`` / ``icebergWriteDefault`` when those defaults
        are not None.
    """
    settings = {}

    field_id = iceberg_field.field_id
    if field_id > 0:
        settings["icebergFieldId"] = field_id

    initial_default = iceberg_field.initial_default
    if initial_default is not None:
        settings["icebergInitialDefault"] = initial_default

    write_default = iceberg_field.write_default
    if write_default is not None:
        settings["icebergWriteDefault"] = write_default

    return settings
114
+
115
+
116
def _data_type_from_iceberg(type: iceberg_types.IcebergType) -> str:
    """
    Look up the datacontract field type for an Iceberg field type.

    Args:
        type: The Iceberg field type

    Returns:
        str: The datacontract field type

    Raises:
        ValueError: If the Iceberg type matches none of the known type classes.
    """
    # Checked top to bottom with isinstance; the first match wins.
    ordered_mappings = (
        (iceberg_types.BooleanType, "boolean"),
        (iceberg_types.IntegerType, "integer"),
        (iceberg_types.LongType, "long"),
        (iceberg_types.FloatType, "float"),
        (iceberg_types.DoubleType, "double"),
        (iceberg_types.DecimalType, "decimal"),
        (iceberg_types.DateType, "date"),
        # there isn't a great mapping for the iceberg type "time", just map to string for now
        (iceberg_types.TimeType, "string"),
        (iceberg_types.TimestampType, "timestamp_ntz"),
        (iceberg_types.TimestamptzType, "timestamp_tz"),
        (iceberg_types.StringType, "string"),
        (iceberg_types.UUIDType, "string"),
        (iceberg_types.BinaryType, "bytes"),
        (iceberg_types.FixedType, "bytes"),
        (iceberg_types.MapType, "map"),
        (iceberg_types.ListType, "array"),
        (iceberg_types.StructType, "object"),
    )

    for iceberg_class, contract_type in ordered_mappings:
        if isinstance(type, iceberg_class):
            return contract_type

    raise ValueError(f"Unknown Iceberg type: {type}")
@@ -29,6 +29,7 @@ class ImportFormat(str, Enum):
29
29
  odcs = "odcs"
30
30
  unity = "unity"
31
31
  spark = "spark"
32
+ iceberg = "iceberg"
32
33
 
33
34
  @classmethod
34
35
  def get_supported_formats(cls):
@@ -93,3 +93,8 @@ importer_factory.register_lazy_importer(
93
93
  module_path="datacontract.imports.dbml_importer",
94
94
  class_name="DBMLImporter",
95
95
  )
96
+ importer_factory.register_lazy_importer(
97
+ name=ImportFormat.iceberg,
98
+ module_path="datacontract.imports.iceberg_importer",
99
+ class_name="IcebergImporter",
100
+ )
@@ -1,47 +1,12 @@
1
- import datetime
2
- import logging
3
- from typing import Any, Dict, List
4
1
  import yaml
2
+
5
3
  from datacontract.imports.importer import Importer
4
+ from datacontract.lint.resources import read_resource
6
5
  from datacontract.model.data_contract_specification import (
7
- Availability,
8
- Contact,
9
6
  DataContractSpecification,
10
- Info,
11
- Model,
12
- Field,
13
- Retention,
14
- ServiceLevel,
15
- Terms,
16
7
  )
17
8
  from datacontract.model.exceptions import DataContractException
18
9
 
19
- DATACONTRACT_TYPES = [
20
- "string",
21
- "text",
22
- "varchar",
23
- "number",
24
- "decimal",
25
- "numeric",
26
- "int",
27
- "integer",
28
- "long",
29
- "bigint",
30
- "float",
31
- "double",
32
- "boolean",
33
- "timestamp",
34
- "timestamp_tz",
35
- "timestamp_ntz",
36
- "date",
37
- "array",
38
- "bytes",
39
- "object",
40
- "record",
41
- "struct",
42
- "null",
43
- ]
44
-
45
10
 
46
11
  class OdcsImporter(Importer):
47
12
  def import_source(
@@ -52,8 +17,7 @@ class OdcsImporter(Importer):
52
17
 
53
18
  def import_odcs(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
54
19
  try:
55
- with open(source, "r") as file:
56
- odcs_contract = yaml.safe_load(file.read())
20
+ odcs_contract = yaml.safe_load(read_resource(source))
57
21
 
58
22
  except Exception as e:
59
23
  raise DataContractException(
@@ -64,137 +28,30 @@ def import_odcs(data_contract_specification: DataContractSpecification, source:
64
28
  original_exception=e,
65
29
  )
66
30
 
67
- data_contract_specification.id = odcs_contract["uuid"]
68
- data_contract_specification.info = import_info(odcs_contract)
69
- data_contract_specification.terms = import_terms(odcs_contract)
70
- data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
71
- data_contract_specification.models = import_models(odcs_contract)
72
-
73
- return data_contract_specification
74
-
75
-
76
- def import_info(odcs_contract: Dict[str, Any]) -> Info:
77
- info = Info(title=odcs_contract.get("quantumName"), version=odcs_contract.get("version"))
78
-
79
- if odcs_contract.get("description").get("purpose") is not None:
80
- info.description = odcs_contract.get("description").get("purpose")
81
-
82
- if odcs_contract.get("datasetDomain") is not None:
83
- info.owner = odcs_contract.get("datasetDomain")
84
-
85
- if odcs_contract.get("productDl") is not None or odcs_contract.get("productFeedbackUrl") is not None:
86
- contact = Contact()
87
- if odcs_contract.get("productDl") is not None:
88
- contact.name = odcs_contract.get("productDl")
89
- if odcs_contract.get("productFeedbackUrl") is not None:
90
- contact.url = odcs_contract.get("productFeedbackUrl")
91
-
92
- info.contact = contact
93
-
94
- return info
31
+ odcs_kind = odcs_contract.get("kind")
32
+ odcs_api_version = odcs_contract.get("apiVersion")
95
33
 
34
+ # if odcs_kind is not DataContract throw exception
35
+ if odcs_kind != "DataContract":
36
+ raise DataContractException(
37
+ type="schema",
38
+ name="Importing ODCS contract",
39
+ reason=f"Unsupported ODCS kind: {odcs_kind}. Is this a valid ODCS data contract?",
40
+ engine="datacontract",
41
+ )
96
42
 
97
- def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
98
- if (
99
- odcs_contract.get("description").get("usage") is not None
100
- or odcs_contract.get("description").get("limitations") is not None
101
- or odcs_contract.get("price") is not None
102
- ):
103
- terms = Terms()
104
- if odcs_contract.get("description").get("usage") is not None:
105
- terms.usage = odcs_contract.get("description").get("usage")
106
- if odcs_contract.get("description").get("limitations") is not None:
107
- terms.limitations = odcs_contract.get("description").get("limitations")
108
- if odcs_contract.get("price") is not None:
109
- terms.billing = f"{odcs_contract.get('price').get('priceAmount')} {odcs_contract.get('price').get('priceCurrency')} / {odcs_contract.get('price').get('priceUnit')}"
110
-
111
- return terms
112
- else:
113
- return None
114
-
115
-
116
- def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel:
117
- # find the two properties we can map (based on the examples)
118
- sla_properties = odcs_contract.get("slaProperties") if odcs_contract.get("slaProperties") is not None else []
119
- availability = next((p for p in sla_properties if p["property"] == "generalAvailability"), None)
120
- retention = next((p for p in sla_properties if p["property"] == "retention"), None)
121
-
122
- if availability is not None or retention is not None:
123
- servicelevel = ServiceLevel()
124
-
125
- if availability is not None:
126
- value = availability.get("value")
127
- if isinstance(value, datetime.datetime):
128
- value = value.isoformat()
129
- servicelevel.availability = Availability(description=value)
130
-
131
- if retention is not None:
132
- servicelevel.retention = Retention(period=f"{retention.get('value')}{retention.get('unit')}")
133
-
134
- return servicelevel
135
- else:
136
- return None
137
-
138
-
139
- def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
140
- custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))
141
-
142
- odcs_tables = odcs_contract.get("dataset") if odcs_contract.get("dataset") is not None else []
143
- result = {}
144
-
145
- for table in odcs_tables:
146
- description = table.get("description") if table.get("description") is not None else ""
147
- model = Model(description=" ".join(description.splitlines()), type="table")
148
- model.fields = import_fields(table.get("columns"), custom_type_mappings)
149
- result[table.get("table")] = model
150
-
151
- return result
152
-
153
-
154
- def import_fields(odcs_columns: Dict[str, Any], custom_type_mappings: Dict[str, str]) -> Dict[str, Field]:
155
- logger = logging.getLogger(__name__)
156
- result = {}
157
-
158
- for column in odcs_columns:
159
- mapped_type = map_type(column.get("logicalType"), custom_type_mappings)
160
- if mapped_type is not None:
161
- description = column.get("description") if column.get("description") is not None else ""
162
- field = Field(
163
- description=" ".join(description.splitlines()),
164
- type=mapped_type,
165
- title=column.get("businessName") if column.get("businessName") is not None else "",
166
- required=not column.get("isNullable") if column.get("isNullable") is not None else False,
167
- primary=column.get("isPrimary") if column.get("isPrimary") is not None else False,
168
- unique=column.get("isUnique") if column.get("isUnique") is not None else False,
169
- classification=column.get("classification") if column.get("classification") is not None else "",
170
- tags=column.get("tags") if column.get("tags") is not None else [],
171
- )
172
- result[column["column"]] = field
173
- else:
174
- logger.info(
175
- f"Can't properly map {column.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{column.get('logicalName')}' that defines your expected type as the 'value'"
176
- )
177
-
178
- return result
43
+ if odcs_api_version.startswith("v2."):
44
+ from datacontract.imports.odcs_v2_importer import import_odcs_v2
179
45
 
46
+ return import_odcs_v2(data_contract_specification, source)
47
+ elif odcs_api_version.startswith("v3."):
48
+ from datacontract.imports.odcs_v3_importer import import_odcs_v3
180
49
 
181
- def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
182
- t = odcs_type.lower()
183
- if t in DATACONTRACT_TYPES:
184
- return t
185
- elif custom_mappings.get(t) is not None:
186
- return custom_mappings.get(t)
50
+ return import_odcs_v3(data_contract_specification, source)
187
51
  else:
188
- return None
189
-
190
-
191
- def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
192
- result = {}
193
- if odcs_custom_properties is not None:
194
- for prop in odcs_custom_properties:
195
- if prop["property"].startswith("dc_mapping_"):
196
- odcs_type_name = prop["property"].substring(11)
197
- datacontract_type = prop["value"]
198
- result[odcs_type_name] = datacontract_type
199
-
200
- return result
52
+ raise DataContractException(
53
+ type="schema",
54
+ name="Importing ODCS contract",
55
+ reason=f"Unsupported ODCS API version: {odcs_api_version}",
56
+ engine="datacontract",
57
+ )
@@ -0,0 +1,177 @@
1
+ import datetime
2
+ import logging
3
+ from typing import Any, Dict, List
4
+
5
+ import yaml
6
+
7
+ from datacontract.imports.importer import Importer
8
+ from datacontract.model.data_contract_specification import (
9
+ Availability,
10
+ Contact,
11
+ DataContractSpecification,
12
+ Info,
13
+ Model,
14
+ Field,
15
+ Retention,
16
+ ServiceLevel,
17
+ Terms,
18
+ DATACONTRACT_TYPES,
19
+ )
20
+ from datacontract.model.exceptions import DataContractException
21
+
22
+
23
class OdcsImporter(Importer):
    """Importer for ODCS v2 data contracts."""

    def import_source(
        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
    ) -> DataContractSpecification:
        """Import the ODCS v2 contract at ``source``; ``import_args`` is not used."""
        imported = import_odcs_v2(data_contract_specification, source)
        return imported
28
+
29
+
30
def import_odcs_v2(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
    """Populate a data contract specification from an ODCS v2 contract.

    Args:
        data_contract_specification: The specification to fill; it is updated in place.
        source: Location of the ODCS v2 YAML document.

    Returns:
        DataContractSpecification: The specification passed in, with id, info,
        terms, service levels and models set from the ODCS contract.

    Raises:
        DataContractException: If the document cannot be read or parsed as YAML.
    """
    # Function-scope import keeps this change self-contained.
    from datacontract.lint.resources import read_resource

    try:
        # The ODCS dispatcher loads `source` through read_resource before delegating
        # here; read it the same way so a source it accepted is not rejected by a
        # plain open().
        odcs_contract = yaml.safe_load(read_resource(source))
    except Exception as e:
        raise DataContractException(
            type="schema",
            name="Parse ODCS contract",
            reason=f"Failed to parse odcs contract from {source}",
            engine="datacontract",
            original_exception=e,
        ) from e

    data_contract_specification.id = odcs_contract["uuid"]
    data_contract_specification.info = import_info(odcs_contract)
    data_contract_specification.terms = import_terms(odcs_contract)
    data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
    data_contract_specification.models = import_models(odcs_contract)

    return data_contract_specification
51
+
52
+
53
def import_info(odcs_contract: Dict[str, Any]) -> Info:
    """Build the ``Info`` section from the top-level ODCS v2 properties.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        Info: Title and version from ``quantumName`` / ``version``, plus
        description, owner and contact when the corresponding ODCS properties
        are present.
    """
    info = Info(title=odcs_contract.get("quantumName"), version=odcs_contract.get("version"))

    # A contract without a "description" section must not fail on None.get(...).
    description = odcs_contract.get("description") or {}
    purpose = description.get("purpose")
    if purpose is not None:
        info.description = purpose

    owner = odcs_contract.get("datasetDomain")
    if owner is not None:
        info.owner = owner

    contact_name = odcs_contract.get("productDl")
    contact_url = odcs_contract.get("productFeedbackUrl")
    if contact_name is not None or contact_url is not None:
        contact = Contact()
        if contact_name is not None:
            contact.name = contact_name
        if contact_url is not None:
            contact.url = contact_url
        info.contact = contact

    return info
72
+
73
+
74
def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
    """Build the ``Terms`` section from the ODCS description and price.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        Terms with usage, limitations and billing set from whichever of
        ``description.usage``, ``description.limitations`` and ``price`` are
        present, or ``None`` when none of them is.
    """
    # A contract without a "description" section must not fail on None.get(...).
    description = odcs_contract.get("description") or {}
    usage = description.get("usage")
    limitations = description.get("limitations")
    price = odcs_contract.get("price")

    if usage is None and limitations is None and price is None:
        return None

    terms = Terms()
    if usage is not None:
        terms.usage = usage
    if limitations is not None:
        terms.limitations = limitations
    if price is not None:
        terms.billing = f"{price.get('priceAmount')} {price.get('priceCurrency')} / {price.get('priceUnit')}"

    return terms
91
+
92
+
93
def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel | None:
    """Build the service levels from the ODCS ``slaProperties`` list.

    Only the ``generalAvailability`` and ``retention`` properties are mapped.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        A ServiceLevel with availability and/or retention set, or ``None`` when
        neither property is present.
    """
    sla_properties = odcs_contract.get("slaProperties") or []

    def find_property(name: str):
        # .get() so an entry that lacks a "property" key is skipped instead of raising KeyError
        return next((p for p in sla_properties if p.get("property") == name), None)

    # find the two properties we can map (based on the examples)
    availability = find_property("generalAvailability")
    retention = find_property("retention")

    if availability is None and retention is None:
        return None

    servicelevel = ServiceLevel()

    if availability is not None:
        value = availability.get("value")
        # The value may already be a datetime object; it is stored as ISO 8601 text.
        if isinstance(value, datetime.datetime):
            value = value.isoformat()
        servicelevel.availability = Availability(description=value)

    if retention is not None:
        servicelevel.retention = Retention(period=f"{retention.get('value')}{retention.get('unit')}")

    return servicelevel
114
+
115
+
116
def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
    """Convert each entry of the ODCS ``dataset`` list into a table model.

    Args:
        odcs_contract: The parsed ODCS contract.

    Returns:
        Models keyed by each entry's ``table`` value; empty when the contract
        has no ``dataset``.
    """
    custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))

    odcs_tables = odcs_contract.get("dataset")
    if odcs_tables is None:
        odcs_tables = []

    models = {}
    for table in odcs_tables:
        raw_description = table.get("description")
        if raw_description is None:
            raw_description = ""
        # Multi-line descriptions are collapsed into a single line.
        single_line = " ".join(raw_description.splitlines())

        model = Model(description=single_line, type="table")
        model.fields = import_fields(table.get("columns"), custom_type_mappings)
        models[table.get("table")] = model

    return models
129
+
130
+
131
def import_fields(
    odcs_columns: List[Dict[str, Any]] | None, custom_type_mappings: Dict[str, str]
) -> Dict[str, Field]:
    """Convert ODCS column definitions into data contract fields.

    Columns whose ``logicalType`` cannot be mapped, or is missing, are skipped
    and reported at INFO level.

    Args:
        odcs_columns: The ``columns`` list of an ODCS table; ``None`` is treated as empty.
        custom_type_mappings: Extra type mappings, passed on to ``map_type``.

    Returns:
        Fields keyed by each column's ``column`` value.
    """
    logger = logging.getLogger(__name__)
    result = {}

    def value_or(column: Dict[str, Any], key: str, default: Any) -> Any:
        # ODCS properties that are absent or explicitly null fall back to the default.
        value = column.get(key)
        return default if value is None else value

    for column in odcs_columns or []:
        logical_type = column.get("logicalType")
        # Guard here so a column without a logicalType is skipped rather than crashing.
        mapped_type = map_type(logical_type, custom_type_mappings) if logical_type is not None else None

        if mapped_type is None:
            # The custom mapping is looked up by logicalType, so that is the name to suggest.
            logger.info(
                f"Can't properly map {column.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{logical_type}' that defines your expected type as the 'value'"
            )
            continue

        is_nullable = column.get("isNullable")
        result[column["column"]] = Field(
            description=" ".join(value_or(column, "description", "").splitlines()),
            type=mapped_type,
            title=value_or(column, "businessName", ""),
            required=False if is_nullable is None else not is_nullable,
            primary=value_or(column, "isPrimary", False),
            unique=value_or(column, "isUnique", False),
            classification=value_or(column, "classification", ""),
            tags=value_or(column, "tags", []),
        )

    return result
156
+
157
+
158
+ def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
159
+ t = odcs_type.lower()
160
+ if t in DATACONTRACT_TYPES:
161
+ return t
162
+ elif custom_mappings.get(t) is not None:
163
+ return custom_mappings.get(t)
164
+ else:
165
+ return None
166
+
167
+
168
+ def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
169
+ result = {}
170
+ if odcs_custom_properties is not None:
171
+ for prop in odcs_custom_properties:
172
+ if prop["property"].startswith("dc_mapping_"):
173
+ odcs_type_name = prop["property"].substring(11)
174
+ datacontract_type = prop["value"]
175
+ result[odcs_type_name] = datacontract_type
176
+
177
+ return result