datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +116 -10
  5. datacontract/data_contract.py +143 -65
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +22 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/export/avro_converter.py +20 -3
  15. datacontract/export/bigquery_converter.py +1 -1
  16. datacontract/export/dbt_converter.py +36 -7
  17. datacontract/export/dqx_converter.py +126 -0
  18. datacontract/export/duckdb_type_converter.py +57 -0
  19. datacontract/export/excel_exporter.py +923 -0
  20. datacontract/export/exporter.py +3 -0
  21. datacontract/export/exporter_factory.py +17 -1
  22. datacontract/export/great_expectations_converter.py +55 -5
  23. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  24. datacontract/export/markdown_converter.py +134 -5
  25. datacontract/export/mermaid_exporter.py +110 -0
  26. datacontract/export/odcs_v3_exporter.py +187 -145
  27. datacontract/export/protobuf_converter.py +163 -69
  28. datacontract/export/rdf_converter.py +2 -2
  29. datacontract/export/sodacl_converter.py +9 -1
  30. datacontract/export/spark_converter.py +31 -4
  31. datacontract/export/sql_converter.py +6 -2
  32. datacontract/export/sql_type_converter.py +20 -8
  33. datacontract/imports/avro_importer.py +63 -12
  34. datacontract/imports/csv_importer.py +111 -57
  35. datacontract/imports/excel_importer.py +1111 -0
  36. datacontract/imports/importer.py +16 -3
  37. datacontract/imports/importer_factory.py +17 -0
  38. datacontract/imports/json_importer.py +325 -0
  39. datacontract/imports/odcs_importer.py +2 -2
  40. datacontract/imports/odcs_v3_importer.py +351 -151
  41. datacontract/imports/protobuf_importer.py +264 -0
  42. datacontract/imports/spark_importer.py +117 -13
  43. datacontract/imports/sql_importer.py +32 -16
  44. datacontract/imports/unity_importer.py +84 -38
  45. datacontract/init/init_template.py +1 -1
  46. datacontract/integration/datamesh_manager.py +16 -2
  47. datacontract/lint/resolve.py +112 -23
  48. datacontract/lint/schema.py +24 -15
  49. datacontract/model/data_contract_specification/__init__.py +1 -0
  50. datacontract/model/odcs.py +13 -0
  51. datacontract/model/run.py +3 -0
  52. datacontract/output/junit_test_results.py +3 -3
  53. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  54. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  55. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  56. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  58. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  59. datacontract/templates/datacontract.html +54 -3
  60. datacontract/templates/datacontract_odcs.html +685 -0
  61. datacontract/templates/index.html +5 -2
  62. datacontract/templates/partials/server.html +2 -0
  63. datacontract/templates/style/output.css +319 -145
  64. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
  65. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  66. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  68. datacontract/export/csv_type_converter.py +0 -36
  69. datacontract/lint/lint.py +0 -142
  70. datacontract/lint/linters/description_linter.py +0 -35
  71. datacontract/lint/linters/field_pattern_linter.py +0 -34
  72. datacontract/lint/linters/field_reference_linter.py +0 -48
  73. datacontract/lint/linters/notice_period_linter.py +0 -55
  74. datacontract/lint/linters/quality_schema_linter.py +0 -52
  75. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  76. datacontract/model/data_contract_specification.py +0 -327
  77. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  78. /datacontract/{lint/linters → output}/__init__.py +0 -0
  79. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  80. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,11 @@
1
1
  import datetime
2
2
  import logging
3
+ import re
3
4
  from typing import Any, Dict, List
4
5
  from venv import logger
5
6
 
6
- import yaml
7
+ from datacontract_specification.model import Quality
8
+ from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard, SchemaProperty
7
9
 
8
10
  from datacontract.imports.importer import Importer
9
11
  from datacontract.lint.resources import read_resource
@@ -14,9 +16,9 @@ from datacontract.model.data_contract_specification import (
14
16
  Field,
15
17
  Info,
16
18
  Model,
17
- Quality,
18
19
  Retention,
19
20
  Server,
21
+ ServerRole,
20
22
  ServiceLevel,
21
23
  Terms,
22
24
  )
@@ -27,19 +29,20 @@ class OdcsImporter(Importer):
27
29
  def import_source(
28
30
  self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
29
31
  ) -> DataContractSpecification:
30
- return import_odcs_v3(data_contract_specification, source)
32
+ return import_odcs_v3_as_dcs(data_contract_specification, source)
31
33
 
32
34
 
33
- def import_odcs_v3(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
35
+ def import_odcs_v3_as_dcs(
36
+ data_contract_specification: DataContractSpecification, source: str
37
+ ) -> DataContractSpecification:
34
38
  source_str = read_resource(source)
35
- return import_odcs_v3_from_str(data_contract_specification, source_str)
39
+ odcs = parse_odcs_v3_from_str(source_str)
40
+ return import_from_odcs(data_contract_specification, odcs)
36
41
 
37
42
 
38
- def import_odcs_v3_from_str(
39
- data_contract_specification: DataContractSpecification, source_str: str
40
- ) -> DataContractSpecification:
43
+ def parse_odcs_v3_from_str(source_str):
41
44
  try:
42
- odcs_contract = yaml.safe_load(source_str)
45
+ odcs = OpenDataContractStandard.from_string(source_str)
43
46
  except Exception as e:
44
47
  raise DataContractException(
45
48
  type="schema",
@@ -48,130 +51,144 @@ def import_odcs_v3_from_str(
48
51
  engine="datacontract",
49
52
  original_exception=e,
50
53
  )
54
+ return odcs
51
55
 
52
- data_contract_specification.id = odcs_contract["id"]
53
- data_contract_specification.info = import_info(odcs_contract)
54
- data_contract_specification.servers = import_servers(odcs_contract)
55
- data_contract_specification.terms = import_terms(odcs_contract)
56
- data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
57
- data_contract_specification.models = import_models(odcs_contract)
58
- data_contract_specification.tags = import_tags(odcs_contract)
59
56
 
57
+ def import_from_odcs(data_contract_specification: DataContractSpecification, odcs: OpenDataContractStandard):
58
+ data_contract_specification.id = odcs.id
59
+ data_contract_specification.info = import_info(odcs)
60
+ data_contract_specification.servers = import_servers(odcs)
61
+ data_contract_specification.terms = import_terms(odcs)
62
+ data_contract_specification.servicelevels = import_servicelevels(odcs)
63
+ data_contract_specification.models = import_models(odcs)
64
+ data_contract_specification.tags = import_tags(odcs)
60
65
  return data_contract_specification
61
66
 
62
67
 
63
- def import_info(odcs_contract: Dict[str, Any]) -> Info:
68
+ def import_info(odcs: Any) -> Info:
64
69
  info = Info()
65
70
 
66
- info.title = odcs_contract.get("name") if odcs_contract.get("name") is not None else ""
71
+ info.title = odcs.name if odcs.name is not None else ""
67
72
 
68
- if odcs_contract.get("version") is not None:
69
- info.version = odcs_contract.get("version")
73
+ if odcs.version is not None:
74
+ info.version = odcs.version
70
75
 
71
76
  # odcs.description.purpose => datacontract.description
72
- if odcs_contract.get("description") is not None and odcs_contract.get("description").get("purpose") is not None:
73
- info.description = odcs_contract.get("description").get("purpose")
77
+ if odcs.description is not None and odcs.description.purpose is not None:
78
+ info.description = odcs.description.purpose
74
79
 
75
80
  # odcs.domain => datacontract.owner
76
- if odcs_contract.get("domain") is not None:
77
- info.owner = odcs_contract.get("domain")
81
+ owner = get_owner(odcs.customProperties)
82
+ if owner is not None:
83
+ info.owner = owner
78
84
 
79
85
  # add dataProduct as custom property
80
- if odcs_contract.get("dataProduct") is not None:
81
- info.dataProduct = odcs_contract.get("dataProduct")
86
+ if odcs.dataProduct is not None:
87
+ info.dataProduct = odcs.dataProduct
82
88
 
83
89
  # add tenant as custom property
84
- if odcs_contract.get("tenant") is not None:
85
- info.tenant = odcs_contract.get("tenant")
90
+ if odcs.tenant is not None:
91
+ info.tenant = odcs.tenant
86
92
 
87
93
  return info
88
94
 
89
95
 
90
- def import_servers(odcs_contract: Dict[str, Any]) -> Dict[str, Server] | None:
91
- if odcs_contract.get("servers") is None:
96
+ def import_server_roles(roles: List[Dict]) -> List[ServerRole] | None:
97
+ if roles is None:
98
+ return None
99
+ result = []
100
+ for role in roles:
101
+ server_role = ServerRole()
102
+ server_role.name = role.role
103
+ server_role.description = role.description
104
+ result.append(server_role)
105
+
106
+
107
+ def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
108
+ if odcs.servers is None:
92
109
  return None
93
110
  servers = {}
94
- for odcs_server in odcs_contract.get("servers"):
95
- server_name = odcs_server.get("server")
111
+ for odcs_server in odcs.servers:
112
+ server_name = odcs_server.server
96
113
  if server_name is None:
97
114
  logger.warning("Server name is missing, skipping server")
98
115
  continue
99
116
 
100
117
  server = Server()
101
- server.type = odcs_server.get("type")
102
- server.description = odcs_server.get("description")
103
- server.environment = odcs_server.get("environment")
104
- server.format = odcs_server.get("format")
105
- server.project = odcs_server.get("project")
106
- server.dataset = odcs_server.get("dataset")
107
- server.path = odcs_server.get("path")
108
- server.delimiter = odcs_server.get("delimiter")
109
- server.endpointUrl = odcs_server.get("endpointUrl")
110
- server.location = odcs_server.get("location")
111
- server.account = odcs_server.get("account")
112
- server.database = odcs_server.get("database")
113
- server.schema_ = odcs_server.get("schema")
114
- server.host = odcs_server.get("host")
115
- server.port = odcs_server.get("port")
116
- server.catalog = odcs_server.get("catalog")
117
- server.topic = odcs_server.get("topic")
118
- server.http_path = odcs_server.get("http_path")
119
- server.token = odcs_server.get("token")
120
- server.dataProductId = odcs_server.get("dataProductId")
121
- server.outputPortId = odcs_server.get("outputPortId")
122
- server.driver = odcs_server.get("driver")
123
- server.roles = odcs_server.get("roles")
118
+ server.type = odcs_server.type
119
+ server.description = odcs_server.description
120
+ server.environment = odcs_server.environment
121
+ server.format = odcs_server.format
122
+ server.project = odcs_server.project
123
+ server.dataset = odcs_server.dataset
124
+ server.path = odcs_server.path
125
+ server.delimiter = odcs_server.delimiter
126
+ server.endpointUrl = odcs_server.endpointUrl
127
+ server.location = odcs_server.location
128
+ server.account = odcs_server.account
129
+ server.database = odcs_server.database
130
+ server.schema_ = odcs_server.schema_
131
+ server.host = odcs_server.host
132
+ server.port = odcs_server.port
133
+ server.catalog = odcs_server.catalog
134
+ server.stagingDir = odcs_server.stagingDir
135
+ server.topic = getattr(odcs_server, "topic", None)
136
+ server.http_path = getattr(odcs_server, "http_path", None)
137
+ server.token = getattr(odcs_server, "token", None)
138
+ server.driver = getattr(odcs_server, "driver", None)
139
+ server.roles = import_server_roles(odcs_server.roles)
140
+ server.storageAccount = (
141
+ to_azure_storage_account(odcs_server.location)
142
+ if server.type == "azure" and "://" in server.location
143
+ else None
144
+ )
124
145
 
125
146
  servers[server_name] = server
126
147
  return servers
127
148
 
128
149
 
129
- def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
130
- if odcs_contract.get("description") is None:
150
+ def import_terms(odcs: Any) -> Terms | None:
151
+ if odcs.description is None:
131
152
  return None
132
- if (
133
- odcs_contract.get("description").get("usage") is not None
134
- or odcs_contract.get("description").get("limitations") is not None
135
- or odcs_contract.get("price") is not None
136
- ):
153
+ if odcs.description.usage is not None or odcs.description.limitations is not None or odcs.price is not None:
137
154
  terms = Terms()
138
- if odcs_contract.get("description").get("usage") is not None:
139
- terms.usage = odcs_contract.get("description").get("usage")
140
- if odcs_contract.get("description").get("limitations") is not None:
141
- terms.limitations = odcs_contract.get("description").get("limitations")
142
- if odcs_contract.get("price") is not None:
143
- terms.billing = f"{odcs_contract.get('price').get('priceAmount')} {odcs_contract.get('price').get('priceCurrency')} / {odcs_contract.get('price').get('priceUnit')}"
155
+ if odcs.description.usage is not None:
156
+ terms.usage = odcs.description.usage
157
+ if odcs.description.limitations is not None:
158
+ terms.limitations = odcs.description.limitations
159
+ if odcs.price is not None:
160
+ terms.billing = f"{odcs.price.priceAmount} {odcs.price.priceCurrency} / {odcs.price.priceUnit}"
144
161
 
145
162
  return terms
146
163
  else:
147
164
  return None
148
165
 
149
166
 
150
- def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel:
167
+ def import_servicelevels(odcs: Any) -> ServiceLevel:
151
168
  # find the two properties we can map (based on the examples)
152
- sla_properties = odcs_contract.get("slaProperties") if odcs_contract.get("slaProperties") is not None else []
153
- availability = next((p for p in sla_properties if p["property"] == "generalAvailability"), None)
154
- retention = next((p for p in sla_properties if p["property"] == "retention"), None)
169
+ sla_properties = odcs.slaProperties if odcs.slaProperties is not None else []
170
+ availability = next((p for p in sla_properties if p.property == "generalAvailability"), None)
171
+ retention = next((p for p in sla_properties if p.property == "retention"), None)
155
172
 
156
173
  if availability is not None or retention is not None:
157
174
  servicelevel = ServiceLevel()
158
175
 
159
176
  if availability is not None:
160
- value = availability.get("value")
177
+ value = availability.value
161
178
  if isinstance(value, datetime.datetime):
162
179
  value = value.isoformat()
163
180
  servicelevel.availability = Availability(description=value)
164
181
 
165
182
  if retention is not None:
166
- servicelevel.retention = Retention(period=f"{retention.get('value')}{retention.get('unit')}")
183
+ servicelevel.retention = Retention(period=f"{retention.value}{retention.unit}")
167
184
 
168
185
  return servicelevel
169
186
  else:
170
187
  return None
171
188
 
172
189
 
173
- def get_server_type(odcs_contract: Dict[str, Any]) -> str | None:
174
- servers = import_servers(odcs_contract)
190
+ def get_server_type(odcs: OpenDataContractStandard) -> str | None:
191
+ servers = import_servers(odcs)
175
192
  if servers is None or len(servers) == 0:
176
193
  return None
177
194
  # get first server from map
@@ -179,49 +196,110 @@ def get_server_type(odcs_contract: Dict[str, Any]) -> str | None:
179
196
  return server.type
180
197
 
181
198
 
182
- def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
183
- custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))
199
+ def import_models(odcs: Any) -> Dict[str, Model]:
200
+ custom_type_mappings = get_custom_type_mappings(odcs.customProperties)
184
201
 
185
- odcs_schemas = odcs_contract.get("schema") if odcs_contract.get("schema") is not None else []
202
+ odcs_schemas = odcs.schema_ if odcs.schema_ is not None else []
186
203
  result = {}
187
204
 
188
205
  for odcs_schema in odcs_schemas:
189
- schema_name = odcs_schema.get("name")
190
- schema_physical_name = odcs_schema.get("physicalName")
191
- schema_description = odcs_schema.get("description") if odcs_schema.get("description") is not None else ""
206
+ schema_name = odcs_schema.name
207
+ schema_physical_name = odcs_schema.physicalName
208
+ schema_description = odcs_schema.description if odcs_schema.description is not None else ""
192
209
  model_name = schema_physical_name if schema_physical_name is not None else schema_name
193
- model = Model(description=" ".join(schema_description.splitlines()), type="table")
194
- model.fields = import_fields(
195
- odcs_schema.get("properties"), custom_type_mappings, server_type=get_server_type(odcs_contract)
210
+ model = Model(
211
+ description=" ".join(schema_description.splitlines()) if schema_description else "",
212
+ type="table",
213
+ tags=odcs_schema.tags if odcs_schema.tags is not None else None,
196
214
  )
197
- if odcs_schema.get("quality") is not None:
198
- # convert dict to pydantic model
199
-
200
- model.quality = [Quality.model_validate(q) for q in odcs_schema.get("quality")]
215
+ model.fields = import_fields(odcs_schema.properties, custom_type_mappings, server_type=get_server_type(odcs))
216
+ if odcs_schema.quality is not None:
217
+ model.quality = convert_quality_list(odcs_schema.quality)
201
218
  model.title = schema_name
202
- if odcs_schema.get("dataGranularityDescription") is not None:
203
- model.config = {"dataGranularityDescription": odcs_schema.get("dataGranularityDescription")}
219
+ if odcs_schema.dataGranularityDescription is not None:
220
+ model.config = {"dataGranularityDescription": odcs_schema.dataGranularityDescription}
204
221
  result[model_name] = model
205
222
 
206
223
  return result
207
224
 
208
225
 
209
- def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict[str, Any]:
226
+ def convert_quality_list(odcs_quality_list):
227
+ """Convert a list of ODCS DataQuality objects to datacontract Quality objects"""
228
+ quality_list = []
229
+
230
+ if odcs_quality_list is not None:
231
+ for odcs_quality in odcs_quality_list:
232
+ quality = Quality(type=odcs_quality.type)
233
+
234
+ if odcs_quality.description is not None:
235
+ quality.description = odcs_quality.description
236
+ if odcs_quality.query is not None:
237
+ quality.query = odcs_quality.query
238
+ if odcs_quality.rule is not None:
239
+ quality.metric = odcs_quality.rule
240
+ if odcs_quality.mustBe is not None:
241
+ quality.mustBe = odcs_quality.mustBe
242
+ if odcs_quality.mustNotBe is not None:
243
+ quality.mustNotBe = odcs_quality.mustNotBe
244
+ if odcs_quality.mustBeGreaterThan is not None:
245
+ quality.mustBeGreaterThan = odcs_quality.mustBeGreaterThan
246
+ if odcs_quality.mustBeGreaterOrEqualTo is not None:
247
+ quality.mustBeGreaterOrEqualTo = odcs_quality.mustBeGreaterOrEqualTo
248
+ if odcs_quality.mustBeLessThan is not None:
249
+ quality.mustBeLessThan = odcs_quality.mustBeLessThan
250
+ if odcs_quality.mustBeLessOrEqualTo is not None:
251
+ quality.mustBeLessOrEqualTo = odcs_quality.mustBeLessOrEqualTo
252
+ if odcs_quality.mustBeBetween is not None:
253
+ quality.mustBeBetween = odcs_quality.mustBeBetween
254
+ if odcs_quality.mustNotBeBetween is not None:
255
+ quality.mustNotBeBetween = odcs_quality.mustNotBeBetween
256
+ if odcs_quality.engine is not None:
257
+ quality.engine = odcs_quality.engine
258
+ if odcs_quality.implementation is not None:
259
+ quality.implementation = odcs_quality.implementation
260
+ if odcs_quality.businessImpact is not None:
261
+ quality.model_extra["businessImpact"] = odcs_quality.businessImpact
262
+ if odcs_quality.dimension is not None:
263
+ quality.model_extra["dimension"] = odcs_quality.dimension
264
+ if odcs_quality.schedule is not None:
265
+ quality.model_extra["schedule"] = odcs_quality.schedule
266
+ if odcs_quality.scheduler is not None:
267
+ quality.model_extra["scheduler"] = odcs_quality.scheduler
268
+ if odcs_quality.severity is not None:
269
+ quality.model_extra["severity"] = odcs_quality.severity
270
+ if odcs_quality.method is not None:
271
+ quality.model_extra["method"] = odcs_quality.method
272
+ if odcs_quality.customProperties is not None:
273
+ quality.model_extra["customProperties"] = []
274
+ for item in odcs_quality.customProperties:
275
+ quality.model_extra["customProperties"].append(
276
+ {
277
+ "property": item.property,
278
+ "value": item.value,
279
+ }
280
+ )
281
+
282
+ quality_list.append(quality)
283
+
284
+ return quality_list
285
+
286
+
287
+ def import_field_config(odcs_property: SchemaProperty, server_type=None) -> dict[Any, Any] | None:
210
288
  config = {}
211
- if odcs_property.get("criticalDataElement") is not None:
212
- config["criticalDataElement"] = odcs_property.get("criticalDataElement")
213
- if odcs_property.get("encryptedName") is not None:
214
- config["encryptedName"] = odcs_property.get("encryptedName")
215
- if odcs_property.get("partitionKeyPosition") is not None:
216
- config["partitionKeyPosition"] = odcs_property.get("partitionKeyPosition")
217
- if odcs_property.get("partitioned") is not None:
218
- config["partitioned"] = odcs_property.get("partitioned")
219
-
220
- if odcs_property.get("customProperties") is not None and isinstance(odcs_property.get("customProperties"), list):
221
- for item in odcs_property.get("customProperties"):
222
- config[item["property"]] = item["value"]
223
-
224
- physical_type = odcs_property.get("physicalType")
289
+ if odcs_property.criticalDataElement is not None:
290
+ config["criticalDataElement"] = odcs_property.criticalDataElement
291
+ if odcs_property.encryptedName is not None:
292
+ config["encryptedName"] = odcs_property.encryptedName
293
+ if odcs_property.partitionKeyPosition is not None:
294
+ config["partitionKeyPosition"] = odcs_property.partitionKeyPosition
295
+ if odcs_property.partitioned is not None:
296
+ config["partitioned"] = odcs_property.partitioned
297
+
298
+ if odcs_property.customProperties is not None:
299
+ for item in odcs_property.customProperties:
300
+ config[item.property] = item.value
301
+
302
+ physical_type = odcs_property.physicalType
225
303
  if physical_type is not None:
226
304
  if server_type == "postgres" or server_type == "postgresql":
227
305
  config["postgresType"] = physical_type
@@ -238,79 +316,201 @@ def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict
238
316
  else:
239
317
  config["physicalType"] = physical_type
240
318
 
319
+ if len(config) == 0:
320
+ return None
321
+
241
322
  return config
242
323
 
243
324
 
244
- def has_composite_primary_key(odcs_properties) -> bool:
245
- primary_keys = [prop for prop in odcs_properties if prop.get("primaryKey") is not None and prop.get("primaryKey")]
325
+ def has_composite_primary_key(odcs_properties: List[SchemaProperty]) -> bool:
326
+ primary_keys = [prop for prop in odcs_properties if prop.primaryKey is not None and prop.primaryKey]
246
327
  return len(primary_keys) > 1
247
328
 
248
329
 
249
330
  def import_fields(
250
- odcs_properties: Dict[str, Any], custom_type_mappings: Dict[str, str], server_type
331
+ odcs_properties: List[SchemaProperty], custom_type_mappings: Dict[str, str], server_type
251
332
  ) -> Dict[str, Field]:
252
- logger = logging.getLogger(__name__)
253
333
  result = {}
254
334
 
255
335
  if odcs_properties is None:
256
336
  return result
257
337
 
258
338
  for odcs_property in odcs_properties:
259
- mapped_type = map_type(odcs_property.get("logicalType"), custom_type_mappings)
260
- if mapped_type is not None:
261
- property_name = odcs_property["name"]
262
- description = odcs_property.get("description") if odcs_property.get("description") is not None else None
263
- field = Field(
264
- description=" ".join(description.splitlines()) if description is not None else None,
265
- type=mapped_type,
266
- title=odcs_property.get("businessName"),
267
- required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
268
- primaryKey=odcs_property.get("primaryKey")
269
- if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
270
- else False,
271
- unique=odcs_property.get("unique"),
272
- examples=odcs_property.get("examples") if odcs_property.get("examples") is not None else None,
273
- classification=odcs_property.get("classification")
274
- if odcs_property.get("classification") is not None
275
- else "",
276
- tags=odcs_property.get("tags") if odcs_property.get("tags") is not None else None,
277
- quality=odcs_property.get("quality") if odcs_property.get("quality") is not None else [],
278
- config=import_field_config(odcs_property, server_type),
279
- )
280
- result[property_name] = field
281
- else:
282
- logger.info(
283
- f"Can't map {odcs_property.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{odcs_property.get('logicalName')}' that defines your expected type as the 'value'"
284
- )
339
+ field = import_field(odcs_property, odcs_properties, custom_type_mappings, server_type)
340
+ if field is not None:
341
+ result[odcs_property.name] = field
285
342
 
286
343
  return result
287
344
 
288
345
 
289
- def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
290
- if odcs_type is None:
346
+ def import_field(
347
+ odcs_property: SchemaProperty,
348
+ odcs_properties: List[SchemaProperty],
349
+ custom_type_mappings: Dict[str, str],
350
+ server_type: str,
351
+ ) -> Field | None:
352
+ """
353
+ Import a single ODCS property as a datacontract Field.
354
+ Returns None if the property cannot be mapped.
355
+ """
356
+ logger = logging.getLogger(__name__)
357
+
358
+ mapped_type = map_type(odcs_property.logicalType, custom_type_mappings, odcs_property.physicalType)
359
+
360
+ if mapped_type is None:
361
+ type_info = f"logicalType={odcs_property.logicalType}, physicalType={odcs_property.physicalType}"
362
+ logger.warning(
363
+ f"Can't map field '{odcs_property.name}' ({type_info}) to the datacontract mapping types. "
364
+ f"Both logicalType and physicalType are missing or unmappable. "
365
+ f"Consider introducing a customProperty 'dc_mapping_<type>' that defines your expected type as the 'value'"
366
+ )
291
367
  return None
292
- t = odcs_type.lower()
293
- if t in DATACONTRACT_TYPES:
294
- return t
295
- elif custom_mappings.get(t) is not None:
296
- return custom_mappings.get(t)
297
- else:
368
+
369
+ description = odcs_property.description if odcs_property.description is not None else None
370
+ field = Field(
371
+ description=" ".join(description.splitlines()) if description is not None else None,
372
+ type=mapped_type,
373
+ title=odcs_property.businessName,
374
+ required=odcs_property.required if odcs_property.required is not None else None,
375
+ primaryKey=to_primary_key(odcs_property, odcs_properties),
376
+ unique=odcs_property.unique if odcs_property.unique else None,
377
+ examples=odcs_property.examples if odcs_property.examples is not None else None,
378
+ classification=odcs_property.classification if odcs_property.classification is not None else None,
379
+ tags=odcs_property.tags if odcs_property.tags is not None else None,
380
+ quality=convert_quality_list(odcs_property.quality),
381
+ fields=import_fields(odcs_property.properties, custom_type_mappings, server_type)
382
+ if odcs_property.properties is not None
383
+ else {},
384
+ config=import_field_config(odcs_property, server_type),
385
+ format=getattr(odcs_property, "format", None),
386
+ )
387
+
388
+ # mapped_type is array
389
+ if field.type == "array" and odcs_property.items is not None:
390
+ field.items = import_field(odcs_property.items, [], custom_type_mappings, server_type)
391
+
392
+ # enum from quality validValues as enum
393
+ if field.type == "string":
394
+ for q in field.quality:
395
+ if hasattr(q, "validValues"):
396
+ field.enum = q.validValues
397
+
398
+ return field
399
+
400
+
401
+ def to_primary_key(odcs_property: SchemaProperty, odcs_properties: list[SchemaProperty]) -> bool | None:
402
+ if odcs_property.primaryKey is None:
298
403
  return None
404
+ if has_composite_primary_key(odcs_properties):
405
+ return None
406
+ return odcs_property.primaryKey
407
+
408
+
409
+ def map_type(odcs_logical_type: str, custom_mappings: Dict[str, str], physical_type: str = None) -> str | None:
410
+ # Try to map logicalType first
411
+ if odcs_logical_type is not None:
412
+ t = odcs_logical_type.lower()
413
+ if t in DATACONTRACT_TYPES:
414
+ return t
415
+ elif custom_mappings.get(t) is not None:
416
+ return custom_mappings.get(t)
417
+
418
+ # Fallback to physicalType if logicalType is not mapped
419
+ if physical_type is not None:
420
+ pt = physical_type.lower()
421
+ # Remove parameters from physical type (e.g., VARCHAR(50) -> varchar, DECIMAL(10,2) -> decimal)
422
+ pt_base = pt.split("(")[0].strip()
423
+
424
+ # Try direct mapping of physical type
425
+ if pt in DATACONTRACT_TYPES:
426
+ return pt
427
+ elif pt_base in DATACONTRACT_TYPES:
428
+ return pt_base
429
+ elif custom_mappings.get(pt) is not None:
430
+ return custom_mappings.get(pt)
431
+ elif custom_mappings.get(pt_base) is not None:
432
+ return custom_mappings.get(pt_base)
433
+ # Common physical type mappings
434
+ elif pt_base in ["varchar", "char", "nvarchar", "nchar", "text", "ntext", "string", "character varying"]:
435
+ return "string"
436
+ elif pt_base in ["int", "integer", "smallint", "tinyint", "mediumint", "int2", "int4", "int8"]:
437
+ return "int"
438
+ elif pt_base in ["bigint", "long", "int64"]:
439
+ return "long"
440
+ elif pt_base in ["float", "real", "float4", "float8"]:
441
+ return "float"
442
+ elif pt_base in ["double", "double precision"]:
443
+ return "double"
444
+ elif pt_base in ["decimal", "numeric", "number"]:
445
+ return "decimal"
446
+ elif pt_base in ["boolean", "bool", "bit"]:
447
+ return "boolean"
448
+ elif pt_base in ["timestamp", "datetime", "datetime2", "timestamptz", "timestamp with time zone"]:
449
+ return "timestamp"
450
+ elif pt_base in ["date"]:
451
+ return "date"
452
+ elif pt_base in ["time"]:
453
+ return "time"
454
+ elif pt_base in ["json", "jsonb"]:
455
+ return "json"
456
+ elif pt_base in ["array"]:
457
+ return "array"
458
+ elif pt_base in ["object", "struct", "record"]:
459
+ return "object"
460
+ elif pt_base in ["bytes", "binary", "varbinary", "blob", "bytea"]:
461
+ return "bytes"
462
+ else:
463
+ return None
464
+ return None
299
465
 
300
466
 
301
- def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
467
+ def get_custom_type_mappings(odcs_custom_properties: List[CustomProperty]) -> Dict[str, str]:
302
468
  result = {}
303
469
  if odcs_custom_properties is not None:
304
470
  for prop in odcs_custom_properties:
305
- if prop["property"].startswith("dc_mapping_"):
306
- odcs_type_name = prop["property"].substring(11)
307
- datacontract_type = prop["value"]
471
+ if prop.property.startswith("dc_mapping_"):
472
+ odcs_type_name = prop.property[11:] # Changed substring to slice
473
+ datacontract_type = prop.value
308
474
  result[odcs_type_name] = datacontract_type
309
475
 
310
476
  return result
311
477
 
312
478
 
313
- def import_tags(odcs_contract) -> List[str] | None:
314
- if odcs_contract.get("tags") is None:
479
+ def get_owner(odcs_custom_properties: List[CustomProperty]) -> str | None:
480
+ if odcs_custom_properties is not None:
481
+ for prop in odcs_custom_properties:
482
+ if prop.property == "owner":
483
+ return prop.value
484
+
485
+ return None
486
+
487
+
488
+ def import_tags(odcs: OpenDataContractStandard) -> List[str] | None:
489
+ if odcs.tags is None:
315
490
  return None
316
- return odcs_contract.get("tags")
491
+ return odcs.tags
492
+
493
+
494
+ def to_azure_storage_account(location: str) -> str | None:
495
+ """
496
+ Converts a storage location string to extract the storage account name.
497
+ ODCS v3.0 has no explicit field for the storage account. It uses the location field, which is a URI.
498
+
499
+ This function parses a storage location string to identify and return the
500
+ storage account name. It handles two primary patterns:
501
+ 1. Protocol://containerName@storageAccountName
502
+ 2. Protocol://storageAccountName
503
+
504
+ :param location: The storage location string to parse, typically following
505
+ the format protocol://containerName@storageAccountName. or
506
+ protocol://storageAccountName.
507
+ :return: The extracted storage account name if found, otherwise None
508
+ """
509
+ # to catch protocol://containerName@storageAccountName. pattern from location
510
+ match = re.search(r"(?<=@)([^.]*)", location, re.IGNORECASE)
511
+ if match:
512
+ return match.group()
513
+ else:
514
+ # to catch protocol://storageAccountName. pattern from location
515
+ match = re.search(r"(?<=//)(?!@)([^.]*)", location, re.IGNORECASE)
516
+ return match.group() if match else None