datacontract-cli 0.10.13__py3-none-any.whl → 0.10.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)
  1. datacontract/breaking/breaking.py +227 -9
  2. datacontract/breaking/breaking_rules.py +24 -0
  3. datacontract/catalog/catalog.py +1 -1
  4. datacontract/cli.py +104 -32
  5. datacontract/data_contract.py +35 -5
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
  8. datacontract/engines/soda/check_soda_execute.py +5 -3
  9. datacontract/engines/soda/connections/duckdb.py +1 -0
  10. datacontract/engines/soda/connections/kafka.py +38 -17
  11. datacontract/export/avro_converter.py +8 -1
  12. datacontract/export/avro_idl_converter.py +2 -2
  13. datacontract/export/bigquery_converter.py +4 -3
  14. datacontract/export/data_caterer_converter.py +1 -1
  15. datacontract/export/dbml_converter.py +2 -4
  16. datacontract/export/dbt_converter.py +2 -3
  17. datacontract/export/dcs_exporter.py +6 -0
  18. datacontract/export/exporter.py +5 -2
  19. datacontract/export/exporter_factory.py +16 -3
  20. datacontract/export/go_converter.py +3 -2
  21. datacontract/export/great_expectations_converter.py +202 -40
  22. datacontract/export/html_export.py +1 -1
  23. datacontract/export/jsonschema_converter.py +3 -2
  24. datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +5 -5
  25. datacontract/export/odcs_v3_exporter.py +294 -0
  26. datacontract/export/pandas_type_converter.py +40 -0
  27. datacontract/export/protobuf_converter.py +1 -1
  28. datacontract/export/rdf_converter.py +4 -5
  29. datacontract/export/sodacl_converter.py +86 -2
  30. datacontract/export/spark_converter.py +10 -7
  31. datacontract/export/sql_converter.py +1 -2
  32. datacontract/export/sql_type_converter.py +55 -11
  33. datacontract/export/sqlalchemy_converter.py +1 -2
  34. datacontract/export/terraform_converter.py +1 -1
  35. datacontract/imports/avro_importer.py +1 -1
  36. datacontract/imports/bigquery_importer.py +1 -1
  37. datacontract/imports/dbml_importer.py +2 -2
  38. datacontract/imports/dbt_importer.py +3 -2
  39. datacontract/imports/glue_importer.py +5 -3
  40. datacontract/imports/iceberg_importer.py +161 -0
  41. datacontract/imports/importer.py +2 -0
  42. datacontract/imports/importer_factory.py +12 -1
  43. datacontract/imports/jsonschema_importer.py +3 -2
  44. datacontract/imports/odcs_importer.py +25 -168
  45. datacontract/imports/odcs_v2_importer.py +177 -0
  46. datacontract/imports/odcs_v3_importer.py +309 -0
  47. datacontract/imports/parquet_importer.py +81 -0
  48. datacontract/imports/spark_importer.py +2 -1
  49. datacontract/imports/sql_importer.py +1 -1
  50. datacontract/imports/unity_importer.py +3 -3
  51. datacontract/integration/datamesh_manager.py +1 -1
  52. datacontract/integration/opentelemetry.py +0 -1
  53. datacontract/lint/lint.py +2 -1
  54. datacontract/lint/linters/description_linter.py +1 -0
  55. datacontract/lint/linters/example_model_linter.py +1 -0
  56. datacontract/lint/linters/field_pattern_linter.py +1 -0
  57. datacontract/lint/linters/field_reference_linter.py +1 -0
  58. datacontract/lint/linters/notice_period_linter.py +1 -0
  59. datacontract/lint/linters/quality_schema_linter.py +1 -0
  60. datacontract/lint/linters/valid_constraints_linter.py +1 -0
  61. datacontract/lint/resolve.py +14 -9
  62. datacontract/lint/resources.py +21 -0
  63. datacontract/lint/schema.py +1 -1
  64. datacontract/lint/urls.py +4 -2
  65. datacontract/model/data_contract_specification.py +83 -13
  66. datacontract/model/odcs.py +11 -0
  67. datacontract/model/run.py +21 -12
  68. datacontract/templates/index.html +6 -6
  69. datacontract/web.py +2 -3
  70. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/METADATA +176 -93
  71. datacontract_cli-0.10.15.dist-info/RECORD +105 -0
  72. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/WHEEL +1 -1
  73. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  74. datacontract_cli-0.10.13.dist-info/RECORD +0 -97
  75. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/LICENSE +0 -0
  76. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/entry_points.txt +0 -0
  77. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/top_level.txt +0 -0
datacontract/imports/odcs_v3_importer.py ADDED
@@ -0,0 +1,309 @@
+import datetime
+import logging
+from typing import Any, Dict, List
+
+import yaml
+
+from datacontract.imports.importer import Importer
+from datacontract.lint.resources import read_resource
+from datacontract.model.data_contract_specification import (
+    DATACONTRACT_TYPES,
+    Availability,
+    DataContractSpecification,
+    Field,
+    Info,
+    Model,
+    Retention,
+    Server,
+    ServiceLevel,
+    Terms,
+)
+from datacontract.model.exceptions import DataContractException
+
+# module-level logger used by the import helpers below
+logger = logging.getLogger(__name__)
+
+
+class OdcsImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_odcs_v3(data_contract_specification, source)
+
+
+def import_odcs_v3(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    source_str = read_resource(source)
+    return import_odcs_v3_from_str(data_contract_specification, source_str)
+
+
+def import_odcs_v3_from_str(
+    data_contract_specification: DataContractSpecification, source_str: str
+) -> DataContractSpecification:
+    try:
+        odcs_contract = yaml.safe_load(source_str)
+    except Exception as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse ODCS contract",
+            reason=f"Failed to parse odcs contract from {source_str}",
+            engine="datacontract",
+            original_exception=e,
+        )
+
+    data_contract_specification.id = odcs_contract["id"]
+    data_contract_specification.info = import_info(odcs_contract)
+    data_contract_specification.servers = import_servers(odcs_contract)
+    data_contract_specification.terms = import_terms(odcs_contract)
+    data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
+    data_contract_specification.models = import_models(odcs_contract)
+    data_contract_specification.tags = import_tags(odcs_contract)
+
+    return data_contract_specification
+
+
+def import_info(odcs_contract: Dict[str, Any]) -> Info:
+    info = Info()
+
+    info.title = odcs_contract.get("name") if odcs_contract.get("name") is not None else ""
+
+    if odcs_contract.get("version") is not None:
+        info.version = odcs_contract.get("version")
+
+    # odcs.description.purpose => datacontract.description
+    if odcs_contract.get("description") is not None and odcs_contract.get("description").get("purpose") is not None:
+        info.description = odcs_contract.get("description").get("purpose")
+
+    # odcs.domain => datacontract.owner
+    if odcs_contract.get("domain") is not None:
+        info.owner = odcs_contract.get("domain")
+
+    # add dataProduct as custom property
+    if odcs_contract.get("dataProduct") is not None:
+        info.dataProduct = odcs_contract.get("dataProduct")
+
+    # add tenant as custom property
+    if odcs_contract.get("tenant") is not None:
+        info.tenant = odcs_contract.get("tenant")
+
+    return info
+
+
+def import_servers(odcs_contract: Dict[str, Any]) -> Dict[str, Server] | None:
+    if odcs_contract.get("servers") is None:
+        return None
+    servers = {}
+    for odcs_server in odcs_contract.get("servers"):
+        server_name = odcs_server.get("server")
+        if server_name is None:
+            logger.warning("Server name is missing, skipping server")
+            continue
+
+        server = Server()
+        server.type = odcs_server.get("type")
+        server.description = odcs_server.get("description")
+        server.environment = odcs_server.get("environment")
+        server.format = odcs_server.get("format")
+        server.project = odcs_server.get("project")
+        server.dataset = odcs_server.get("dataset")
+        server.path = odcs_server.get("path")
+        server.delimiter = odcs_server.get("delimiter")
+        server.endpointUrl = odcs_server.get("endpointUrl")
+        server.location = odcs_server.get("location")
+        server.account = odcs_server.get("account")
+        server.database = odcs_server.get("database")
+        server.schema_ = odcs_server.get("schema")
+        server.host = odcs_server.get("host")
+        server.port = odcs_server.get("port")
+        server.catalog = odcs_server.get("catalog")
+        server.topic = odcs_server.get("topic")
+        server.http_path = odcs_server.get("http_path")
+        server.token = odcs_server.get("token")
+        server.dataProductId = odcs_server.get("dataProductId")
+        server.outputPortId = odcs_server.get("outputPortId")
+        server.driver = odcs_server.get("driver")
+        server.roles = odcs_server.get("roles")
+
+        servers[server_name] = server
+    return servers
+
+
+def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
+    if odcs_contract.get("description") is None:
+        return None
+    if (
+        odcs_contract.get("description").get("usage") is not None
+        or odcs_contract.get("description").get("limitations") is not None
+        or odcs_contract.get("price") is not None
+    ):
+        terms = Terms()
+        if odcs_contract.get("description").get("usage") is not None:
+            terms.usage = odcs_contract.get("description").get("usage")
+        if odcs_contract.get("description").get("limitations") is not None:
+            terms.limitations = odcs_contract.get("description").get("limitations")
+        if odcs_contract.get("price") is not None:
+            terms.billing = f"{odcs_contract.get('price').get('priceAmount')} {odcs_contract.get('price').get('priceCurrency')} / {odcs_contract.get('price').get('priceUnit')}"
+
+        return terms
+    else:
+        return None
+
+
+def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel | None:
+    # find the two properties we can map (based on the examples)
+    sla_properties = odcs_contract.get("slaProperties") if odcs_contract.get("slaProperties") is not None else []
+    availability = next((p for p in sla_properties if p["property"] == "generalAvailability"), None)
+    retention = next((p for p in sla_properties if p["property"] == "retention"), None)
+
+    if availability is not None or retention is not None:
+        servicelevel = ServiceLevel()
+
+        if availability is not None:
+            value = availability.get("value")
+            if isinstance(value, datetime.datetime):
+                value = value.isoformat()
+            servicelevel.availability = Availability(description=value)
+
+        if retention is not None:
+            servicelevel.retention = Retention(period=f"{retention.get('value')}{retention.get('unit')}")
+
+        return servicelevel
+    else:
+        return None
+
+
+def get_server_type(odcs_contract: Dict[str, Any]) -> str | None:
+    servers = import_servers(odcs_contract)
+    if servers is None or len(servers) == 0:
+        return None
+    # get first server from map
+    server = next(iter(servers.values()))
+    return server.type
+
+
+def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
+    custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))
+
+    odcs_schemas = odcs_contract.get("schema") if odcs_contract.get("schema") is not None else []
+    result = {}
+
+    for odcs_schema in odcs_schemas:
+        schema_name = odcs_schema.get("name")
+        schema_physical_name = odcs_schema.get("physicalName")
+        schema_description = odcs_schema.get("description") if odcs_schema.get("description") is not None else ""
+        model_name = schema_physical_name if schema_physical_name is not None else schema_name
+        model = Model(description=" ".join(schema_description.splitlines()), type="table")
+        model.fields = import_fields(
+            odcs_schema.get("properties"), custom_type_mappings, server_type=get_server_type(odcs_contract)
+        )
+        model.title = schema_name
+        if odcs_schema.get("dataGranularityDescription") is not None:
+            model.config = {"dataGranularityDescription": odcs_schema.get("dataGranularityDescription")}
+        result[model_name] = model
+
+    return result
+
+
+def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict[str, Any]:
+    config = {}
+    if odcs_property.get("criticalDataElement") is not None:
+        config["criticalDataElement"] = odcs_property.get("criticalDataElement")
+    if odcs_property.get("encryptedName") is not None:
+        config["encryptedName"] = odcs_property.get("encryptedName")
+    if odcs_property.get("partitionKeyPosition") is not None:
+        config["partitionKeyPosition"] = odcs_property.get("partitionKeyPosition")
+    if odcs_property.get("partitioned") is not None:
+        config["partitioned"] = odcs_property.get("partitioned")
+
+    if odcs_property.get("customProperties") is not None and isinstance(odcs_property.get("customProperties"), list):
+        for item in odcs_property.get("customProperties"):
+            config[item["property"]] = item["value"]
+
+    physical_type = odcs_property.get("physicalType")
+    if physical_type is not None:
+        if server_type == "postgres" or server_type == "postgresql":
+            config["postgresType"] = physical_type
+        elif server_type == "bigquery":
+            config["bigqueryType"] = physical_type
+        elif server_type == "snowflake":
+            config["snowflakeType"] = physical_type
+        elif server_type == "redshift":
+            config["redshiftType"] = physical_type
+        elif server_type == "sqlserver":
+            config["sqlserverType"] = physical_type
+        elif server_type == "databricks":
+            config["databricksType"] = physical_type
+        else:
+            config["physicalType"] = physical_type
+
+    return config
+
+
+def has_composite_primary_key(odcs_properties) -> bool:
+    primary_keys = [prop for prop in odcs_properties if prop.get("primaryKey") is not None and prop.get("primaryKey")]
+    return len(primary_keys) > 1
+
+
+def import_fields(
+    odcs_properties: Dict[str, Any], custom_type_mappings: Dict[str, str], server_type
+) -> Dict[str, Field]:
+    result = {}
+
+    if odcs_properties is None:
+        return result
+
+    for odcs_property in odcs_properties:
+        mapped_type = map_type(odcs_property.get("logicalType"), custom_type_mappings)
+        if mapped_type is not None:
+            property_name = odcs_property["name"]
+            description = odcs_property.get("description") if odcs_property.get("description") is not None else None
+            field = Field(
+                description=" ".join(description.splitlines()) if description is not None else None,
+                type=mapped_type,
+                title=odcs_property.get("businessName"),
+                required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
+                primary=odcs_property.get("primaryKey")
+                if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
+                else False,
+                unique=odcs_property.get("unique"),
+                examples=odcs_property.get("examples") if odcs_property.get("examples") is not None else None,
+                classification=odcs_property.get("classification")
+                if odcs_property.get("classification") is not None
+                else "",
+                tags=odcs_property.get("tags") if odcs_property.get("tags") is not None else None,
+                quality=odcs_property.get("quality") if odcs_property.get("quality") is not None else [],
+                config=import_field_config(odcs_property, server_type),
+            )
+            result[property_name] = field
+        else:
+            logger.info(
+                f"Can't map {odcs_property.get('name')} to a Data Contract type, as there is no equivalent "
+                f"or custom mapping. Consider adding a customProperty 'dc_mapping_{odcs_property.get('logicalType')}' "
+                f"that defines the expected type as its 'value'."
+            )
+
+    return result
+
+
+def map_type(odcs_type: str | None, custom_mappings: Dict[str, str]) -> str | None:
+    if odcs_type is None:
+        return None
+    t = odcs_type.lower()
+    if t in DATACONTRACT_TYPES:
+        return t
+    elif custom_mappings.get(t) is not None:
+        return custom_mappings.get(t)
+    else:
+        return None
+
+
+def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
+    result = {}
+    if odcs_custom_properties is not None:
+        for prop in odcs_custom_properties:
+            if prop["property"].startswith("dc_mapping_"):
+                odcs_type_name = prop["property"][len("dc_mapping_"):]
+                datacontract_type = prop["value"]
+                result[odcs_type_name] = datacontract_type
+
+    return result
+
+
+def import_tags(odcs_contract) -> List[str] | None:
+    if odcs_contract.get("tags") is None:
+        return None
+    return odcs_contract.get("tags")
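
Note (not part of the diff): a minimal sketch of driving the new importer from a YAML string, including the dc_mapping_* custom-property mechanism handled by get_custom_type_mappings. The sample document and the "money" logical type are hypothetical.

from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
from datacontract.model.data_contract_specification import DataContractSpecification

# hypothetical minimal ODCS v3 document
odcs_yaml = """
id: orders-contract
name: Orders
version: 1.0.0
domain: sales
customProperties:
  - property: dc_mapping_money   # maps the non-standard logical type "money"
    value: decimal               # to the Data Contract type "decimal"
schema:
  - name: orders
    properties:
      - name: order_id
        logicalType: string
        primaryKey: true
      - name: order_total
        logicalType: money
"""

spec = import_odcs_v3_from_str(DataContractSpecification(dataContractSpecification="1.1.0"), odcs_yaml)
print(spec.models["orders"].fields["order_total"].type)  # "decimal"
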
datacontract/imports/parquet_importer.py ADDED
@@ -0,0 +1,81 @@
+import os.path
+
+import pyarrow
+from pyarrow import parquet
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Field,
+    Model,
+)
+from datacontract.model.exceptions import DataContractException
+
+
+class ParquetImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_parquet(data_contract_specification, source)
+
+
+def import_parquet(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    # use filename as schema name, remove .parquet suffix, avoid breaking the yaml output by replacing dots
+    schema_name = os.path.basename(source).removesuffix(".parquet").replace(".", "_")
+
+    fields: dict[str, Field] = {}
+
+    arrow_schema = parquet.read_schema(source)
+    for field_name in arrow_schema.names:
+        parquet_field = arrow_schema.field(field_name)
+
+        field = map_pyarrow_field_to_specification_field(parquet_field, "parquet")
+
+        if not parquet_field.nullable:
+            field.required = True
+
+        fields[field_name] = field
+
+    data_contract_specification.models[schema_name] = Model(fields=fields)
+
+    return data_contract_specification
+
+
+def map_pyarrow_field_to_specification_field(pyarrow_field: pyarrow.Field, file_format: str) -> Field:
+    if pyarrow.types.is_boolean(pyarrow_field.type):
+        return Field(type="boolean")
+    if pyarrow.types.is_int32(pyarrow_field.type):
+        return Field(type="int")
+    if pyarrow.types.is_int64(pyarrow_field.type):
+        return Field(type="long")
+    if pyarrow.types.is_integer(pyarrow_field.type):
+        return Field(type="number")
+    if pyarrow.types.is_float32(pyarrow_field.type):
+        return Field(type="float")
+    if pyarrow.types.is_float64(pyarrow_field.type):
+        return Field(type="double")
+    if pyarrow.types.is_decimal(pyarrow_field.type):
+        return Field(type="decimal", precision=pyarrow_field.type.precision, scale=pyarrow_field.type.scale)
+    if pyarrow.types.is_timestamp(pyarrow_field.type):
+        return Field(type="timestamp")
+    if pyarrow.types.is_date(pyarrow_field.type):
+        return Field(type="date")
+    if pyarrow.types.is_null(pyarrow_field.type):
+        return Field(type="null")
+    if pyarrow.types.is_binary(pyarrow_field.type):
+        return Field(type="bytes")
+    if pyarrow.types.is_string(pyarrow_field.type):
+        return Field(type="string")
+    if pyarrow.types.is_map(pyarrow_field.type) or pyarrow.types.is_dictionary(pyarrow_field.type):
+        return Field(type="map")
+    if pyarrow.types.is_struct(pyarrow_field.type):
+        return Field(type="struct")
+    if pyarrow.types.is_list(pyarrow_field.type):
+        return Field(type="array")
+
+    raise DataContractException(
+        type="schema",
+        name=f"Parse {file_format} schema",
+        reason=f"{pyarrow_field.type} currently not supported.",
+        engine="datacontract",
+    )
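
Note (not part of the diff): a sketch of exercising the new Parquet importer locally; the sample file and the use of pandas to produce it are assumptions.

import pandas as pd

from datacontract.imports.parquet_importer import import_parquet
from datacontract.model.data_contract_specification import DataContractSpecification

# write a tiny hypothetical Parquet file (pandas uses pyarrow under the hood)
pd.DataFrame({"order_id": ["a1"], "amount": [9.99]}).to_parquet("orders.v1.parquet")

spec = import_parquet(DataContractSpecification(), "orders.v1.parquet")
# dots in the filename are replaced so the YAML model key stays intact
print(list(spec.models))                               # ['orders_v1']
print(spec.models["orders_v1"].fields["amount"].type)  # 'double'
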
datacontract/imports/spark_importer.py CHANGED
@@ -1,9 +1,10 @@
 from pyspark.sql import DataFrame, SparkSession, types
+
 from datacontract.imports.importer import Importer
 from datacontract.model.data_contract_specification import (
     DataContractSpecification,
-    Model,
     Field,
+    Model,
     Server,
 )
 
datacontract/imports/sql_importer.py CHANGED
@@ -1,7 +1,7 @@
 from simple_ddl_parser import parse_from_file
 
 from datacontract.imports.importer import Importer
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class SqlImporter(Importer):
datacontract/imports/unity_importer.py CHANGED
@@ -2,13 +2,13 @@ import json
 import os
 from typing import List, Optional
 
-from pyspark.sql import types
 from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.catalog import TableInfo, ColumnInfo
+from databricks.sdk.service.catalog import ColumnInfo, TableInfo
+from pyspark.sql import types
 
 from datacontract.imports.importer import Importer
 from datacontract.imports.spark_importer import _field_from_struct_type
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 from datacontract.model.exceptions import DataContractException
 
 
datacontract/integration/datamesh_manager.py CHANGED
@@ -23,7 +23,7 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
     )
 
     if run.dataContractId is None:
-        raise Exception("Cannot publish run results, as data contract ID is unknown")
+        raise Exception("Cannot publish run results for unknown data contract ID")
 
     headers = {"Content-Type": "application/json", "x-api-key": api_key}
     request_body = run.model_dump_json()
datacontract/integration/opentelemetry.py CHANGED
@@ -12,7 +12,6 @@ from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExpo
 
 from datacontract.model.run import Run
 
-
 # Publishes metrics of a test run.
 # Metric contains the values:
 # 0 == test run passed,
datacontract/lint/lint.py CHANGED
@@ -1,9 +1,10 @@
 import abc
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Sequence, Any, cast
+from typing import Any, Sequence, cast
 
 from datacontract.model.run import Check
+
 from ..model.data_contract_specification import DataContractSpecification
 
 """This module contains linter definitions for linting a data contract.
datacontract/lint/linters/description_linter.py CHANGED
@@ -1,4 +1,5 @@
 from datacontract.model.data_contract_specification import DataContractSpecification
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/linters/example_model_linter.py CHANGED
@@ -5,6 +5,7 @@ import json
 import yaml
 
 from datacontract.model.data_contract_specification import DataContractSpecification, Example
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/linters/field_pattern_linter.py CHANGED
@@ -1,6 +1,7 @@
 import re
 
 from datacontract.model.data_contract_specification import DataContractSpecification
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/linters/field_reference_linter.py CHANGED
@@ -1,4 +1,5 @@
 from datacontract.model.data_contract_specification import DataContractSpecification
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/linters/notice_period_linter.py CHANGED
@@ -1,6 +1,7 @@
 import re
 
 from datacontract.model.data_contract_specification import DataContractSpecification
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/linters/quality_schema_linter.py CHANGED
@@ -1,6 +1,7 @@
 import yaml
 
 from datacontract.model.data_contract_specification import DataContractSpecification, Model
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/linters/valid_constraints_linter.py CHANGED
@@ -1,4 +1,5 @@
 from datacontract.model.data_contract_specification import DataContractSpecification, Field
+
 from ..lint import Linter, LinterResult
 
 
datacontract/lint/resolve.py CHANGED
@@ -5,11 +5,13 @@ import fastjsonschema
 import yaml
 from fastjsonschema import JsonSchemaValueException
 
-from datacontract.lint.files import read_file
+from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
+from datacontract.lint.resources import read_resource
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
 from datacontract.model.data_contract_specification import DataContractSpecification, Definition, Quality
 from datacontract.model.exceptions import DataContractException
+from datacontract.model.odcs import is_open_data_contract_standard
 
 
 def resolve_data_contract(
@@ -41,10 +43,7 @@ def resolve_data_contract(
 def resolve_data_contract_from_location(
     location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
 ) -> DataContractSpecification:
-    if location.startswith("http://") or location.startswith("https://"):
-        data_contract_str = fetch_resource(location)
-    else:
-        data_contract_str = read_file(location)
+    data_contract_str = read_resource(location)
     return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
 
 
@@ -196,10 +195,16 @@ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
 def _resolve_data_contract_from_str(
     data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
 ) -> DataContractSpecification:
-    data_contract_yaml_dict = _to_yaml(data_contract_str)
-    _validate(data_contract_yaml_dict, schema_location)
+    yaml_dict = _to_yaml(data_contract_str)
+
+    if is_open_data_contract_standard(yaml_dict):
+        # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
+        data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
+        return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
 
-    spec = DataContractSpecification(**data_contract_yaml_dict)
+    _validate_data_contract_specification_schema(yaml_dict, schema_location)
+    data_contract_specification = yaml_dict
+    spec = DataContractSpecification(**data_contract_specification)
 
     if inline_definitions:
         inline_definitions_into_data_contract(spec)
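
Note (not part of the diff): with this change a single entry point resolves both formats; the file names below are hypothetical.

from datacontract.lint.resolve import resolve_data_contract_from_location

# a classic Data Contract Specification YAML resolves as before
dcs_spec = resolve_data_contract_from_location("datacontract.yaml")

# an ODCS v3 YAML is detected via is_open_data_contract_standard and
# routed through import_odcs_v3_from_str instead of JSON-schema validation
odcs_spec = resolve_data_contract_from_location("odcs.yaml")
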
@@ -224,7 +229,7 @@ def _to_yaml(data_contract_str):
     )
 
 
-def _validate(data_contract_yaml, schema_location: str = None):
+def _validate_data_contract_specification_schema(data_contract_yaml, schema_location: str = None):
     schema = fetch_schema(schema_location)
     try:
         fastjsonschema.validate(schema, data_contract_yaml)
datacontract/lint/resources.py ADDED
@@ -0,0 +1,21 @@
+from datacontract.lint.files import read_file
+from datacontract.lint.urls import fetch_resource
+
+
+def read_resource(location: str) -> str:
+    """
+    Read a resource from a given location.
+
+    If the location is a URL, fetch the resource from the web. API keys are supported.
+    Otherwise, read the resource from a local file.
+
+    Args:
+        location (str): The location of the resource, either a URL or a file path.
+
+    Returns:
+        str: The content of the resource.
+    """
+    if location.startswith("http://") or location.startswith("https://"):
+        return fetch_resource(location)
+    else:
+        return read_file(location)
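
Note (not part of the diff): read_resource gives the importers and the resolver one code path for local and remote contracts; the locations below are hypothetical.

from datacontract.lint.resources import read_resource

local_yaml = read_resource("datacontract.yaml")                       # read from disk
remote_yaml = read_resource("https://example.com/datacontract.yaml")  # fetched over HTTP, API keys applied
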
datacontract/lint/schema.py CHANGED
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Dict, Any
+from typing import Any, Dict
 
 import requests
 
datacontract/lint/urls.py CHANGED
@@ -1,4 +1,5 @@
 import os
+from urllib.parse import urlparse
 
 import requests
 
@@ -25,7 +26,8 @@ def fetch_resource(url: str):
 
 
 def _set_api_key(headers, url):
-    if ".datamesh-manager.com/" in url:
+    hostname = urlparse(url).hostname
+    if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
         datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
         if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
             print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
@@ -37,7 +39,7 @@ def _set_api_key(headers, url):
                 result="error",
             )
         headers["x-api-key"] = datamesh_manager_api_key
-    elif ".datacontract-manager.com/" in url:
+    elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
         datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
         if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
             print("Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")