datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,26 @@
1
+ import importlib.resources as resources
1
2
  import logging
2
3
  import os
4
+ import warnings
5
+ from pathlib import Path
3
6
 
4
7
  import fastjsonschema
5
8
  import yaml
6
9
  from fastjsonschema import JsonSchemaValueException
10
+ from open_data_contract_standard.model import OpenDataContractStandard
7
11
 
8
- from datacontract.lint.files import read_file
12
+ from datacontract.imports.odcs_v3_importer import import_from_odcs, parse_odcs_v3_from_str
13
+ from datacontract.lint.resources import read_resource
9
14
  from datacontract.lint.schema import fetch_schema
10
15
  from datacontract.lint.urls import fetch_resource
11
- from datacontract.model.data_contract_specification import \
12
- DataContractSpecification, Definition, Quality
16
+ from datacontract.model.data_contract_specification import (
17
+ DataContractSpecification,
18
+ Definition,
19
+ DeprecatedQuality,
20
+ )
13
21
  from datacontract.model.exceptions import DataContractException
22
+ from datacontract.model.odcs import is_open_data_contract_standard, is_open_data_product_standard
23
+ from datacontract.model.run import ResultEnum
14
24
 
15
25
 
16
26
  def resolve_data_contract(
@@ -19,68 +29,222 @@ def resolve_data_contract(
19
29
  data_contract: DataContractSpecification = None,
20
30
  schema_location: str = None,
21
31
  inline_definitions: bool = False,
32
+ inline_quality: bool = False,
22
33
  ) -> DataContractSpecification:
23
34
  if data_contract_location is not None:
24
- return resolve_data_contract_from_location(data_contract_location, schema_location, inline_definitions)
35
+ return resolve_data_contract_from_location(
36
+ data_contract_location, schema_location, inline_definitions, inline_quality
37
+ )
25
38
  elif data_contract_str is not None:
26
- return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions)
39
+ return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
27
40
  elif data_contract is not None:
28
41
  return data_contract
29
42
  else:
30
43
  raise DataContractException(
31
44
  type="lint",
32
- result="failed",
45
+ result=ResultEnum.failed,
46
+ name="Check that data contract YAML is valid",
47
+ reason="Data contract needs to be provided",
48
+ engine="datacontract",
49
+ )
50
+
51
+
52
+ def resolve_data_contract_v2(
53
+ data_contract_location: str = None,
54
+ data_contract_str: str = None,
55
+ data_contract: DataContractSpecification | OpenDataContractStandard = None,
56
+ schema_location: str = None,
57
+ inline_definitions: bool = False,
58
+ inline_quality: bool = False,
59
+ ) -> DataContractSpecification | OpenDataContractStandard:
60
+ if data_contract_location is not None:
61
+ return resolve_data_contract_from_location_v2(
62
+ data_contract_location, schema_location, inline_definitions, inline_quality
63
+ )
64
+ elif data_contract_str is not None:
65
+ return _resolve_data_contract_from_str_v2(
66
+ data_contract_str, schema_location, inline_definitions, inline_quality
67
+ )
68
+ elif data_contract is not None:
69
+ return data_contract
70
+ else:
71
+ raise DataContractException(
72
+ type="lint",
73
+ result=ResultEnum.failed,
33
74
  name="Check that data contract YAML is valid",
34
75
  reason="Data contract needs to be provided",
35
76
  engine="datacontract",
36
77
  )
37
78
 
38
79
 
80
+ def resolve_data_contract_dict(
81
+ data_contract_location: str = None,
82
+ data_contract_str: str = None,
83
+ data_contract: DataContractSpecification = None,
84
+ ) -> dict:
85
+ if data_contract_location is not None:
86
+ return _to_yaml(read_resource(data_contract_location))
87
+ elif data_contract_str is not None:
88
+ return _to_yaml(data_contract_str)
89
+ elif data_contract is not None:
90
+ return data_contract.model_dump()
91
+ else:
92
+ raise DataContractException(
93
+ type="lint",
94
+ result=ResultEnum.failed,
95
+ name="Check that data contract YAML is valid",
96
+ reason="Data contract needs to be provided",
97
+ engine="datacontract",
98
+ )
99
+
100
+
101
+ def resolve_data_contract_from_location_v2(
102
+ location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
103
+ ) -> DataContractSpecification | OpenDataContractStandard:
104
+ data_contract_str = read_resource(location)
105
+ return _resolve_data_contract_from_str_v2(data_contract_str, schema_location, inline_definitions, inline_quality)
106
+
107
+
39
108
  def resolve_data_contract_from_location(
40
- location, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = True
109
+ location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
41
110
  ) -> DataContractSpecification:
42
- if location.startswith("http://") or location.startswith("https://"):
43
- data_contract_str = fetch_resource(location)
44
- else:
45
- data_contract_str = read_file(location)
46
- return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, include_quality)
111
+ data_contract_str = read_resource(location)
112
+ return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
47
113
 
48
114
 
49
115
  def inline_definitions_into_data_contract(spec: DataContractSpecification):
50
116
  for model in spec.models.values():
51
117
  for field in model.fields.values():
52
- # If ref_obj is not empty, we've already inlined definitions.
53
- if not field.ref and not field.ref_obj:
54
- continue
118
+ inline_definition_into_field(field, spec)
119
+
120
+
121
+ def inline_definition_into_field(field, spec):
122
+ # iterate recursively over arrays
123
+ if field.items is not None:
124
+ inline_definition_into_field(field.items, spec)
55
125
 
56
- definition = resolve_definition_ref(field.ref, spec.definitions)
57
- field.ref_obj = definition
126
+ # iterate recursively over nested fields
127
+ if field.fields is not None:
128
+ for nested_field_name, nested_field in field.fields.items():
129
+ inline_definition_into_field(nested_field, spec)
58
130
 
59
- for field_name in field.model_fields.keys():
60
- if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
61
- setattr(field, field_name, getattr(definition, field_name))
131
+ if not field.ref:
132
+ return
62
133
 
134
+ definition = _resolve_definition_ref(field.ref, spec)
135
+ for field_name in field.model_fields.keys():
136
+ if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
137
+ setattr(field, field_name, getattr(definition, field_name))
138
+ # extras
139
+ for extra_field_name, extra_field_value in definition.model_extra.items():
140
+ if extra_field_name not in field.model_extra.keys():
141
+ setattr(field, extra_field_name, extra_field_value)
63
142
 
64
- def resolve_definition_ref(ref, definitions) -> Definition:
65
- if ref.startswith("http://") or ref.startswith("https://"):
66
- definition_str = fetch_resource(ref)
67
- definition_dict = to_yaml(definition_str)
68
- return Definition(**definition_dict)
69
143
 
70
- elif ref.startswith("#/definitions/"):
71
- definition_name = ref.split("#/definitions/")[1]
72
- return definitions[definition_name]
144
+ def _resolve_definition_ref(ref, spec) -> Definition:
145
+ logging.info(f"Resolving definition ref {ref}")
146
+
147
+ if "#" in ref:
148
+ path, definition_path = ref.split("#")
149
+ else:
150
+ path, definition_path = ref, None
151
+
152
+ if path.startswith("http://") or path.startswith("https://"):
153
+ logging.info(f"Resolving definition url {path}")
154
+
155
+ definition_str = fetch_resource(path)
156
+ definition_dict = _to_yaml(definition_str)
157
+ definition = Definition(**definition_dict)
158
+ if definition_path is not None:
159
+ return _find_by_path_in_definition(definition_path, definition)
160
+ else:
161
+ return definition
162
+ elif path.startswith("file://"):
163
+ logging.info(f"Resolving definition file path {path}")
164
+
165
+ path = path.replace("file://", "")
166
+ definition_str = _fetch_file(path)
167
+ definition_dict = _to_yaml(definition_str)
168
+ if definition_path:
169
+ path_parts = [part for part in definition_path.split("/") if part != ""]
170
+ for path_part in path_parts:
171
+ definition_dict = definition_dict.get(path_part, None)
172
+ if not definition_dict:
173
+ raise DataContractException(
174
+ type="lint",
175
+ result="failed",
176
+ name="Check that data contract YAML is valid",
177
+ reason=f"Cannot resolve definition {definition_path}, {path_part} not found",
178
+ engine="datacontract",
179
+ )
180
+ # this assumes that definitions_dict is a definitions dict, however,
181
+ # all we know is that it is a file!
182
+ definition = Definition(**definition_dict)
183
+ # if definition_path is not None:
184
+ # definition = _find_by_path_in_definition(definition_path, definition)
185
+ return definition
186
+ elif ref.startswith("#"):
187
+ logging.info(f"Resolving definition local path {path}")
188
+
189
+ definition_path = ref[1:]
190
+
191
+ return _find_by_path_in_spec(definition_path, spec)
73
192
  else:
74
193
  raise DataContractException(
75
194
  type="lint",
76
- result="failed",
195
+ result=ResultEnum.failed,
77
196
  name="Check that data contract YAML is valid",
78
197
  reason=f"Cannot resolve reference {ref}",
79
198
  engine="datacontract",
80
199
  )
81
200
 
82
201
 
83
- def resolve_quality_ref(quality: Quality):
202
+ def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification):
203
+ path_elements = definition_path.split("/")
204
+ definition_key = path_elements[2]
205
+ if definition_key not in spec.definitions:
206
+ raise DataContractException(
207
+ type="lint",
208
+ result=ResultEnum.failed,
209
+ name="Check that data contract YAML is valid",
210
+ reason=f"Cannot resolve definition {definition_key}",
211
+ engine="datacontract",
212
+ )
213
+ definition = spec.definitions[definition_key]
214
+ definition = _find_subfield_in_definition(definition, path_elements[3:])
215
+ return definition
216
+
217
+
218
+ def _find_by_path_in_definition(definition_path: str, definition: Definition):
219
+ if definition_path == "" or definition_path == "/":
220
+ return definition
221
+
222
+ path_elements = definition_path.split("/")
223
+ return _find_subfield_in_definition(definition, path_elements[1:])
224
+
225
+
226
+ def _find_subfield_in_definition(definition: Definition, path_elements):
227
+ while len(path_elements) > 0 and path_elements[0] == "fields":
228
+ definition = definition.fields[path_elements[1]]
229
+ path_elements = path_elements[2:]
230
+
231
+ return definition
232
+
233
+
234
+ def _fetch_file(path) -> str:
235
+ if not os.path.exists(path):
236
+ raise DataContractException(
237
+ type="export",
238
+ result=ResultEnum.failed,
239
+ name="Check that data contract definition is valid",
240
+ reason=f"Cannot resolve reference {path}",
241
+ engine="datacontract",
242
+ )
243
+ with open(path, "r") as file:
244
+ return file.read()
245
+
246
+
247
+ def _resolve_quality_ref(quality: DeprecatedQuality):
84
248
  """
85
249
  Return the content of a ref file path
86
250
  @param quality data contract quality specification
@@ -89,13 +253,13 @@ def resolve_quality_ref(quality: Quality):
89
253
  specification = quality.specification
90
254
  if quality.type == "great-expectations":
91
255
  for model, model_quality in specification.items():
92
- specification[model] = get_quality_ref_file(model_quality)
256
+ specification[model] = _get_quality_ref_file(model_quality)
93
257
  else:
94
258
  if "$ref" in specification:
95
- quality.specification = get_quality_ref_file(specification)
259
+ quality.specification = _get_quality_ref_file(specification)
96
260
 
97
261
 
98
- def get_quality_ref_file(quality_spec: str | object) -> str | object:
262
+ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
99
263
  """
100
264
  Get the file associated with a quality reference
101
265
  @param quality_spec quality specification
@@ -106,7 +270,7 @@ def get_quality_ref_file(quality_spec: str | object) -> str | object:
106
270
  if not os.path.exists(ref):
107
271
  raise DataContractException(
108
272
  type="export",
109
- result="failed",
273
+ result=ResultEnum.failed,
110
274
  name="Check that data contract quality is valid",
111
275
  reason=f"Cannot resolve reference {ref}",
112
276
  engine="datacontract",
@@ -116,26 +280,83 @@ def get_quality_ref_file(quality_spec: str | object) -> str | object:
116
280
  return quality_spec
117
281
 
118
282
 
119
- def resolve_data_contract_from_str(
120
- data_contract_str, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = False
283
+ def _resolve_data_contract_from_str_v2(
284
+ data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
285
+ ) -> DataContractSpecification | OpenDataContractStandard:
286
+ yaml_dict = _to_yaml(data_contract_str)
287
+
288
+ if is_open_data_product_standard(yaml_dict):
289
+ logging.info("Cannot import ODPS, as not supported")
290
+ raise DataContractException(
291
+ type="schema",
292
+ result=ResultEnum.failed,
293
+ name="Parse ODCS contract",
294
+ reason="Cannot parse ODPS product",
295
+ engine="datacontract",
296
+ )
297
+
298
+ if is_open_data_contract_standard(yaml_dict):
299
+ logging.info("Importing ODCS v3")
300
+ # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
301
+ odcs = parse_odcs_v3_from_str(data_contract_str)
302
+ return odcs
303
+
304
+ logging.info("Importing DCS")
305
+ return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
306
+
307
+
308
+ def _resolve_data_contract_from_str(
309
+ data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
121
310
  ) -> DataContractSpecification:
122
- data_contract_yaml_dict = to_yaml(data_contract_str)
123
- validate(data_contract_yaml_dict, schema_location)
311
+ yaml_dict = _to_yaml(data_contract_str)
312
+
313
+ if schema_location is None:
314
+ if is_open_data_contract_standard(yaml_dict):
315
+ logging.info("Using ODCS 3.0.2 schema to validate data contract")
316
+ # TODO refactor this to a specific function
317
+ schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.0.2.schema.json")
318
+
319
+ _validate_json_schema(yaml_dict, schema_location)
320
+
321
+ if is_open_data_contract_standard(yaml_dict):
322
+ logging.info("Importing ODCS v3")
323
+ # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
324
+ odcs = parse_odcs_v3_from_str(data_contract_str)
325
+
326
+ data_contract_specification = DataContractSpecification(dataContractSpecification="1.2.1")
327
+ return import_from_odcs(data_contract_specification, odcs)
124
328
 
125
- spec = DataContractSpecification(**data_contract_yaml_dict)
329
+ logging.info("Importing DCS")
330
+ return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
126
331
 
332
+
333
+ def _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict):
334
+ _validate_json_schema(yaml_dict, schema_location)
335
+ data_contract_specification = yaml_dict
336
+ spec = DataContractSpecification(**data_contract_specification)
127
337
  if inline_definitions:
128
338
  inline_definitions_into_data_contract(spec)
129
- if spec.quality and include_quality:
130
- resolve_quality_ref(spec.quality)
131
-
339
+ ## Suppress DeprecationWarning when accessing spec.quality,
340
+ ## iif it is in fact *not* used.
341
+ with warnings.catch_warnings(record=True) as recorded_warnings:
342
+ spec_quality = spec.quality
343
+ for w in recorded_warnings:
344
+ if not issubclass(w.category, DeprecationWarning) or spec_quality is not None:
345
+ warnings.warn_explicit(
346
+ message=w.message,
347
+ category=w.category,
348
+ filename=w.filename,
349
+ lineno=w.lineno,
350
+ source=w.source,
351
+ )
352
+ if spec_quality and inline_quality:
353
+ _resolve_quality_ref(spec_quality)
132
354
  return spec
133
355
 
134
356
 
135
- def to_yaml(data_contract_str):
357
+ def _to_yaml(data_contract_str) -> dict:
136
358
  try:
137
- yaml_dict = yaml.safe_load(data_contract_str)
138
- return yaml_dict
359
+ return yaml.safe_load(data_contract_str)
139
360
  except Exception as e:
140
361
  logging.warning(f"Cannot parse YAML. Error: {str(e)}")
141
362
  raise DataContractException(
@@ -147,16 +368,17 @@ def to_yaml(data_contract_str):
147
368
  )
148
369
 
149
370
 
150
- def validate(data_contract_yaml, schema_location: str = None):
371
+ def _validate_json_schema(yaml_str, schema_location: str | Path = None):
372
+ logging.debug(f"Linting data contract with schema at {schema_location}")
151
373
  schema = fetch_schema(schema_location)
152
374
  try:
153
- fastjsonschema.validate(schema, data_contract_yaml)
375
+ fastjsonschema.validate(schema, yaml_str, use_default=False)
154
376
  logging.debug("YAML data is valid.")
155
377
  except JsonSchemaValueException as e:
156
378
  logging.warning(f"Data Contract YAML is invalid. Validation error: {e.message}")
157
379
  raise DataContractException(
158
380
  type="lint",
159
- result="failed",
381
+ result=ResultEnum.failed,
160
382
  name="Check that data contract YAML is valid",
161
383
  reason=e.message,
162
384
  engine="datacontract",
@@ -165,7 +387,7 @@ def validate(data_contract_yaml, schema_location: str = None):
165
387
  logging.warning(f"Data Contract YAML is invalid. Validation error: {str(e)}")
166
388
  raise DataContractException(
167
389
  type="lint",
168
- result="failed",
390
+ result=ResultEnum.failed,
169
391
  name="Check that data contract YAML is valid",
170
392
  reason=str(e),
171
393
  engine="datacontract",
@@ -0,0 +1,21 @@
1
+ from datacontract.lint.files import read_file
2
+ from datacontract.lint.urls import fetch_resource
3
+
4
+
5
+ def read_resource(location: str) -> str:
6
+ """
7
+ Read a resource from a given location.
8
+
9
+ If the location is a URL, fetch the resource from the web. API-Keys are supported.
10
+ Otherwise, read the resource from a local file.
11
+
12
+ Args:
13
+ location (str): The location of the resource, either a URL or a file path.
14
+
15
+ Returns:
16
+ str: The content of the resource.
17
+ """
18
+ if location.startswith("http://") or location.startswith("https://"):
19
+ return fetch_resource(location)
20
+ else:
21
+ return read_file(location)
@@ -1,27 +1,63 @@
1
+ import importlib.resources as resources
1
2
  import json
3
+ import logging
2
4
  import os
5
+ from pathlib import Path
6
+ from typing import Any, Dict
3
7
 
4
8
  import requests
5
9
 
6
10
  from datacontract.model.exceptions import DataContractException
11
+ from datacontract.model.run import ResultEnum
7
12
 
13
+ DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.2.1.schema.json"
8
14
 
9
- def fetch_schema(location: str = None):
10
- if location is None:
11
- location = "https://datacontract.com/datacontract.schema.json"
12
15
 
13
- if location.startswith("http://") or location.startswith("https://"):
14
- response = requests.get(location)
15
- return response.json()
16
+ def fetch_schema(location: str | Path = None) -> Dict[str, Any]:
17
+ """
18
+ Fetch and return a JSON schema from a given location.
19
+
20
+ This function retrieves a JSON schema either from a URL or a local file path.
21
+ If no location is provided, it defaults to the DataContract schema URL.
22
+
23
+ Args:
24
+ location: The URL or file path of the schema.
25
+
26
+ Returns:
27
+ The JSON schema as a dictionary.
28
+
29
+ Raises:
30
+ DataContractException: If the specified local file does not exist.
31
+ requests.RequestException: If there's an error fetching the schema from a URL.
32
+ json.JSONDecodeError: If there's an error decoding the JSON schema.
33
+
34
+ """
35
+ if location is None:
36
+ logging.info("Use default bundled schema " + DEFAULT_DATA_CONTRACT_SCHEMA)
37
+ schemas = resources.files("datacontract")
38
+ schema_file = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_SCHEMA)
39
+ with schema_file.open("r") as file:
40
+ schema = json.load(file)
16
41
  else:
17
- if not os.path.exists(location):
18
- raise DataContractException(
19
- type="lint",
20
- name=f"Reading schema from {location}",
21
- reason=f"The file '{location}' does not exist.",
22
- engine="datacontract",
23
- result="error",
24
- )
25
- with open(location, "r") as file:
26
- file_content = file.read()
27
- return json.loads(file_content)
42
+ # Convert Path objects to strings for string operations
43
+ location_str = str(location)
44
+
45
+ if location_str.startswith("http://") or location_str.startswith("https://"):
46
+ logging.debug(f"Downloading schema from {location_str}")
47
+ response = requests.get(location_str)
48
+ schema = response.json()
49
+ else:
50
+ if not os.path.exists(location):
51
+ raise DataContractException(
52
+ type="lint",
53
+ name=f"Reading schema from {location}",
54
+ reason=f"The file '{location}' does not exist.",
55
+ engine="datacontract",
56
+ result=ResultEnum.error,
57
+ )
58
+
59
+ logging.debug(f"Loading JSON schema locally at {location}")
60
+ with open(location, "r") as file:
61
+ schema = json.load(file)
62
+
63
+ return schema
datacontract/lint/urls.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import os
2
+ from urllib.parse import urlparse
2
3
 
3
4
  import requests
4
5
 
@@ -25,16 +26,35 @@ def fetch_resource(url: str):
25
26
 
26
27
 
27
28
  def _set_api_key(headers, url):
28
- if ".datamesh-manager.com/" not in url:
29
- return
29
+ hostname = urlparse(url).hostname
30
+
30
31
  datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
31
- if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
32
- print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
33
- raise DataContractException(
34
- type="lint",
35
- name=f"Reading data contract from {url}",
36
- reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
37
- engine="datacontract",
38
- result="error",
39
- )
40
- headers["x-api-key"] = datamesh_manager_api_key
32
+ datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
33
+
34
+ if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
35
+ if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
36
+ print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
37
+ raise DataContractException(
38
+ type="lint",
39
+ name=f"Reading data contract from {url}",
40
+ reason="Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
41
+ engine="datacontract",
42
+ result="error",
43
+ )
44
+ headers["x-api-key"] = datamesh_manager_api_key
45
+ elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
46
+ if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
47
+ print("Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
48
+ raise DataContractException(
49
+ type="lint",
50
+ name=f"Reading data contract from {url}",
51
+ reason="Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
52
+ engine="datacontract",
53
+ result="error",
54
+ )
55
+ headers["x-api-key"] = datacontract_manager_api_key
56
+
57
+ if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
58
+ headers["x-api-key"] = datamesh_manager_api_key
59
+ if datacontract_manager_api_key is not None and datacontract_manager_api_key != "":
60
+ headers["x-api-key"] = datacontract_manager_api_key
@@ -0,0 +1 @@
1
+ from datacontract_specification.model import *
@@ -1,3 +1,6 @@
1
+ from datacontract.model.run import ResultEnum
2
+
3
+
1
4
  class DataContractException(Exception):
2
5
  """Exception raised for errors in the execution of a run.
3
6
 
@@ -19,7 +22,7 @@ class DataContractException(Exception):
19
22
  engine="datacontract",
20
23
  model=None,
21
24
  original_exception=None,
22
- result: str = "failed",
25
+ result: ResultEnum = ResultEnum.failed,
23
26
  message="Run operation failed",
24
27
  ):
25
28
  self.type = type
@@ -0,0 +1,24 @@
1
+ def is_open_data_contract_standard(odcs: dict) -> bool:
2
+ """
3
+ Check if the given dictionary is an OpenDataContractStandard.
4
+
5
+ Args:
6
+ odcs (dict): The dictionary to check.
7
+
8
+ Returns:
9
+ bool: True if the dictionary is an OpenDataContractStandard, False otherwise.
10
+ """
11
+ return odcs.get("kind") == "DataContract" and odcs.get("apiVersion", "").startswith("v3")
12
+
13
+
14
+ def is_open_data_product_standard(odcs: dict) -> bool:
15
+ """
16
+ Check if the given dictionary is an open data product standard.
17
+
18
+ Args:
19
+ odcs (dict): The dictionary to check.
20
+
21
+ Returns:
22
+ bool: True if the dictionary is an open data product standard, False otherwise.
23
+ """
24
+ return odcs.get("kind") == "DataProduct" and odcs.get("apiVersion", "").startswith("v1")