datacontract-cli 0.10.23__py3-none-any.whl → 0.10.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +119 -13
  5. datacontract/data_contract.py +145 -67
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +27 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/engines/soda/connections/oracle.py +50 -0
  15. datacontract/export/avro_converter.py +20 -3
  16. datacontract/export/bigquery_converter.py +1 -1
  17. datacontract/export/dbt_converter.py +36 -7
  18. datacontract/export/dqx_converter.py +126 -0
  19. datacontract/export/duckdb_type_converter.py +57 -0
  20. datacontract/export/excel_exporter.py +923 -0
  21. datacontract/export/exporter.py +3 -0
  22. datacontract/export/exporter_factory.py +17 -1
  23. datacontract/export/great_expectations_converter.py +55 -5
  24. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  25. datacontract/export/markdown_converter.py +134 -5
  26. datacontract/export/mermaid_exporter.py +110 -0
  27. datacontract/export/odcs_v3_exporter.py +193 -149
  28. datacontract/export/protobuf_converter.py +163 -69
  29. datacontract/export/rdf_converter.py +2 -2
  30. datacontract/export/sodacl_converter.py +9 -1
  31. datacontract/export/spark_converter.py +31 -4
  32. datacontract/export/sql_converter.py +6 -2
  33. datacontract/export/sql_type_converter.py +124 -8
  34. datacontract/imports/avro_importer.py +63 -12
  35. datacontract/imports/csv_importer.py +111 -57
  36. datacontract/imports/excel_importer.py +1112 -0
  37. datacontract/imports/importer.py +16 -3
  38. datacontract/imports/importer_factory.py +17 -0
  39. datacontract/imports/json_importer.py +325 -0
  40. datacontract/imports/odcs_importer.py +2 -2
  41. datacontract/imports/odcs_v3_importer.py +367 -151
  42. datacontract/imports/protobuf_importer.py +264 -0
  43. datacontract/imports/spark_importer.py +117 -13
  44. datacontract/imports/sql_importer.py +32 -16
  45. datacontract/imports/unity_importer.py +84 -38
  46. datacontract/init/init_template.py +1 -1
  47. datacontract/integration/entropy_data.py +126 -0
  48. datacontract/lint/resolve.py +112 -23
  49. datacontract/lint/schema.py +24 -15
  50. datacontract/lint/urls.py +17 -3
  51. datacontract/model/data_contract_specification/__init__.py +1 -0
  52. datacontract/model/odcs.py +13 -0
  53. datacontract/model/run.py +3 -0
  54. datacontract/output/junit_test_results.py +3 -3
  55. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  56. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  58. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  59. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  60. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  61. datacontract/schemas/odcs-3.1.0.schema.json +2809 -0
  62. datacontract/templates/datacontract.html +54 -3
  63. datacontract/templates/datacontract_odcs.html +685 -0
  64. datacontract/templates/index.html +5 -2
  65. datacontract/templates/partials/server.html +2 -0
  66. datacontract/templates/style/output.css +319 -145
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/METADATA +711 -433
  68. datacontract_cli-0.10.40.dist-info/RECORD +121 -0
  69. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/WHEEL +1 -1
  70. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info/licenses}/LICENSE +1 -1
  71. datacontract/export/csv_type_converter.py +0 -36
  72. datacontract/integration/datamesh_manager.py +0 -72
  73. datacontract/lint/lint.py +0 -142
  74. datacontract/lint/linters/description_linter.py +0 -35
  75. datacontract/lint/linters/field_pattern_linter.py +0 -34
  76. datacontract/lint/linters/field_reference_linter.py +0 -48
  77. datacontract/lint/linters/notice_period_linter.py +0 -55
  78. datacontract/lint/linters/quality_schema_linter.py +0 -52
  79. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  80. datacontract/model/data_contract_specification.py +0 -327
  81. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  82. datacontract/{lint/linters → output}/__init__.py +0 -0
  83. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/entry_points.txt +0 -0
  84. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/top_level.txt +0 -0
datacontract/imports/unity_importer.py CHANGED
@@ -1,14 +1,14 @@
1
1
  import json
2
2
  import os
3
- from typing import List, Optional
3
+ from typing import List
4
4
 
5
5
  from databricks.sdk import WorkspaceClient
6
6
  from databricks.sdk.service.catalog import ColumnInfo, TableInfo
7
- from pyspark.sql import types
7
+ from open_data_contract_standard.model import OpenDataContractStandard
8
8
 
9
9
  from datacontract.imports.importer import Importer
10
- from datacontract.imports.spark_importer import _field_from_struct_type
11
- from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
10
+ from datacontract.imports.sql_importer import map_type_from_sql, to_physical_type_key
11
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
12
12
  from datacontract.model.exceptions import DataContractException
13
13
 
14
14
 
@@ -18,8 +18,11 @@ class UnityImporter(Importer):
18
18
  """
19
19
 
20
20
  def import_source(
21
- self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
22
- ) -> DataContractSpecification:
21
+ self,
22
+ data_contract_specification: DataContractSpecification | OpenDataContractStandard,
23
+ source: str,
24
+ import_args: dict,
25
+ ) -> DataContractSpecification | OpenDataContractStandard:
23
26
  """
24
27
  Import data contract specification from a source.
25
28
 
@@ -35,15 +38,14 @@ class UnityImporter(Importer):
35
38
  if source is not None:
36
39
  data_contract_specification = import_unity_from_json(data_contract_specification, source)
37
40
  else:
38
- data_contract_specification = import_unity_from_api(
39
- data_contract_specification, import_args.get("unity_table_full_name")
40
- )
41
+ unity_table_full_name_list = import_args.get("unity_table_full_name")
42
+ data_contract_specification = import_unity_from_api(data_contract_specification, unity_table_full_name_list)
41
43
  return data_contract_specification
42
44
 
43
45
 
44
46
  def import_unity_from_json(
45
- data_contract_specification: DataContractSpecification, source: str
46
- ) -> DataContractSpecification:
47
+ data_contract_specification: DataContractSpecification | OpenDataContractStandard, source: str
48
+ ) -> DataContractSpecification | OpenDataContractStandard:
47
49
  """
48
50
  Import data contract specification from a JSON file.
49
51
 
@@ -71,39 +73,71 @@ def import_unity_from_json(
71
73
 
72
74
 
73
75
  def import_unity_from_api(
74
- data_contract_specification: DataContractSpecification, unity_table_full_name: Optional[str] = None
76
+ data_contract_specification: DataContractSpecification, unity_table_full_name_list: List[str] = None
75
77
  ) -> DataContractSpecification:
76
78
  """
77
79
  Import data contract specification from Unity Catalog API.
78
80
 
79
81
  :param data_contract_specification: The data contract specification to be imported.
80
82
  :type data_contract_specification: DataContractSpecification
81
- :param unity_table_full_name: The full name of the Unity table.
82
- :type unity_table_full_name: Optional[str]
83
+ :param unity_table_full_name_list: The full name of the Unity table.
84
+ :type unity_table_full_name_list: list[str]
83
85
  :return: The imported data contract specification.
84
86
  :rtype: DataContractSpecification
85
87
  :raises DataContractException: If there is an error retrieving the schema from the API.
86
88
  """
87
89
  try:
88
- workspace_client = WorkspaceClient()
89
- unity_schema: TableInfo = workspace_client.tables.get(unity_table_full_name)
90
+ # print(f"Retrieving Unity Catalog schema for table: {unity_table_full_name}")
91
+ profile = os.getenv("DATACONTRACT_DATABRICKS_PROFILE")
92
+ host, token = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"), os.getenv("DATACONTRACT_DATABRICKS_TOKEN")
93
+ # print(f"Databricks host: {host}, token: {'***' if token else 'not set'}")
94
+ exception = DataContractException(
95
+ type="configuration",
96
+ name="Databricks configuration",
97
+ reason="",
98
+ engine="datacontract",
99
+ )
100
+ if not profile and not host and not token:
101
+ reason = "Either DATACONTRACT_DATABRICKS_PROFILE or both DATACONTRACT_DATABRICKS_SERVER_HOSTNAME and DATACONTRACT_DATABRICKS_TOKEN environment variables must be set"
102
+ exception.reason = reason
103
+ raise exception
104
+ if token and not host:
105
+ reason = "DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set"
106
+ exception.reason = reason
107
+ raise exception
108
+ if host and not token:
109
+ reason = "DATACONTRACT_DATABRICKS_TOKEN environment variable is not set"
110
+ exception.reason = reason
111
+ raise exception
112
+ workspace_client = WorkspaceClient(profile=profile) if profile else WorkspaceClient(host=host, token=token)
90
113
  except Exception as e:
91
114
  raise DataContractException(
92
115
  type="schema",
93
116
  name="Retrieve unity catalog schema",
94
- reason=f"Failed to retrieve unity catalog schema from databricks profile: {os.getenv('DATABRICKS_CONFIG_PROFILE')}",
117
+ reason="Failed to connect to unity catalog schema",
95
118
  engine="datacontract",
96
119
  original_exception=e,
97
120
  )
98
121
 
99
- convert_unity_schema(data_contract_specification, unity_schema)
122
+ for unity_table_full_name in unity_table_full_name_list:
123
+ try:
124
+ unity_schema: TableInfo = workspace_client.tables.get(unity_table_full_name)
125
+ except Exception as e:
126
+ raise DataContractException(
127
+ type="schema",
128
+ name="Retrieve unity catalog schema",
129
+ reason=f"Unity table {unity_table_full_name} not found",
130
+ engine="datacontract",
131
+ original_exception=e,
132
+ )
133
+ data_contract_specification = convert_unity_schema(data_contract_specification, unity_schema)
100
134
 
101
135
  return data_contract_specification
102
136
 
103
137
 
104
138
  def convert_unity_schema(
105
- data_contract_specification: DataContractSpecification, unity_schema: TableInfo
106
- ) -> DataContractSpecification:
139
+ data_contract_specification: DataContractSpecification | OpenDataContractStandard, unity_schema: TableInfo
140
+ ) -> DataContractSpecification | OpenDataContractStandard:
107
141
  """
108
142
  Convert Unity schema to data contract specification.
109
143
 
@@ -117,6 +151,21 @@ def convert_unity_schema(
117
151
  if data_contract_specification.models is None:
118
152
  data_contract_specification.models = {}
119
153
 
154
+ if data_contract_specification.servers is None:
155
+ data_contract_specification.servers = {}
156
+
157
+ # Configure databricks server with catalog and schema from Unity table info
158
+ schema_name = unity_schema.schema_name
159
+ catalog_name = unity_schema.catalog_name
160
+ if catalog_name and schema_name:
161
+ server_name = "myserver" # Default server name
162
+
163
+ data_contract_specification.servers[server_name] = Server(
164
+ type="databricks",
165
+ catalog=catalog_name,
166
+ schema=schema_name,
167
+ )
168
+
120
169
  fields = import_table_fields(unity_schema.columns)
121
170
 
122
171
  table_id = unity_schema.name or unity_schema.table_id
@@ -149,25 +198,22 @@ def import_table_fields(columns: List[ColumnInfo]) -> dict[str, Field]:
149
198
  imported_fields = {}
150
199
 
151
200
  for column in columns:
152
- struct_field: types.StructField = _type_json_to_spark_field(column.type_json)
153
- imported_fields[column.name] = _field_from_struct_type(struct_field)
201
+ imported_fields[column.name] = _to_field(column)
154
202
 
155
203
  return imported_fields
156
204
 
157
205
 
158
- def _type_json_to_spark_field(type_json: str) -> types.StructField:
159
- """
160
- Parses a JSON string representing a Spark field and returns a StructField object.
161
-
162
- The reason we do this is to leverage the Spark JSON schema parser to handle the
163
- complexity of the Spark field types. The field `type_json` in the Unity API is
164
- the output of a `StructField.jsonValue()` call.
165
-
166
- :param type_json: The JSON string representing the Spark field.
167
- :type type_json: str
168
-
169
- :return: The StructField object.
170
- :rtype: types.StructField
171
- """
172
- type_dict = json.loads(type_json)
173
- return types.StructField.fromJson(type_dict)
206
+ def _to_field(column: ColumnInfo) -> Field:
207
+ field = Field()
208
+ # The second condition evaluates for complex types (e.g. variant)
209
+ if column.type_name is not None or (column.type_name is None and column.type_text is not None):
210
+ sql_type = str(column.type_text)
211
+ field.type = map_type_from_sql(sql_type)
212
+ physical_type_key = to_physical_type_key("databricks")
213
+ field.config = {
214
+ physical_type_key: sql_type,
215
+ }
216
+ field.required = column.nullable is None or not column.nullable
217
+ field.description = column.comment if column.comment else None
218
+
219
+ return field
datacontract/init/init_template.py CHANGED
@@ -3,7 +3,7 @@ import logging
3
3
 
4
4
  import requests
5
5
 
6
- DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.1.0.init.yaml"
6
+ DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.2.1.init.yaml"
7
7
 
8
8
 
9
9
  def get_init_template(location: str = None) -> str:
datacontract/integration/entropy_data.py ADDED
@@ -0,0 +1,126 @@
1
+ import os
2
+ from urllib.parse import urlparse
3
+
4
+ import requests
5
+
6
+ from datacontract.model.run import Run
7
+
8
+ # used to retrieve the HTML location of the published data contract or test results
9
+ RESPONSE_HEADER_LOCATION_HTML = "location-html"
10
+
11
+
12
+ def publish_test_results_to_entropy_data(run: Run, publish_url: str, ssl_verification: bool):
13
+ try:
14
+ host = publish_url
15
+ if publish_url is None:
16
+ # this url supports Data Mesh Manager and Data Contract Manager
17
+ host = _get_host()
18
+ url = "%s/api/test-results" % host
19
+ else:
20
+ url = publish_url
21
+
22
+ api_key = _get_api_key()
23
+
24
+ if run.dataContractId is None:
25
+ raise Exception("Cannot publish run results for unknown data contract ID")
26
+
27
+ headers = {"Content-Type": "application/json", "x-api-key": api_key}
28
+ request_body = run.model_dump_json()
29
+ # print("Request Body:", request_body)
30
+ response = requests.post(
31
+ url,
32
+ data=request_body,
33
+ headers=headers,
34
+ verify=ssl_verification,
35
+ )
36
+ # print("Status Code:", response.status_code)
37
+ # print("Response Body:", response.text)
38
+ if response.status_code != 200:
39
+ display_host = _extract_hostname(host)
40
+ run.log_error(f"Error publishing test results to {display_host}: {response.text}")
41
+ return
42
+ run.log_info("Published test results successfully")
43
+
44
+ location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
45
+ if location_html is not None and len(location_html) > 0:
46
+ print(f"🚀 Open {location_html}")
47
+
48
+ except Exception as e:
49
+ run.log_error(f"Failed publishing test results. Error: {str(e)}")
50
+
51
+
52
+ def publish_data_contract_to_entropy_data(data_contract_dict: dict, ssl_verification: bool):
53
+ try:
54
+ api_key = _get_api_key()
55
+ host = _get_host()
56
+ headers = {"Content-Type": "application/json", "x-api-key": api_key}
57
+ id = data_contract_dict["id"]
58
+ url = f"{host}/api/datacontracts/{id}"
59
+ response = requests.put(
60
+ url=url,
61
+ json=data_contract_dict,
62
+ headers=headers,
63
+ verify=ssl_verification,
64
+ )
65
+ if response.status_code != 200:
66
+ display_host = _extract_hostname(host)
67
+ print(f"Error publishing data contract to {display_host}: {response.text}")
68
+ exit(1)
69
+
70
+ print("✅ Published data contract successfully")
71
+
72
+ location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
73
+ if location_html is not None and len(location_html) > 0:
74
+ print(f"🚀 Open {location_html}")
75
+
76
+ except Exception as e:
77
+ print(f"Failed publishing data contract. Error: {str(e)}")
78
+
79
+
80
+ def _get_api_key() -> str:
81
+ """
82
+ Get API key from environment variables with fallback priority:
83
+ 1. ENTROPY_DATA_API_KEY
84
+ 2. DATAMESH_MANAGER_API_KEY
85
+ 3. DATACONTRACT_MANAGER_API_KEY
86
+ """
87
+ api_key = os.getenv("ENTROPY_DATA_API_KEY")
88
+ if api_key is None:
89
+ api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
90
+ if api_key is None:
91
+ api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
92
+ if api_key is None:
93
+ raise Exception(
94
+ "Cannot publish, as neither ENTROPY_DATA_API_KEY, DATAMESH_MANAGER_API_KEY, nor DATACONTRACT_MANAGER_API_KEY is set"
95
+ )
96
+ return api_key
97
+
98
+
99
+ def _get_host() -> str:
100
+ """
101
+ Get host from environment variables with fallback priority:
102
+ 1. ENTROPY_DATA_HOST
103
+ 2. DATAMESH_MANAGER_HOST
104
+ 3. DATACONTRACT_MANAGER_HOST
105
+ 4. Default: https://api.entropy-data.com
106
+ """
107
+ host = os.getenv("ENTROPY_DATA_HOST")
108
+ if host is None:
109
+ host = os.getenv("DATAMESH_MANAGER_HOST")
110
+ if host is None:
111
+ host = os.getenv("DATACONTRACT_MANAGER_HOST")
112
+ if host is None:
113
+ host = "https://api.entropy-data.com"
114
+ return host
115
+
116
+
117
+ def _extract_hostname(url: str) -> str:
118
+ """
119
+ Extract the hostname (including subdomains and top-level domain) from a URL.
120
+
121
+ Examples:
122
+ - https://app.entropy-data.com/path -> app.entropy-data.com
123
+ - http://api.example.com:8080/api -> api.example.com
124
+ """
125
+ parsed = urlparse(url)
126
+ return parsed.netloc.split(":")[0] if parsed.netloc else url
datacontract/lint/resolve.py CHANGED
@@ -1,11 +1,15 @@
1
+ import importlib.resources as resources
1
2
  import logging
2
3
  import os
4
+ import warnings
5
+ from pathlib import Path
3
6
 
4
7
  import fastjsonschema
5
8
  import yaml
6
9
  from fastjsonschema import JsonSchemaValueException
10
+ from open_data_contract_standard.model import OpenDataContractStandard
7
11
 
8
- from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
12
+ from datacontract.imports.odcs_v3_importer import import_from_odcs, parse_odcs_v3_from_str
9
13
  from datacontract.lint.resources import read_resource
10
14
  from datacontract.lint.schema import fetch_schema
11
15
  from datacontract.lint.urls import fetch_resource
@@ -15,7 +19,8 @@ from datacontract.model.data_contract_specification import (
15
19
  DeprecatedQuality,
16
20
  )
17
21
  from datacontract.model.exceptions import DataContractException
18
- from datacontract.model.odcs import is_open_data_contract_standard
22
+ from datacontract.model.odcs import is_open_data_contract_standard, is_open_data_product_standard
23
+ from datacontract.model.run import ResultEnum
19
24
 
20
25
 
21
26
  def resolve_data_contract(
@@ -37,7 +42,35 @@ def resolve_data_contract(
37
42
  else:
38
43
  raise DataContractException(
39
44
  type="lint",
40
- result="failed",
45
+ result=ResultEnum.failed,
46
+ name="Check that data contract YAML is valid",
47
+ reason="Data contract needs to be provided",
48
+ engine="datacontract",
49
+ )
50
+
51
+
52
+ def resolve_data_contract_v2(
53
+ data_contract_location: str = None,
54
+ data_contract_str: str = None,
55
+ data_contract: DataContractSpecification | OpenDataContractStandard = None,
56
+ schema_location: str = None,
57
+ inline_definitions: bool = False,
58
+ inline_quality: bool = False,
59
+ ) -> DataContractSpecification | OpenDataContractStandard:
60
+ if data_contract_location is not None:
61
+ return resolve_data_contract_from_location_v2(
62
+ data_contract_location, schema_location, inline_definitions, inline_quality
63
+ )
64
+ elif data_contract_str is not None:
65
+ return _resolve_data_contract_from_str_v2(
66
+ data_contract_str, schema_location, inline_definitions, inline_quality
67
+ )
68
+ elif data_contract is not None:
69
+ return data_contract
70
+ else:
71
+ raise DataContractException(
72
+ type="lint",
73
+ result=ResultEnum.failed,
41
74
  name="Check that data contract YAML is valid",
42
75
  reason="Data contract needs to be provided",
43
76
  engine="datacontract",
@@ -58,13 +91,20 @@ def resolve_data_contract_dict(
58
91
  else:
59
92
  raise DataContractException(
60
93
  type="lint",
61
- result="failed",
94
+ result=ResultEnum.failed,
62
95
  name="Check that data contract YAML is valid",
63
96
  reason="Data contract needs to be provided",
64
97
  engine="datacontract",
65
98
  )
66
99
 
67
100
 
101
+ def resolve_data_contract_from_location_v2(
102
+ location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
103
+ ) -> DataContractSpecification | OpenDataContractStandard:
104
+ data_contract_str = read_resource(location)
105
+ return _resolve_data_contract_from_str_v2(data_contract_str, schema_location, inline_definitions, inline_quality)
106
+
107
+
68
108
  def resolve_data_contract_from_location(
69
109
  location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
70
110
  ) -> DataContractSpecification:
@@ -152,7 +192,7 @@ def _resolve_definition_ref(ref, spec) -> Definition:
152
192
  else:
153
193
  raise DataContractException(
154
194
  type="lint",
155
- result="failed",
195
+ result=ResultEnum.failed,
156
196
  name="Check that data contract YAML is valid",
157
197
  reason=f"Cannot resolve reference {ref}",
158
198
  engine="datacontract",
@@ -165,7 +205,7 @@ def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification)
165
205
  if definition_key not in spec.definitions:
166
206
  raise DataContractException(
167
207
  type="lint",
168
- result="failed",
208
+ result=ResultEnum.failed,
169
209
  name="Check that data contract YAML is valid",
170
210
  reason=f"Cannot resolve definition {definition_key}",
171
211
  engine="datacontract",
@@ -195,7 +235,7 @@ def _fetch_file(path) -> str:
195
235
  if not os.path.exists(path):
196
236
  raise DataContractException(
197
237
  type="export",
198
- result="failed",
238
+ result=ResultEnum.failed,
199
239
  name="Check that data contract definition is valid",
200
240
  reason=f"Cannot resolve reference {path}",
201
241
  engine="datacontract",
@@ -230,7 +270,7 @@ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
230
270
  if not os.path.exists(ref):
231
271
  raise DataContractException(
232
272
  type="export",
233
- result="failed",
273
+ result=ResultEnum.failed,
234
274
  name="Check that data contract quality is valid",
235
275
  reason=f"Cannot resolve reference {ref}",
236
276
  engine="datacontract",
@@ -240,35 +280,83 @@ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
240
280
  return quality_spec
241
281
 
242
282
 
283
+ def _resolve_data_contract_from_str_v2(
284
+ data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
285
+ ) -> DataContractSpecification | OpenDataContractStandard:
286
+ yaml_dict = _to_yaml(data_contract_str)
287
+
288
+ if is_open_data_product_standard(yaml_dict):
289
+ logging.info("Cannot import ODPS, as not supported")
290
+ raise DataContractException(
291
+ type="schema",
292
+ result=ResultEnum.failed,
293
+ name="Parse ODCS contract",
294
+ reason="Cannot parse ODPS product",
295
+ engine="datacontract",
296
+ )
297
+
298
+ if is_open_data_contract_standard(yaml_dict):
299
+ logging.info("Importing ODCS v3")
300
+ # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
301
+ odcs = parse_odcs_v3_from_str(data_contract_str)
302
+ return odcs
303
+
304
+ logging.info("Importing DCS")
305
+ return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
306
+
307
+
243
308
  def _resolve_data_contract_from_str(
244
309
  data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
245
310
  ) -> DataContractSpecification:
246
311
  yaml_dict = _to_yaml(data_contract_str)
247
312
 
313
+ if schema_location is None:
314
+ if is_open_data_contract_standard(yaml_dict):
315
+ logging.info("Using ODCS 3.1.0 schema to validate data contract")
316
+ # TODO refactor this to a specific function
317
+ schema_location = resources.files("datacontract").joinpath("schemas", "odcs-3.1.0.schema.json")
318
+
319
+ _validate_json_schema(yaml_dict, schema_location)
320
+
248
321
  if is_open_data_contract_standard(yaml_dict):
249
322
  logging.info("Importing ODCS v3")
250
323
  # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
251
- data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
252
- return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
253
- else:
254
- logging.info("Importing DCS")
324
+ odcs = parse_odcs_v3_from_str(data_contract_str)
325
+
326
+ data_contract_specification = DataContractSpecification(dataContractSpecification="1.2.1")
327
+ return import_from_odcs(data_contract_specification, odcs)
328
+
329
+ logging.info("Importing DCS")
330
+ return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
255
331
 
256
- _validate_data_contract_specification_schema(yaml_dict, schema_location)
332
+
333
+ def _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict):
334
+ _validate_json_schema(yaml_dict, schema_location)
257
335
  data_contract_specification = yaml_dict
258
336
  spec = DataContractSpecification(**data_contract_specification)
259
-
260
337
  if inline_definitions:
261
338
  inline_definitions_into_data_contract(spec)
262
- if spec.quality and inline_quality:
263
- _resolve_quality_ref(spec.quality)
264
-
339
+ ## Suppress DeprecationWarning when accessing spec.quality,
340
+ ## iif it is in fact *not* used.
341
+ with warnings.catch_warnings(record=True) as recorded_warnings:
342
+ spec_quality = spec.quality
343
+ for w in recorded_warnings:
344
+ if not issubclass(w.category, DeprecationWarning) or spec_quality is not None:
345
+ warnings.warn_explicit(
346
+ message=w.message,
347
+ category=w.category,
348
+ filename=w.filename,
349
+ lineno=w.lineno,
350
+ source=w.source,
351
+ )
352
+ if spec_quality and inline_quality:
353
+ _resolve_quality_ref(spec_quality)
265
354
  return spec
266
355
 
267
356
 
268
357
  def _to_yaml(data_contract_str) -> dict:
269
358
  try:
270
- yaml_dict = yaml.safe_load(data_contract_str)
271
- return yaml_dict
359
+ return yaml.safe_load(data_contract_str)
272
360
  except Exception as e:
273
361
  logging.warning(f"Cannot parse YAML. Error: {str(e)}")
274
362
  raise DataContractException(
@@ -280,16 +368,17 @@ def _to_yaml(data_contract_str) -> dict:
280
368
  )
281
369
 
282
370
 
283
- def _validate_data_contract_specification_schema(data_contract_yaml, schema_location: str = None):
371
+ def _validate_json_schema(yaml_str, schema_location: str | Path = None):
372
+ logging.debug(f"Linting data contract with schema at {schema_location}")
284
373
  schema = fetch_schema(schema_location)
285
374
  try:
286
- fastjsonschema.validate(schema, data_contract_yaml, use_default=False)
375
+ fastjsonschema.validate(schema, yaml_str, use_default=False)
287
376
  logging.debug("YAML data is valid.")
288
377
  except JsonSchemaValueException as e:
289
378
  logging.warning(f"Data Contract YAML is invalid. Validation error: {e.message}")
290
379
  raise DataContractException(
291
380
  type="lint",
292
- result="failed",
381
+ result=ResultEnum.failed,
293
382
  name="Check that data contract YAML is valid",
294
383
  reason=e.message,
295
384
  engine="datacontract",
@@ -298,7 +387,7 @@ def _validate_data_contract_specification_schema(data_contract_yaml, schema_loca
298
387
  logging.warning(f"Data Contract YAML is invalid. Validation error: {str(e)}")
299
388
  raise DataContractException(
300
389
  type="lint",
301
- result="failed",
390
+ result=ResultEnum.failed,
302
391
  name="Check that data contract YAML is valid",
303
392
  reason=str(e),
304
393
  engine="datacontract",
datacontract/lint/schema.py CHANGED
@@ -2,16 +2,18 @@ import importlib.resources as resources
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ from pathlib import Path
5
6
  from typing import Any, Dict
6
7
 
7
8
  import requests
8
9
 
9
10
  from datacontract.model.exceptions import DataContractException
11
+ from datacontract.model.run import ResultEnum
10
12
 
11
- DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.1.0.schema.json"
13
+ DEFAULT_DATA_CONTRACT_SCHEMA = "datacontract-1.2.1.schema.json"
12
14
 
13
15
 
14
- def fetch_schema(location: str = None) -> Dict[str, Any]:
16
+ def fetch_schema(location: str | Path = None) -> Dict[str, Any]:
15
17
  """
16
18
  Fetch and return a JSON schema from a given location.
17
19
 
@@ -36,19 +38,26 @@ def fetch_schema(location: str = None) -> Dict[str, Any]:
36
38
  schema_file = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_SCHEMA)
37
39
  with schema_file.open("r") as file:
38
40
  schema = json.load(file)
39
- elif location.startswith("http://") or location.startswith("https://"):
40
- response = requests.get(location)
41
- schema = response.json()
42
41
  else:
43
- if not os.path.exists(location):
44
- raise DataContractException(
45
- type="lint",
46
- name=f"Reading schema from {location}",
47
- reason=f"The file '{location}' does not exist.",
48
- engine="datacontract",
49
- result="error",
50
- )
51
- with open(location, "r") as file:
52
- schema = json.load(file)
42
+ # Convert Path objects to strings for string operations
43
+ location_str = str(location)
44
+
45
+ if location_str.startswith("http://") or location_str.startswith("https://"):
46
+ logging.debug(f"Downloading schema from {location_str}")
47
+ response = requests.get(location_str)
48
+ schema = response.json()
49
+ else:
50
+ if not os.path.exists(location):
51
+ raise DataContractException(
52
+ type="lint",
53
+ name=f"Reading schema from {location}",
54
+ reason=f"The file '{location}' does not exist.",
55
+ engine="datacontract",
56
+ result=ResultEnum.error,
57
+ )
58
+
59
+ logging.debug(f"Loading JSON schema locally at {location}")
60
+ with open(location, "r") as file:
61
+ schema = json.load(file)
53
62
 
54
63
  return schema
datacontract/lint/urls.py CHANGED
@@ -28,10 +28,22 @@ def fetch_resource(url: str):
28
28
  def _set_api_key(headers, url):
29
29
  hostname = urlparse(url).hostname
30
30
 
31
+ entropy_data_api_key = os.getenv("ENTROPY_DATA_API_KEY")
31
32
  datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
32
33
  datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
33
34
 
34
- if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
35
+ if hostname == "entropy-data.com" or hostname.endswith(".entropy-data.com"):
36
+ if entropy_data_api_key is None or entropy_data_api_key == "":
37
+ print("Error: Entropy Data API key is not set. Set env variable ENTROPY_DATA_API_KEY.")
38
+ raise DataContractException(
39
+ type="lint",
40
+ name=f"Reading data contract from {url}",
41
+ reason="Error: Entropy Data API key is not set. Set env variable ENTROPY_DATA_API_KEY.",
42
+ engine="datacontract",
43
+ result="error",
44
+ )
45
+ headers["x-api-key"] = entropy_data_api_key
46
+ elif hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
35
47
  if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
36
48
  print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
37
49
  raise DataContractException(
@@ -54,7 +66,9 @@ def _set_api_key(headers, url):
54
66
  )
55
67
  headers["x-api-key"] = datacontract_manager_api_key
56
68
 
57
- if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
58
- headers["x-api-key"] = datamesh_manager_api_key
59
69
  if datacontract_manager_api_key is not None and datacontract_manager_api_key != "":
60
70
  headers["x-api-key"] = datacontract_manager_api_key
71
+ if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
72
+ headers["x-api-key"] = datamesh_manager_api_key
73
+ if entropy_data_api_key is not None and entropy_data_api_key != "":
74
+ headers["x-api-key"] = entropy_data_api_key