datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/engines/data_contract_test.py
@@ -0,0 +1,113 @@
+ import atexit
+ import os
+ import tempfile
+ import typing
+
+ import requests
+ from duckdb.duckdb import DuckDBPyConnection
+
+ from datacontract.engines.data_contract_checks import create_checks
+
+ if typing.TYPE_CHECKING:
+     from pyspark.sql import SparkSession
+
+ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
+     check_that_datacontract_contains_valid_server_configuration,
+ )
+ from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
+ from datacontract.engines.soda.check_soda_execute import check_soda_execute
+ from datacontract.model.data_contract_specification import DataContractSpecification, Server
+ from datacontract.model.exceptions import DataContractException
+ from datacontract.model.run import ResultEnum, Run
+
+
+ def execute_data_contract_test(
+     data_contract_specification: DataContractSpecification,
+     run: Run,
+     server_name: str = None,
+     spark: "SparkSession" = None,
+     duckdb_connection: DuckDBPyConnection = None,
+ ):
+     if data_contract_specification.models is None or len(data_contract_specification.models) == 0:
+         raise DataContractException(
+             type="lint",
+             name="Check that data contract contains models",
+             result=ResultEnum.warning,
+             reason="Models block is missing. Skip executing tests.",
+             engine="datacontract",
+         )
+     if (
+         server_name is None
+         and data_contract_specification.servers is not None
+         and len(data_contract_specification.servers) > 0
+     ):
+         server_name = list(data_contract_specification.servers.keys())[0]
+     server = get_server(data_contract_specification, server_name)
+     run.log_info(f"Running tests for data contract {data_contract_specification.id} with server {server_name}")
+     run.dataContractId = data_contract_specification.id
+     run.dataContractVersion = data_contract_specification.info.version
+     run.dataProductId = server.dataProductId
+     run.outputPortId = server.outputPortId
+     run.server = server_name
+
+     if server.type == "api":
+         server = process_api_response(run, server)
+
+     run.checks.extend(create_checks(data_contract_specification, server))
+
+     # TODO check server is supported type for nicer error messages
+     # TODO check server credentials are complete for nicer error messages
+     if server.format == "json" and server.type != "kafka":
+         check_jsonschema(run, data_contract_specification, server)
+     check_soda_execute(run, data_contract_specification, server, spark, duckdb_connection)
+
+
+ def get_server(data_contract_specification: DataContractSpecification, server_name: str = None) -> Server | None:
+     """Get the server configuration from the data contract specification.
+
+     Args:
+         data_contract_specification: The data contract specification
+         server_name: Optional name of the server to use. If not provided, uses the first server.
+
+     Returns:
+         The selected server configuration
+     """
+
+     check_that_datacontract_contains_valid_server_configuration(data_contract_specification, server_name)
+
+     if server_name is not None:
+         server = data_contract_specification.servers.get(server_name)
+     else:
+         server_name = list(data_contract_specification.servers.keys())[0]
+         server = data_contract_specification.servers.get(server_name)
+     return server
+
+
+ def process_api_response(run, server):
+     tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract_cli_api_")
+     atexit.register(tmp_dir.cleanup)
+     headers = {}
+     if os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION") is not None:
+         headers["Authorization"] = os.getenv("DATACONTRACT_API_HEADER_AUTHORIZATION")
+     try:
+         response = requests.get(server.location, headers=headers)
+         response.raise_for_status()
+     except requests.exceptions.RequestException as e:
+         raise DataContractException(
+             type="connection",
+             name="API server connection error",
+             result=ResultEnum.error,
+             reason=f"Failed to fetch API response from {server.location}: {e}",
+             engine="datacontract",
+         )
+     with open(f"{tmp_dir.name}/api_response.json", "w") as f:
+         f.write(response.text)
+     run.log_info(f"Saved API response to {tmp_dir.name}/api_response.json")
+     server = Server(
+         type="local",
+         format="json",
+         path=f"{tmp_dir.name}/api_response.json",
+         dataProductId=server.dataProductId,
+         outputPortId=server.outputPortId,
+     )
+     return server
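For orientation, `execute_data_contract_test` above is the engine entry point that the library's documented `DataContract` API presumably delegates to. A minimal, hedged sketch of triggering a test run through that public API (the contract file name and server key are hypothetical placeholders):

from datacontract.data_contract import DataContract

# "datacontract.yaml" and "production" are placeholders for a real contract and server key.
data_contract = DataContract(data_contract_file="datacontract.yaml", server="production")
run = data_contract.test()
if not run.has_passed():
    print("Data contract validation failed.")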
datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py
@@ -1,12 +1,11 @@
  from datacontract.model.data_contract_specification import DataContractSpecification
  from datacontract.model.exceptions import DataContractException
- from datacontract.model.run import Run


  def check_that_datacontract_contains_valid_server_configuration(
-     run: Run, data_contract: DataContractSpecification, server_name: str
+     data_contract: DataContractSpecification, server_name: str | None
  ):
-     if data_contract.servers is None:
+     if data_contract.servers is None or len(data_contract.servers) == 0:
          raise DataContractException(
              type="lint",
              name="Check that data contract contains valid server configuration",
datacontract/engines/datacontract/check_that_datacontract_file_exists.py
@@ -1,6 +1,6 @@
  import os

- from datacontract.model.run import Run, Check
+ from datacontract.model.run import Check, Run


  def check_that_datacontract_file_exists(run: Run, file_path: str):
datacontract/engines/fastjsonschema/check_jsonschema.py
@@ -1,32 +1,115 @@
+ import glob
  import json
  import logging
  import os
+ import threading
+ from typing import Any, Callable, Generator, List, Optional

  import fastjsonschema
+ from fastjsonschema import JsonSchemaValueException

  from datacontract.engines.fastjsonschema.s3.s3_read_files import yield_s3_files
  from datacontract.export.jsonschema_converter import to_jsonschema
- from datacontract.model.data_contract_specification import \
-     DataContractSpecification, Server
+ from datacontract.model.data_contract_specification import DataContractSpecification, Server
  from datacontract.model.exceptions import DataContractException
- from datacontract.model.run import Run, Check
+ from datacontract.model.run import Check, ResultEnum, Run

+ # Thread-safe cache for primaryKey fields.
+ _primary_key_cache = {}
+ _cache_lock = threading.Lock()

- def validate_json_stream(model_name, validate, json_stream):
+
+ def get_primary_key_field(schema: dict, model_name: str) -> Optional[str]:
+     # Check cache first.
+     with _cache_lock:
+         cached_value = _primary_key_cache.get(model_name)
+         if cached_value is not None:
+             return cached_value
+
+     # Find primaryKey field.
+     fields = schema.get("properties", {})
+     for field_name, attributes in fields.items():
+         if attributes.get("primaryKey", False):
+             # Cache the result before returning.
+             with _cache_lock:
+                 _primary_key_cache[model_name] = field_name
+             return field_name
+
+     # Return None if no primary key was found.
+     return None
+
+
+ def get_primary_key_value(schema: dict, model_name: str, json_object: dict) -> Optional[str]:
+     # Get the `primaryKey` field.
+     primary_key_field = get_primary_key_field(schema, model_name)
+     if not primary_key_field:
+         return None
+
+     # Return the value of the `primaryKey` field in the JSON object.
+     return json_object.get(primary_key_field)
+
+
+ def process_exceptions(run, exceptions: List[DataContractException]):
+     if not exceptions:
+         return
+
+     # Define the maximum number of errors to process (can be adjusted by defining an ENV variable).
      try:
-         logging.info("Validating JSON")
-         for json_obj in json_stream:
+         error_limit = int(os.getenv("DATACONTRACT_MAX_ERRORS", 500))
+     except ValueError:
+         # Fallback to default if environment variable is invalid.
+         error_limit = 500
+
+     # Calculate the effective limit to avoid index out of range
+     limit = min(len(exceptions), error_limit)
+
+     # Add all exceptions up to the limit - 1 to `run.checks`.
+     DEFAULT_ERROR_MESSAGE = "An error occurred during validation phase. See the logs for more details."
+     run.checks.extend(
+         [
+             Check(
+                 type=exception.type,
+                 name=exception.name,
+                 result=exception.result,
+                 reason=exception.reason,
+                 model=exception.model,
+                 engine=exception.engine,
+                 message=exception.message or DEFAULT_ERROR_MESSAGE,
+             )
+             for exception in exceptions[: limit - 1]
+         ]
+     )
+
+     # Raise the last exception within the limit.
+     last_exception = exceptions[limit - 1]
+     raise last_exception
+
+
+ def validate_json_stream(
+     schema: dict, model_name: str, validate: Callable, json_stream: Generator[Any, Any, None]
+ ) -> List[DataContractException]:
+     logging.info(f"Validating JSON stream for model: '{model_name}'.")
+     exceptions: List[DataContractException] = []
+     for json_obj in json_stream:
+         try:
              validate(json_obj)
-         return True
-     except fastjsonschema.JsonSchemaValueException as e:
-         raise DataContractException(
-             type="schema",
-             name="Check that JSON has valid schema",
-             model=model_name,
-             reason=e.message,
-             engine="jsonschema",
-             original_exception=e,
-         )
+         except JsonSchemaValueException as e:
+             logging.warning(f"Validation failed for JSON object with type: '{model_name}'.")
+             primary_key_value = get_primary_key_value(schema, model_name, json_obj)
+             exceptions.append(
+                 DataContractException(
+                     type="schema",
+                     name="Check that JSON has valid schema",
+                     result=ResultEnum.failed,
+                     reason=f"{f'#{primary_key_value}: ' if primary_key_value is not None else ''}{e.message}",
+                     model=model_name,
+                     engine="jsonschema",
+                     message=e.message,
+                 )
+             )
+     if not exceptions:
+         logging.info(f"All JSON objects in the stream passed validation for model: '{model_name}'.")
+     return exceptions


  def read_json_lines(file):
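The `process_exceptions` helper in the hunk above caps how many schema violations are reported: the first `limit - 1` collected exceptions become `Check` entries on the run and the exception at the limit is raised. A self-contained sketch of that capping arithmetic (integers stand in for the `DataContractException` objects):

import os

# With DATACONTRACT_MAX_ERRORS=3 and 10 collected exceptions,
# the first 2 are recorded as checks and the 3rd is raised.
os.environ["DATACONTRACT_MAX_ERRORS"] = "3"
exceptions = list(range(10))  # stand-ins for DataContractException objects
try:
    error_limit = int(os.getenv("DATACONTRACT_MAX_ERRORS", 500))
except ValueError:
    error_limit = 500
limit = min(len(exceptions), error_limit)
recorded, raised = exceptions[: limit - 1], exceptions[limit - 1]
print(recorded, raised)  # [0, 1] 2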
@@ -60,42 +143,64 @@ def read_json_file_content(file_content: str):
      yield json.loads(file_content)


- def process_json_file(run, model_name, validate, file, delimiter):
+ def process_json_file(run, schema, model_name, validate, file, delimiter):
      if delimiter == "new_line":
          json_stream = read_json_lines(file)
      elif delimiter == "array":
          json_stream = read_json_array(file)
      else:
          json_stream = read_json_file(file)
-     validate_json_stream(model_name, validate, json_stream)

+     # Validate the JSON stream and collect exceptions.
+     exceptions = validate_json_stream(schema, model_name, validate, json_stream)
+
+     # Handle all errors from schema validation.
+     process_exceptions(run, exceptions)

- def process_local_file(run, server, model_name, validate):
+
+ def process_local_file(run, server, schema, model_name, validate):
      path = server.path
+     if not path:
+         raise DataContractException(
+             type="schema",
+             name="Check that JSON has valid schema",
+             result=ResultEnum.warning,
+             reason="For server with type 'local', a 'path' must be defined.",
+             engine="datacontract",
+         )
      if "{model}" in path:
          path = path.format(model=model_name)

+     all_files = []
      if os.path.isdir(path):
-         return process_directory(run, path, server, model_name, validate)
+         # Fetch all JSONs in the directory
+         for root, _, files in os.walk(path):
+             for file in files:
+                 if file.endswith(".json"):
+                     all_files.append(os.path.join(root, file))
      else:
-         logging.info(f"Processing file {path}")
-         with open(path, "r") as file:
-             process_json_file(run, model_name, validate, file, server.delimiter)
+         # Use glob to fetch all JSONs
+         for file_path in glob.glob(path, recursive=True):
+             if os.path.isfile(file_path):
+                 if file_path.endswith(".json"):
+                     all_files.append(file_path)

+     if not all_files:
+         raise DataContractException(
+             type="schema",
+             name="Check that JSON has valid schema",
+             result=ResultEnum.warning,
+             reason=f"No files found in '{path}'.",
+             engine="datacontract",
+         )

- def process_directory(run, path, server, model_name, validate):
-     success = True
-     for filename in os.listdir(path):
-         if filename.endswith(".json"):  # or make this a parameter
-             file_path = os.path.join(path, filename)
-             with open(file_path, "r") as file:
-                 if not process_json_file(run, model_name, validate, file, server.delimiter):
-                     success = False
-                     break
-     return success
+     for file in all_files:
+         logging.info(f"Processing file: {file}")
+         with open(file, "r") as f:
+             process_json_file(run, schema, model_name, validate, f, server.delimiter)


- def process_s3_file(server, model_name, validate):
+ def process_s3_file(run, server, schema, model_name, validate):
      s3_endpoint_url = server.endpointUrl
      s3_location = server.location
      if "{model}" in s3_location:
@@ -114,12 +219,16 @@ def process_s3_file(server, model_name, validate):
          raise DataContractException(
              type="schema",
              name="Check that JSON has valid schema",
-             result="warning",
+             result=ResultEnum.warning,
              reason=f"Cannot find any file in {s3_location}",
              engine="datacontract",
          )

-     return validate_json_stream(model_name, validate, json_stream)
+     # Validate the JSON stream and collect exceptions.
+     exceptions = validate_json_stream(schema, model_name, validate, json_stream)
+
+     # Handle all errors from schema validation.
+     process_exceptions(run, exceptions)


  def check_jsonschema(run: Run, data_contract: DataContractSpecification, server: Server):
@@ -131,7 +240,7 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
          Check(
              type="schema",
              name="Check that JSON has valid schema",
-             result="warning",
+             result=ResultEnum.warning,
              reason="Server format is not 'json'. Skip validating jsonschema.",
              engine="jsonschema",
          )
@@ -149,20 +258,45 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
          schema = to_jsonschema(model_name, model)
          run.log_info(f"jsonschema: {schema}")

-         validate = fastjsonschema.compile(schema)
+         validate = fastjsonschema.compile(
+             schema,
+             formats={"uuid": r"^[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}$"},
+         )

          # Process files based on server type
          if server.type == "local":
-             process_local_file(run, server, model_name, validate)
+             process_local_file(run, server, schema, model_name, validate)
          elif server.type == "s3":
-             process_s3_file(server, model_name, validate)
+             process_s3_file(run, server, schema, model_name, validate)
+         elif server.type == "gcs":
+             run.checks.append(
+                 Check(
+                     type="schema",
+                     name="Check that JSON has valid schema",
+                     model=model_name,
+                     result=ResultEnum.info,
+                     reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
+                     engine="jsonschema",
+                 )
+             )
+         elif server.type == "azure":
+             run.checks.append(
+                 Check(
+                     type="schema",
+                     name="Check that JSON has valid schema",
+                     model=model_name,
+                     result=ResultEnum.info,
+                     reason="JSON Schema check skipped for azure, as azure is currently not supported",
+                     engine="jsonschema",
+                 )
+             )
          else:
              run.checks.append(
                  Check(
                      type="schema",
                      name="Check that JSON has valid schema",
                      model=model_name,
-                     result="warn",
+                     result=ResultEnum.warning,
                      reason=f"Server type {server.type} not supported",
                      engine="jsonschema",
                  )
@@ -174,7 +308,7 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
                  type="schema",
                  name="Check that JSON has valid schema",
                  model=model_name,
-                 result="passed",
+                 result=ResultEnum.passed,
                  reason="All JSON entries are valid.",
                  engine="jsonschema",
              )
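The `fastjsonschema.compile(..., formats=...)` change above registers a custom regex for the `uuid` format, which fastjsonschema does not validate out of the box. A minimal, hedged sketch of the same mechanism (schema inlined and regex simplified for illustration; the diff uses a stricter pattern with word boundaries):

import fastjsonschema

validate = fastjsonschema.compile(
    {"type": "object", "properties": {"id": {"type": "string", "format": "uuid"}}},
    formats={"uuid": r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"},
)
validate({"id": "123e4567-e89b-12d3-a456-426614174000"})  # passes
# validate({"id": "not-a-uuid"}) would raise JsonSchemaValueException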
datacontract/engines/fastjsonschema/s3/s3_read_files.py
@@ -1,7 +1,8 @@
  import logging
  import os

- import s3fs
+ from datacontract.model.exceptions import DataContractException
+ from datacontract.model.run import ResultEnum


  def yield_s3_files(s3_endpoint_url, s3_location):
@@ -14,11 +15,25 @@


  def s3_fs(s3_endpoint_url):
+     try:
+         import s3fs
+     except ImportError as e:
+         raise DataContractException(
+             type="schema",
+             result=ResultEnum.failed,
+             name="s3 extra missing",
+             reason="Install the extra s3 to use s3",
+             engine="datacontract",
+             original_exception=e,
+         )
+
      aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
      aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+     aws_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
      return s3fs.S3FileSystem(
          key=aws_access_key_id,
          secret=aws_secret_access_key,
+         token=aws_session_token,
          anon=aws_access_key_id is None,
          client_kwargs={"endpoint_url": s3_endpoint_url},
      )
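The `s3_fs` changes make `s3fs` an optional extra and add session-token support for temporary (STS) credentials. A hedged usage sketch (endpoint, bucket, and credential values are placeholders; assumes the package was installed with the s3 extra so that `s3fs` is importable):

import os

from datacontract.engines.fastjsonschema.s3.s3_read_files import s3_fs

# Placeholder credentials; the session token is only needed for temporary credentials.
os.environ["DATACONTRACT_S3_ACCESS_KEY_ID"] = "AKIA..."
os.environ["DATACONTRACT_S3_SECRET_ACCESS_KEY"] = "..."
os.environ["DATACONTRACT_S3_SESSION_TOKEN"] = "..."

fs = s3_fs("https://s3.eu-central-1.amazonaws.com")
print(fs.glob("s3://my-bucket/data/*.json"))  # hypothetical bucket and prefix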