datacontract-cli 0.9.7__py3-none-any.whl → 0.9.8__py3-none-any.whl
- datacontract/breaking/breaking.py +48 -57
- datacontract/cli.py +98 -80
- datacontract/data_contract.py +156 -106
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +46 -35
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +4 -4
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +3 -7
- datacontract/export/avro_idl_converter.py +65 -42
- datacontract/export/dbt_converter.py +43 -32
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +5 -7
- datacontract/export/protobuf_converter.py +12 -10
- datacontract/export/pydantic_converter.py +140 -0
- datacontract/export/rdf_converter.py +34 -11
- datacontract/export/sodacl_converter.py +24 -24
- datacontract/export/sql_converter.py +20 -9
- datacontract/export/sql_type_converter.py +44 -4
- datacontract/export/terraform_converter.py +4 -3
- datacontract/imports/avro_importer.py +32 -10
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +4 -9
- datacontract/integration/publish_opentelemetry.py +30 -16
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +26 -31
- datacontract/lint/linters/description_linter.py +12 -21
- datacontract/lint/linters/example_model_linter.py +28 -29
- datacontract/lint/linters/field_pattern_linter.py +8 -8
- datacontract/lint/linters/field_reference_linter.py +11 -10
- datacontract/lint/linters/notice_period_linter.py +18 -22
- datacontract/lint/linters/primary_field_linter.py +10 -12
- datacontract/lint/linters/quality_schema_linter.py +16 -20
- datacontract/lint/linters/valid_constraints_linter.py +42 -37
- datacontract/lint/resolve.py +7 -10
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +2 -1
- datacontract/model/data_contract_specification.py +8 -7
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +1 -1
- datacontract/web.py +3 -7
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +176 -37
- datacontract_cli-0.9.8.dist-info/RECORD +63 -0
- datacontract_cli-0.9.7.dist-info/RECORD +0 -61
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
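Two modules in this list are entirely new: datacontract/export/great_expectations_converter.py (+141) and datacontract/export/pydantic_converter.py (+140). They back the new "great-expectations" and "pydantic-model" export formats wired up in the data_contract.py diff below. A minimal sketch of driving both through the Python API; the constructor keyword `data_contract_file` and the model name "orders" are assumptions (the diff only shows the private attribute `self._data_contract_file`):

from datacontract.data_contract import DataContract

# `data_contract_file` as a constructor keyword is an assumption based on
# the self._data_contract_file attribute visible in the diff below.
data_contract = DataContract(data_contract_file="datacontract.yaml")

# "great-expectations" is model-specific: when the contract defines more than
# one model, a model name must be passed or export() raises RuntimeError.
suite_json = data_contract.export("great-expectations", model="orders")  # "orders" is hypothetical

# "pydantic-model" renders the whole contract as Pydantic model source code.
pydantic_source = data_contract.export("pydantic-model")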
datacontract/data_contract.py
CHANGED
@@ -5,56 +5,66 @@ import typing
 
 import yaml
 
-from datacontract.breaking.breaking import models_breaking_changes,
+from datacontract.breaking.breaking import models_breaking_changes, \
+    quality_breaking_changes
+from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
+    check_that_datacontract_contains_valid_server_configuration,
+)
 from datacontract.engines.fastjsonschema.check_jsonschema import \
     check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
-from datacontract.export.avro_converter import
+from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.export.avro_idl_converter import to_avro_idl
 from datacontract.export.dbt_converter import to_dbt_models_yaml, \
     to_dbt_sources_yaml, to_dbt_staging_sql
-from datacontract.export.
+from datacontract.export.great_expectations_converter import \
+    to_great_expectations
+from datacontract.export.jsonschema_converter import to_jsonschema_json
 from datacontract.export.odcs_converter import to_odcs_yaml
 from datacontract.export.protobuf_converter import to_protobuf
+from datacontract.export.pydantic_converter import to_pydantic_model_str
 from datacontract.export.rdf_converter import to_rdf_n3
 from datacontract.export.sodacl_converter import to_sodacl_yaml
-from datacontract.imports.avro_importer import import_avro
 from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
 from datacontract.export.terraform_converter import to_terraform
+from datacontract.imports.avro_importer import import_avro
 from datacontract.imports.sql_importer import import_sql
 from datacontract.integration.publish_datamesh_manager import \
     publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
-from datacontract.model.breaking_change import BreakingChanges, BreakingChange, Severity
 from datacontract.lint.linters.description_linter import DescriptionLinter
 from datacontract.lint.linters.example_model_linter import ExampleModelLinter
-from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
 from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
-from datacontract.lint.linters.field_reference_linter import
+from datacontract.lint.linters.field_reference_linter import \
+    FieldReferenceLinter
 from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
-from datacontract.lint.linters.primary_field_linter import
+from datacontract.lint.linters.primary_field_linter import \
+    PrimaryFieldUniqueRequired
+from datacontract.lint.linters.quality_schema_linter import \
+    QualityUsesSchemaLinter
+from datacontract.lint.linters.valid_constraints_linter import \
+    ValidFieldConstraintsLinter
+from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \
+    Severity
 from datacontract.model.data_contract_specification import \
     DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
-from datacontract.model.run import
-    Run, Check
+from datacontract.model.run import Run, Check
 
 
 def _determine_sql_server_type(data_contract, sql_server_type):
     if sql_server_type == "auto":
         if data_contract.servers is None or len(data_contract.servers) == 0:
-            raise RuntimeError(
+            raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
 
         server_types = set([server.type for server in data_contract.servers.values()])
         if "snowflake" in server_types:
             return "snowflake"
         elif "postgres" in server_types:
             return "postgres"
+        elif "databricks" in server_types:
+            return "databricks"
         else:
             # default to snowflake dialect
             return "snowflake"

@@ -94,7 +104,7 @@ class DataContract:
         NoticePeriodLinter(),
         PrimaryFieldUniqueRequired(),
         ValidFieldConstraintsLinter(),
-        DescriptionLinter()
+        DescriptionLinter(),
     }
 
     @classmethod

@@ -104,60 +114,59 @@ class DataContract:
     def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
         """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
 
+        enabled_linters can be either "all" or "none", or a set of linter IDs. The "schema" linter is always enabled, even with enabled_linters="none".
+        """
         run = Run.create_run()
         try:
             run.log_info("Linting data contract")
-            data_contract = resolve.resolve_data_contract(
+            data_contract = resolve.resolve_data_contract(
+                self._data_contract_file,
+                self._data_contract_str,
+                self._data_contract,
+                self._schema_location,
+                inline_definitions=True,
+            )
+            run.checks.append(
+                Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")
+            )
             if enabled_linters == "none":
                 linters_to_check = set()
             elif enabled_linters == "all":
                 linters_to_check = self.all_linters
             elif isinstance(enabled_linters, set):
-                linters_to_check = {linter for linter in self.all_linters
-                                    if linter.id in enabled_linters}
+                linters_to_check = {linter for linter in self.all_linters if linter.id in enabled_linters}
             else:
                 raise RuntimeError(f"Unknown argument enabled_linters={enabled_linters} for lint()")
             for linter in linters_to_check:
                 try:
                     run.checks.extend(linter.lint(data_contract))
                 except Exception as e:
-                    run.checks.append(
+                    run.checks.append(
+                        Check(
+                            type="general",
+                            result="error",
+                            name=f"Linter '{linter.name}'",
+                            reason=str(e),
+                            engine="datacontract",
+                        )
+                    )
             run.dataContractId = data_contract.id
             run.dataContractVersion = data_contract.info.version
         except DataContractException as e:
-            run.checks.append(
-                type=e.type,
-                name=e.name,
-                reason=e.reason,
-                engine=e.engine,
-                details=""
-            ))
+            run.checks.append(
+                Check(type=e.type, result=e.result, name=e.name, reason=e.reason, engine=e.engine, details="")
+            )
             run.log_error(str(e))
         except Exception as e:
-            run.checks.append(
+            run.checks.append(
+                Check(
+                    type="general",
+                    result="error",
+                    name="Check Data Contract",
+                    reason=str(e),
+                    engine="datacontract",
+                )
+            )
             run.log_error(str(e))
         run.finish()
         return run

@@ -165,9 +174,10 @@ class DataContract:
     def test(self) -> Run:
         run = Run.create_run()
         try:
-            run.log_info(
-            data_contract = resolve.resolve_data_contract(
+            run.log_info("Testing data contract")
+            data_contract = resolve.resolve_data_contract(
+                self._data_contract_file, self._data_contract_str, self._data_contract, self._schema_location
+            )
 
             if data_contract.models is None or len(data_contract.models) == 0:
                 raise DataContractException(

@@ -213,23 +223,20 @@ class DataContract:
                     check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
 
         except DataContractException as e:
-            run.checks.append(
-                type=e.type,
-                name=e.name,
-                reason=e.reason,
-                engine=e.engine,
-                details=""
-            ))
+            run.checks.append(
+                Check(type=e.type, result=e.result, name=e.name, reason=e.reason, engine=e.engine, details="")
+            )
             run.log_error(str(e))
         except Exception as e:
-            run.checks.append(
+            run.checks.append(
+                Check(
+                    type="general",
+                    result="error",
+                    name="Test Data Contract",
+                    reason=str(e),
+                    engine="datacontract",
+                )
+            )
             logging.exception("Exception occurred")
             run.log_error(str(e))

@@ -238,45 +245,44 @@ class DataContract:
         if self._publish_url is not None:
             try:
                 publish_datamesh_manager(run, self._publish_url)
-            except:
+            except Exception:
                 logging.error("Failed to publish to datamesh manager")
         if self._publish_to_opentelemetry:
             try:
                 publish_opentelemetry(run)
-            except:
+            except Exception:
                 logging.error("Failed to publish to opentelemetry")
 
         return run
 
-    def breaking(self, other:
-        return self.changelog(
-            other,
-            include_severities=[Severity.ERROR, Severity.WARNING]
-        )
+    def breaking(self, other: "DataContract") -> BreakingChanges:
+        return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
 
     def changelog(
-        self,
-        other: 'DataContract',
-        include_severities: [Severity] = (Severity.ERROR, Severity.WARNING, Severity.INFO)
+        self, other: "DataContract", include_severities: [Severity] = (Severity.ERROR, Severity.WARNING, Severity.INFO)
     ) -> BreakingChanges:
         old = self.get_data_contract_specification()
         new = other.get_data_contract_specification()
 
         breaking_changes = list[BreakingChange]()
 
-        breaking_changes.extend(
+        breaking_changes.extend(
+            quality_breaking_changes(
+                old_quality=old.quality,
+                new_quality=new.quality,
+                new_path=other._data_contract_file,
+                include_severities=include_severities,
+            )
+        )
 
-        breaking_changes.extend(
+        breaking_changes.extend(
+            models_breaking_changes(
+                old_models=old.models,
+                new_models=new.models,
+                new_path=other._data_contract_file,
+                include_severities=include_severities,
+            )
+        )
 
         return BreakingChanges(breaking_changes=breaking_changes)

@@ -290,17 +296,20 @@ class DataContract:
         )
 
     def export(self, export_format, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto") -> str:
-        data_contract = resolve.resolve_data_contract(
+        data_contract = resolve.resolve_data_contract(
+            self._data_contract_file, self._data_contract_str, self._data_contract, inline_definitions=True
+        )
         if export_format == "jsonschema":
             if data_contract.models is None:
-                raise RuntimeError(
+                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
 
             model_names = list(data_contract.models.keys())
 
             if model == "all":
                 if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
+                    raise RuntimeError(
+                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
+                    )
 
                 model_name, model_value = next(iter(data_contract.models.items()))
                 return to_jsonschema_json(model_name, model_value)

@@ -308,7 +317,9 @@ class DataContract:
             model_name = model
             model_value = data_contract.models.get(model_name)
             if model_value is None:
-                raise RuntimeError(
+                raise RuntimeError(
+                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
+                )
 
             return to_jsonschema_json(model_name, model_value)
         if export_format == "sodacl":

@@ -325,7 +336,9 @@ class DataContract:
 
             if model == "all":
                 if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
+                    raise RuntimeError(
+                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
+                    )
 
                 model_name, model_value = next(iter(data_contract.models.items()))
                 return to_dbt_staging_sql(data_contract, model_name, model_value)

@@ -333,7 +346,9 @@ class DataContract:
             model_name = model
             model_value = data_contract.models.get(model_name)
             if model_value is None:
-                raise RuntimeError(
+                raise RuntimeError(
+                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
+                )
 
             return to_dbt_staging_sql(data_contract, model_name, model_value)
         if export_format == "odcs":

@@ -350,7 +365,9 @@ class DataContract:
 
             if model == "all":
                 if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
+                    raise RuntimeError(
+                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
+                    )
 
                 model_name, model_value = next(iter(data_contract.models.items()))
                 return to_avro_schema_json(model_name, model_value)

@@ -358,7 +375,9 @@ class DataContract:
             model_name = model
             model_value = data_contract.models.get(model_name)
             if model_value is None:
-                raise RuntimeError(
+                raise RuntimeError(
+                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
+                )
 
             return to_avro_schema_json(model_name, model_value)
         if export_format == "avro-idl":

@@ -378,7 +397,9 @@ class DataContract:
 
             if model == "all":
                 if len(data_contract.models.items()) != 1:
-                    raise RuntimeError(
+                    raise RuntimeError(
+                        f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
+                    )
 
                 model_name, model_value = next(iter(data_contract.models.items()))
                 return to_sql_query(data_contract, model_name, model_value, server_type)

@@ -386,9 +407,38 @@ class DataContract:
             model_name = model
             model_value = data_contract.models.get(model_name)
             if model_value is None:
-                raise RuntimeError(
+                raise RuntimeError(
+                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
+                )
 
             return to_sql_query(data_contract, model_name, model_value, server_type)
+
+        if export_format == "great-expectations":
+            if data_contract.models is None:
+                raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
+
+            model_names = list(data_contract.models.keys())
+
+            if model == "all":
+                if len(data_contract.models.items()) != 1:
+                    raise RuntimeError(
+                        f"Export to {export_format} is model specific. Specify the model via --model "
+                        f"$MODEL_NAME. Available models: {model_names}"
+                    )
+
+                model_name, model_value = next(iter(data_contract.models.items()))
+                return to_great_expectations(data_contract, model_name)
+            else:
+                model_name = model
+                model_value = data_contract.models.get(model_name)
+                if model_value is None:
+                    raise RuntimeError(
+                        f"Model {model_name} not found in the data contract. " f"Available models: {model_names}"
+                    )
+
+                return to_great_expectations(data_contract, model_name)
+        if export_format == "pydantic-model":
+            return to_pydantic_model_str(data_contract)
         else:
             print(f"Export format {export_format} not supported.")
             return ""

@@ -402,13 +452,13 @@ class DataContract:
             run.log_info(f"Creating example file {p}")
             with open(p, "w") as f:
                 content = ""
-                if format == "json" and
+                if format == "json" and isinstance(example.data, list):
                     content = json.dumps(example.data)
-                elif format == "json" and
+                elif format == "json" and isinstance(example.data, str):
                     content = example.data
-                elif format == "yaml" and
+                elif format == "yaml" and isinstance(example.data, list):
                     content = yaml.dump(example.data, allow_unicode=True)
-                elif format == "yaml" and
+                elif format == "yaml" and isinstance(example.data, str):
                     content = example.data
                 elif format == "csv":
                     content = example.data
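The reworked lint() above now documents its enabled_linters contract in the docstring. A short sketch of how a caller might select a subset of linters; the linter ID "notice-period" is a hypothetical stand-in, since the diff shows the linter classes but not their IDs:

from datacontract.data_contract import DataContract

contract = DataContract(data_contract_file="datacontract.yaml")

# "all" (the default) runs every registered linter; "none" still runs the
# schema check, per the docstring added above. A set selects by linter.id.
run = contract.lint(enabled_linters={"notice-period"})  # linter ID is hypothetical

for check in run.checks:
    print(check.result, check.name, check.reason)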
datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py
CHANGED
@@ -3,7 +3,9 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run
 
 
-def check_that_datacontract_contains_valid_server_configuration(
+def check_that_datacontract_contains_valid_server_configuration(
+    run: Run, data_contract: DataContractSpecification, server_name: str
+):
     if data_contract.servers is None:
         raise DataContractException(
             type="lint",

@@ -28,4 +30,6 @@ def check_that_datacontract_contains_valid_server_configuration(run: Run, data_c
             reason=f"Cannot find server '{server_name}' in the data contract servers configuration. Skip executing tests.",
             engine="datacontract",
         )
+
+
     # TODO check for server.type, if all required fields are present
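The signature re-wrap above keeps the check's failure mode: a DataContractException whose fields mirror a Check. A hedged sketch of the calling pattern, with run and data_contract assumed in scope and "production" as a hypothetical server name:

from datacontract.model.exceptions import DataContractException

try:
    # run and data_contract are assumed to be in scope; "production" is hypothetical.
    check_that_datacontract_contains_valid_server_configuration(run, data_contract, "production")
except DataContractException as e:
    # The test() path in data_contract.py converts this into a failed Check.
    print(e.type, e.result, e.name, e.reason, e.engine)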
datacontract/engines/datacontract/check_that_datacontract_file_exists.py
CHANGED

@@ -9,12 +9,13 @@ def check_that_datacontract_file_exists(run: Run, file_path: str):
     if file_path.startswith("http://") or file_path.startswith("https://"):
         return
     if not os.path.exists(file_path):
-        run.checks.append(
+        run.checks.append(
+            Check(
+                type="lint",
+                name="Check that data contract file exists",
+                result="failed",
+                reason=f"The file '{file_path}' does not exist.",
+                engine="datacontract-cli",
+            )
+        )
         raise Exception(f"The file '{file_path}' does not exist.")
-
datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py
CHANGED

@@ -14,31 +14,35 @@ def check_that_datacontract_str_is_valid(run: Run, data_contract_str: str):
     try:
         fastjsonschema.validate(schema, data_contract_yaml)
         logging.debug("YAML data is valid.")
-        run.checks.append(
+        run.checks.append(
+            Check(
+                type="lint",
+                result="passed",
+                name="Check that data contract YAML is valid",
+                engine="datacontract",
+            )
+        )
     except JsonSchemaValueException as e:
         logging.warning("YAML data is invalid.")
         logging.warning(f"Validation error: {e.message}")
-        run.checks.append(
+        run.checks.append(
+            Check(
+                type="lint",
+                result="failed",
+                name="Check that data contract YAML is valid",
+                reason=e.message,
+                engine="datacontract",
+            )
+        )
     except Exception as e:
         logging.warning("YAML data is invalid.")
         logging.warning(f"Validation error: {str(e)}")
-        run.checks.append(
+        run.checks.append(
+            Check(
+                type="lint",
+                result="failed",
+                name="Check that data contract YAML is valid",
+                reason=str(e),
+                engine="datacontract",
+            )
+        )
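The validation logic above follows the standard fastjsonschema pattern: validate() raises JsonSchemaValueException, whose .message feeds the failed Check. A self-contained toy example of that pattern; the schema here is a stand-in, not the actual data contract JSON schema:

import fastjsonschema
from fastjsonschema import JsonSchemaValueException

schema = {"type": "object", "required": ["dataContractSpecification"]}  # toy stand-in schema

try:
    fastjsonschema.validate(schema, {"id": "orders"})
except JsonSchemaValueException as e:
    print(f"Validation error: {e.message}")  # human-readable reason, as logged above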
datacontract/engines/fastjsonschema/check_jsonschema.py
CHANGED

@@ -25,7 +25,7 @@ def validate_json_stream(model_name, validate, json_stream):
             model=model_name,
             reason=e.message,
             engine="jsonschema",
-            original_exception=e
+            original_exception=e,
         )
 
 
@@ -79,16 +79,16 @@ def process_local_file(run, server, model_name, validate):
         return process_directory(run, path, server, model_name, validate)
     else:
         logging.info(f"Processing file {path}")
-        with open(path,
+        with open(path, "r") as file:
             process_json_file(run, model_name, validate, file, server.delimiter)
 
 
 def process_directory(run, path, server, model_name, validate):
     success = True
     for filename in os.listdir(path):
-        if filename.endswith(
+        if filename.endswith(".json"):  # or make this a parameter
             file_path = os.path.join(path, filename)
-            with open(file_path,
+            with open(file_path, "r") as file:
                 if not process_json_file(run, model_name, validate, file, server.delimiter):
                     success = False
                     break

@@ -127,13 +127,15 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
 
     # Early exit conditions
     if server.format != "json":
-        run.checks.append(
+        run.checks.append(
+            Check(
+                type="schema",
+                name="Check that JSON has valid schema",
+                result="warning",
+                reason="Server format is not 'json'. Skip validating jsonschema.",
+                engine="jsonschema",
+            )
+        )
         run.log_warn("jsonschema: Server format is not 'json'. Skip jsonschema checks.")
         return
 
@@ -155,21 +157,25 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
     elif server.type == "s3":
         process_s3_file(server, model_name, validate)
     else:
-        run.checks.append(
+        run.checks.append(
+            Check(
+                type="schema",
+                name="Check that JSON has valid schema",
+                model=model_name,
+                result="warn",
+                reason=f"Server type {server.type} not supported",
+                engine="jsonschema",
+            )
+        )
+        return
+
+    run.checks.append(
+        Check(
             type="schema",
             name="Check that JSON has valid schema",
             model=model_name,
-            result="
-            reason=
+            result="passed",
+            reason="All JSON entries are valid.",
             engine="jsonschema",
-        )
-
-
-    run.checks.append(Check(
-        type="schema",
-        name="Check that JSON has valid schema",
-        model=model_name,
-        result="passed",
-        reason="All JSON entries are valid.",
-        engine="jsonschema",
-    ))
+        )
+    )
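process_json_file() above receives server.delimiter, which controls how a file is split into JSON entries before each one is handed to the compiled validator. A hedged sketch of the line-delimited case, assuming `validate` is a validator compiled with fastjsonschema.compile() and that "new_line" is the delimiter value in play:

import json

def validate_new_line_delimited(validate, file):
    # One JSON object per line (NDJSON); each entry is validated separately,
    # mirroring how process_json_file() would treat a "new_line" delimiter.
    for line in file:
        if line.strip():
            validate(json.loads(line))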
datacontract/engines/fastjsonschema/s3/s3_read_files.py
CHANGED

@@ -14,9 +14,11 @@ def yield_s3_files(s3_endpoint_url, s3_location):
 
 
 def s3_fs(s3_endpoint_url):
-    aws_access_key_id = os.getenv(
-    aws_secret_access_key = os.getenv(
-    return s3fs.S3FileSystem(
+    aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
+    aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+    return s3fs.S3FileSystem(
+        key=aws_access_key_id,
+        secret=aws_secret_access_key,
+        anon=aws_access_key_id is None,
+        client_kwargs={"endpoint_url": s3_endpoint_url},
+    )
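The rewritten s3_fs() adds an anonymous-access fallback: when DATACONTRACT_S3_ACCESS_KEY_ID is unset, anon=True lets the filesystem read public buckets. A small sketch of the same pattern; the endpoint URL and bucket path are hypothetical placeholders:

import os
import s3fs

key = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
secret = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")

# anon=True when no key is configured, so public buckets remain readable.
fs = s3fs.S3FileSystem(
    key=key,
    secret=secret,
    anon=key is None,
    client_kwargs={"endpoint_url": "https://s3.eu-central-1.amazonaws.com"},  # hypothetical endpoint
)

for path in fs.ls("s3://my-bucket/data/"):  # hypothetical bucket
    print(path)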