datacontract-cli 0.9.7__py3-none-any.whl → 0.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +48 -57
- datacontract/cli.py +98 -80
- datacontract/data_contract.py +156 -106
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +46 -35
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +4 -4
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +3 -7
- datacontract/export/avro_idl_converter.py +65 -42
- datacontract/export/dbt_converter.py +43 -32
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +5 -7
- datacontract/export/protobuf_converter.py +12 -10
- datacontract/export/pydantic_converter.py +140 -0
- datacontract/export/rdf_converter.py +34 -11
- datacontract/export/sodacl_converter.py +24 -24
- datacontract/export/sql_converter.py +20 -9
- datacontract/export/sql_type_converter.py +44 -4
- datacontract/export/terraform_converter.py +4 -3
- datacontract/imports/avro_importer.py +32 -10
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +4 -9
- datacontract/integration/publish_opentelemetry.py +30 -16
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +26 -31
- datacontract/lint/linters/description_linter.py +12 -21
- datacontract/lint/linters/example_model_linter.py +28 -29
- datacontract/lint/linters/field_pattern_linter.py +8 -8
- datacontract/lint/linters/field_reference_linter.py +11 -10
- datacontract/lint/linters/notice_period_linter.py +18 -22
- datacontract/lint/linters/primary_field_linter.py +10 -12
- datacontract/lint/linters/quality_schema_linter.py +16 -20
- datacontract/lint/linters/valid_constraints_linter.py +42 -37
- datacontract/lint/resolve.py +7 -10
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +2 -1
- datacontract/model/data_contract_specification.py +8 -7
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +1 -1
- datacontract/web.py +3 -7
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +176 -37
- datacontract_cli-0.9.8.dist-info/RECORD +63 -0
- datacontract_cli-0.9.7.dist-info/RECORD +0 -61
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
|
+
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
1
2
|
from ..lint import Linter, LinterResult
|
|
2
|
-
from datacontract.model.data_contract_specification import\
|
|
3
|
-
DataContractSpecification, Model
|
|
4
3
|
|
|
5
4
|
|
|
6
5
|
class DescriptionLinter(Linter):
|
|
@@ -14,30 +13,22 @@ class DescriptionLinter(Linter):
|
|
|
14
13
|
def id(self) -> str:
|
|
15
14
|
return "description"
|
|
16
15
|
|
|
17
|
-
def lint_implementation(
|
|
18
|
-
self,
|
|
19
|
-
contract: DataContractSpecification
|
|
20
|
-
) -> LinterResult:
|
|
16
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
21
17
|
result = LinterResult()
|
|
22
18
|
if not contract.info or not contract.info.description:
|
|
23
|
-
result = result.with_error(
|
|
24
|
-
|
|
25
|
-
for (model_name, model) in contract.models.items():
|
|
19
|
+
result = result.with_error("Contract has empty description.")
|
|
20
|
+
for model_name, model in contract.models.items():
|
|
26
21
|
if not model.description:
|
|
27
|
-
result = result.with_error(
|
|
28
|
-
|
|
29
|
-
)
|
|
30
|
-
for (field_name, field) in model.fields.items():
|
|
22
|
+
result = result.with_error(f"Model '{model_name}' has empty description.")
|
|
23
|
+
for field_name, field in model.fields.items():
|
|
31
24
|
if not field.description:
|
|
32
25
|
result = result.with_error(
|
|
33
|
-
f"Field '{field_name}' in model '{model_name}'"
|
|
34
|
-
|
|
35
|
-
for
|
|
26
|
+
f"Field '{field_name}' in model '{model_name}'" f" has empty description."
|
|
27
|
+
)
|
|
28
|
+
for definition_name, definition in contract.definitions.items():
|
|
36
29
|
if not definition.description:
|
|
37
|
-
result = result.with_error(
|
|
38
|
-
|
|
39
|
-
for (index, example) in enumerate(contract.examples):
|
|
30
|
+
result = result.with_error(f"Definition '{definition_name}' has empty description.")
|
|
31
|
+
for index, example in enumerate(contract.examples):
|
|
40
32
|
if not example.description:
|
|
41
|
-
result = result.with_error(
|
|
42
|
-
f"Example {index + 1} has empty description.")
|
|
33
|
+
result = result.with_error(f"Example {index + 1} has empty description.")
|
|
43
34
|
return result
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import csv
|
|
2
|
-
import yaml
|
|
3
|
-
import json
|
|
4
2
|
import io
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
5
6
|
|
|
7
|
+
from datacontract.model.data_contract_specification import \
|
|
8
|
+
DataContractSpecification, Example
|
|
6
9
|
from ..lint import Linter, LinterResult
|
|
7
|
-
from datacontract.model.data_contract_specification import DataContractSpecification, Example
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
class ExampleModelLinter(Linter):
|
|
11
|
-
|
|
12
13
|
@property
|
|
13
14
|
def name(self) -> str:
|
|
14
15
|
return "Example(s) match model"
|
|
@@ -37,42 +38,39 @@ class ExampleModelLinter(Linter):
|
|
|
37
38
|
raise NotImplementedError(f"Unknown type {example.type}")
|
|
38
39
|
else:
|
|
39
40
|
# Checked in lint_implementation, shouldn't happen.
|
|
40
|
-
raise NotImplementedError("Can't lint object examples.")
|
|
41
|
+
raise NotImplementedError("Can't lint object examples.")
|
|
41
42
|
|
|
42
|
-
def lint_implementation(
|
|
43
|
-
self,
|
|
44
|
-
contract: DataContractSpecification
|
|
45
|
-
) -> LinterResult:
|
|
43
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
46
44
|
"""Check whether the example(s) headers match the model.
|
|
47
45
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
46
|
+
This linter checks whether the example's fields match the model
|
|
47
|
+
fields, and whether all required fields of the model are present in
|
|
48
|
+
the example.
|
|
51
49
|
"""
|
|
52
50
|
result = LinterResult()
|
|
53
51
|
examples = contract.examples
|
|
54
52
|
models = contract.models
|
|
55
53
|
examples_with_model = []
|
|
56
|
-
for
|
|
54
|
+
for index, example in enumerate(examples):
|
|
57
55
|
if example.model not in models:
|
|
58
|
-
result = result.with_error(
|
|
59
|
-
f"Example {index + 1} has non-existent model '{example.model}'")
|
|
56
|
+
result = result.with_error(f"Example {index + 1} has non-existent model '{example.model}'")
|
|
60
57
|
else:
|
|
61
|
-
examples_with_model.append(
|
|
62
|
-
|
|
63
|
-
for (index, example, model) in examples_with_model:
|
|
58
|
+
examples_with_model.append((index, example, models.get(example.model)))
|
|
59
|
+
for index, example, model in examples_with_model:
|
|
64
60
|
if example.type == "custom":
|
|
65
|
-
result = result.with_warning(
|
|
66
|
-
|
|
67
|
-
|
|
61
|
+
result = result.with_warning(
|
|
62
|
+
f"Example {index + 1} has type" ' "custom", cannot check model' " conformance"
|
|
63
|
+
)
|
|
68
64
|
elif not isinstance(example.data, str):
|
|
69
|
-
result = result.with_warning(
|
|
70
|
-
"string example, can only lint string examples for now."
|
|
65
|
+
result = result.with_warning(
|
|
66
|
+
f"Example {index + 1} is not a " "string example, can only lint string examples for now."
|
|
67
|
+
)
|
|
71
68
|
elif model.type == "object":
|
|
72
69
|
result = result.with_warning(
|
|
73
70
|
f"Example {index + 1} uses a "
|
|
74
71
|
f"model '{example.model}' with type 'object'. Linting is "
|
|
75
|
-
"currently only supported for 'table' models"
|
|
72
|
+
"currently only supported for 'table' models"
|
|
73
|
+
)
|
|
76
74
|
else:
|
|
77
75
|
if example.type in ("csv", "yaml", "json"):
|
|
78
76
|
headers = self.get_example_headers(example)
|
|
@@ -80,13 +78,14 @@ class ExampleModelLinter(Linter):
|
|
|
80
78
|
if example_header not in model.fields:
|
|
81
79
|
result = result.with_error(
|
|
82
80
|
f"Example {index + 1} has field '{example_header}'"
|
|
83
|
-
f" that's not contained in model '{example.model}'"
|
|
84
|
-
|
|
81
|
+
f" that's not contained in model '{example.model}'"
|
|
82
|
+
)
|
|
83
|
+
for field_name, field_value in model.fields.items():
|
|
85
84
|
if field_name not in headers and field_value.required:
|
|
86
85
|
result = result.with_error(
|
|
87
86
|
f"Example {index + 1} is missing field '{field_name}'"
|
|
88
|
-
f" required by model '{example.model}'"
|
|
87
|
+
f" required by model '{example.model}'"
|
|
88
|
+
)
|
|
89
89
|
else:
|
|
90
|
-
result = result.with_error(f"Example {index + 1} has unknown type"
|
|
91
|
-
f"{example.type}")
|
|
90
|
+
result = result.with_error(f"Example {index + 1} has unknown type" f"{example.type}")
|
|
92
91
|
return result
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
|
+
from datacontract.model.data_contract_specification import \
|
|
4
|
+
DataContractSpecification
|
|
3
5
|
from ..lint import Linter, LinterResult
|
|
4
|
-
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class FieldPatternLinter(Linter):
|
|
7
9
|
"""Checks that all patterns defined for fields are correct Python regex
|
|
8
|
-
|
|
10
|
+
syntax.
|
|
9
11
|
|
|
10
12
|
"""
|
|
13
|
+
|
|
11
14
|
@property
|
|
12
15
|
def name(self):
|
|
13
16
|
return "Field pattern is correct regex"
|
|
@@ -16,13 +19,10 @@ class FieldPatternLinter(Linter):
|
|
|
16
19
|
def id(self) -> str:
|
|
17
20
|
return "field-pattern"
|
|
18
21
|
|
|
19
|
-
def lint_implementation(
|
|
20
|
-
self,
|
|
21
|
-
contract: DataContractSpecification
|
|
22
|
-
) -> LinterResult:
|
|
22
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
23
23
|
result = LinterResult()
|
|
24
|
-
for
|
|
25
|
-
for
|
|
24
|
+
for model_name, model in contract.models.items():
|
|
25
|
+
for field_name, field in model.fields.items():
|
|
26
26
|
if field.pattern:
|
|
27
27
|
try:
|
|
28
28
|
re.compile(field.pattern)
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
from ..lint import Linter, LinterResult
|
|
2
1
|
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
2
|
+
from ..lint import Linter, LinterResult
|
|
3
|
+
|
|
3
4
|
|
|
4
5
|
class FieldReferenceLinter(Linter):
|
|
5
6
|
"""Checks that all references definitions in fields refer to existing
|
|
6
|
-
|
|
7
|
+
fields.
|
|
7
8
|
|
|
8
9
|
"""
|
|
10
|
+
|
|
9
11
|
@property
|
|
10
12
|
def name(self):
|
|
11
13
|
return "Field references existing field"
|
|
@@ -14,24 +16,23 @@ class FieldReferenceLinter(Linter):
|
|
|
14
16
|
def id(self) -> str:
|
|
15
17
|
return "field-reference"
|
|
16
18
|
|
|
17
|
-
def lint_implementation(
|
|
18
|
-
self,
|
|
19
|
-
contract: DataContractSpecification
|
|
20
|
-
) -> LinterResult:
|
|
19
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
21
20
|
result = LinterResult()
|
|
22
|
-
for
|
|
23
|
-
for
|
|
21
|
+
for model_name, model in contract.models.items():
|
|
22
|
+
for field_name, field in model.fields.items():
|
|
24
23
|
if field.references:
|
|
25
24
|
(ref_model, ref_field) = field.references.split(".", maxsplit=2)
|
|
26
25
|
if ref_model not in contract.models:
|
|
27
26
|
result = result.with_error(
|
|
28
27
|
f"Field '{field_name}' in model '{model_name}'"
|
|
29
|
-
f" references non-existing model '{ref_model}'."
|
|
28
|
+
f" references non-existing model '{ref_model}'."
|
|
29
|
+
)
|
|
30
30
|
else:
|
|
31
31
|
ref_model_obj = contract.models[ref_model]
|
|
32
32
|
if ref_field not in ref_model_obj.fields:
|
|
33
33
|
result = result.with_error(
|
|
34
34
|
f"Field '{field_name}' in model '{model_name}'"
|
|
35
35
|
f" references non-existing field '{ref_field}'"
|
|
36
|
-
f" in model '{ref_model}'."
|
|
36
|
+
f" in model '{ref_model}'."
|
|
37
|
+
)
|
|
37
38
|
return result
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
|
+
from datacontract.model.data_contract_specification import \
|
|
4
|
+
DataContractSpecification
|
|
3
5
|
from ..lint import Linter, LinterResult
|
|
4
|
-
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class NoticePeriodLinter(Linter):
|
|
7
9
|
@property
|
|
@@ -25,17 +27,13 @@ class NoticePeriodLinter(Linter):
|
|
|
25
27
|
(:?[0-9\.,]+M)? # Number of minutes
|
|
26
28
|
(:?[0-9\.,]+S)? # Number of seconds
|
|
27
29
|
)?
|
|
28
|
-
""",
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
datetime_extended = re.compile(
|
|
33
|
-
r"P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
|
|
30
|
+
""",
|
|
31
|
+
re.VERBOSE,
|
|
32
|
+
)
|
|
33
|
+
datetime_basic = re.compile(r"P\d{8}T\d{6}")
|
|
34
|
+
datetime_extended = re.compile(r"P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
|
|
34
35
|
|
|
35
|
-
def lint_implementation(
|
|
36
|
-
self,
|
|
37
|
-
contract: DataContractSpecification
|
|
38
|
-
) -> LinterResult:
|
|
36
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
39
37
|
"""Check whether the notice period is specified using ISO8601 duration syntax."""
|
|
40
38
|
if not contract.terms:
|
|
41
39
|
return LinterResult.cautious("No terms defined.")
|
|
@@ -43,17 +41,15 @@ class NoticePeriodLinter(Linter):
|
|
|
43
41
|
if not period:
|
|
44
42
|
return LinterResult.cautious("No notice period defined.")
|
|
45
43
|
if not period.startswith("P"):
|
|
46
|
-
return LinterResult.erroneous(
|
|
47
|
-
f"Notice period '{period}' is not a valid"
|
|
48
|
-
"ISO8601 duration.")
|
|
44
|
+
return LinterResult.erroneous(f"Notice period '{period}' is not a valid" "ISO8601 duration.")
|
|
49
45
|
if period == "P":
|
|
50
46
|
return LinterResult.erroneous(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
not self.datetime_basic.fullmatch(period)
|
|
56
|
-
not self.datetime_extended.fullmatch(period)
|
|
57
|
-
|
|
58
|
-
|
|
47
|
+
"Notice period 'P' is not a valid" "ISO8601 duration, requires at least one" "duration to be specified."
|
|
48
|
+
)
|
|
49
|
+
if (
|
|
50
|
+
not self.simple.fullmatch(period)
|
|
51
|
+
and not self.datetime_basic.fullmatch(period)
|
|
52
|
+
and not self.datetime_extended.fullmatch(period)
|
|
53
|
+
):
|
|
54
|
+
return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.")
|
|
59
55
|
return LinterResult()
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
from ..lint import Linter, LinterResult
|
|
2
1
|
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
2
|
+
from ..lint import Linter, LinterResult
|
|
3
|
+
|
|
3
4
|
|
|
4
5
|
class PrimaryFieldUniqueRequired(Linter):
|
|
5
6
|
"""Checks that all fields defined as primary are also defined as unique and required."""
|
|
7
|
+
|
|
6
8
|
@property
|
|
7
9
|
def name(self) -> str:
|
|
8
10
|
return "Model primary fields unique and required"
|
|
@@ -11,20 +13,16 @@ class PrimaryFieldUniqueRequired(Linter):
|
|
|
11
13
|
def id(self) -> str:
|
|
12
14
|
return "notice-period"
|
|
13
15
|
|
|
14
|
-
def lint_implementation(
|
|
15
|
-
self,
|
|
16
|
-
contract: DataContractSpecification
|
|
17
|
-
) -> LinterResult:
|
|
16
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
18
17
|
if not contract.models:
|
|
19
18
|
return LinterResult.cautious("No models defined on contract.")
|
|
20
19
|
result = LinterResult()
|
|
21
|
-
for
|
|
22
|
-
for
|
|
23
|
-
if
|
|
24
|
-
and not field.required
|
|
25
|
-
and not field.unique):
|
|
20
|
+
for model_name, model in contract.models.items():
|
|
21
|
+
for field_name, field in model.fields.items():
|
|
22
|
+
if field.primary and not field.required and not field.unique:
|
|
26
23
|
result = result.with_error(
|
|
27
24
|
f"Field '{field_name}' in model '{model_name}'"
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
" is marked as primary, but not as unique"
|
|
26
|
+
" and required."
|
|
27
|
+
)
|
|
30
28
|
return result
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import yaml
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from datacontract.model.data_contract_specification import\
|
|
3
|
+
from datacontract.model.data_contract_specification import \
|
|
5
4
|
DataContractSpecification, Model
|
|
5
|
+
from ..lint import Linter, LinterResult
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class QualityUsesSchemaLinter(Linter):
|
|
9
|
-
|
|
10
9
|
@property
|
|
11
10
|
def name(self) -> str:
|
|
12
11
|
return "Quality check(s) use model"
|
|
@@ -15,23 +14,21 @@ class QualityUsesSchemaLinter(Linter):
|
|
|
15
14
|
def id(self) -> str:
|
|
16
15
|
return "quality-schema"
|
|
17
16
|
|
|
18
|
-
def lint_sodacl(self, check, models: dict[str, Model])
|
|
19
|
-
LinterResult:
|
|
17
|
+
def lint_sodacl(self, check, models: dict[str, Model]) -> LinterResult:
|
|
20
18
|
result = LinterResult()
|
|
21
19
|
for sodacl_check in check.keys():
|
|
22
|
-
table_name = sodacl_check[len("checks for "):]
|
|
20
|
+
table_name = sodacl_check[len("checks for ") :]
|
|
23
21
|
if table_name not in models:
|
|
24
|
-
result = result.with_error(
|
|
25
|
-
f"Quality check on unknown model '{table_name}'")
|
|
22
|
+
result = result.with_error(f"Quality check on unknown model '{table_name}'")
|
|
26
23
|
return result
|
|
27
24
|
|
|
28
|
-
def lint_montecarlo(self, check, models: dict[str, Model])
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
25
|
+
def lint_montecarlo(self, check, models: dict[str, Model]) -> LinterResult:
|
|
26
|
+
return LinterResult().with_warning("Linting montecarlo checks is not currently implemented")
|
|
27
|
+
|
|
28
|
+
def lint_great_expectations(self, check, models: dict[str, Model]) -> LinterResult:
|
|
29
|
+
return LinterResult().with_warning("Linting great expectations checks is not currently implemented")
|
|
32
30
|
|
|
33
|
-
def lint_implementation(self, contract: DataContractSpecification)
|
|
34
|
-
LinterResult:
|
|
31
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
35
32
|
result = LinterResult()
|
|
36
33
|
models = contract.models
|
|
37
34
|
check = contract.quality
|
|
@@ -45,12 +42,11 @@ class QualityUsesSchemaLinter(Linter):
|
|
|
45
42
|
check_specification = check.specification
|
|
46
43
|
match check.type:
|
|
47
44
|
case "SodaCL":
|
|
48
|
-
result = result.combine(
|
|
49
|
-
self.lint_sodacl(check_specification, models))
|
|
45
|
+
result = result.combine(self.lint_sodacl(check_specification, models))
|
|
50
46
|
case "montecarlo":
|
|
51
|
-
result = result.combine(
|
|
52
|
-
|
|
47
|
+
result = result.combine(self.lint_montecarlo(check_specification, models))
|
|
48
|
+
case "great-expectations":
|
|
49
|
+
result = result.combine(self.lint_great_expectations(check_specification, models))
|
|
53
50
|
case _:
|
|
54
|
-
result = result.with_warning("Can't lint quality check "
|
|
55
|
-
f"with type '{check.type}'")
|
|
51
|
+
result = result.with_warning("Can't lint quality check " f"with type '{check.type}'")
|
|
56
52
|
return result
|
|
@@ -1,17 +1,16 @@
|
|
|
1
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field
|
|
1
2
|
from ..lint import Linter, LinterResult
|
|
2
|
-
from datacontract.model.data_contract_specification import\
|
|
3
|
-
DataContractSpecification, Field
|
|
4
3
|
|
|
5
4
|
|
|
6
5
|
class ValidFieldConstraintsLinter(Linter):
|
|
7
6
|
"""Check validity of field constraints.
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
More precisely, check that only numeric constraints are specified on
|
|
9
|
+
fields of numeric type and string constraints on fields of string type.
|
|
10
|
+
Additionally, the linter checks that defined constraints make sense.
|
|
11
|
+
Minimum values should not be greater than maximum values, exclusive and
|
|
12
|
+
non-exclusive minimum and maximum should not be combined and string
|
|
13
|
+
pattern and format should not be combined.
|
|
15
14
|
|
|
16
15
|
"""
|
|
17
16
|
|
|
@@ -20,52 +19,60 @@ class ValidFieldConstraintsLinter(Linter):
|
|
|
20
19
|
"format": set(["string", "text", "varchar"]),
|
|
21
20
|
"minLength": set(["string", "text", "varchar"]),
|
|
22
21
|
"maxLength": set(["string", "text", "varchar"]),
|
|
23
|
-
"minimum": set(["int", "integer", "number", "decimal", "numeric",
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"maximum": set(["int", "integer", "number", "decimal", "numeric",
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
22
|
+
"minimum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
|
|
23
|
+
"exclusiveMinimum": set(
|
|
24
|
+
["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
|
|
25
|
+
),
|
|
26
|
+
"maximum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
|
|
27
|
+
"exclusiveMaximum": set(
|
|
28
|
+
["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
|
|
29
|
+
),
|
|
31
30
|
}
|
|
32
31
|
|
|
33
32
|
def check_minimum_maximum(self, field: Field, field_name: str, model_name: str) -> LinterResult:
|
|
34
33
|
(min, max, xmin, xmax) = (field.minimum, field.maximum, field.exclusiveMinimum, field.exclusiveMaximum)
|
|
35
|
-
match (
|
|
36
|
-
|
|
34
|
+
match (
|
|
35
|
+
"minimum" in field.model_fields_set,
|
|
36
|
+
"maximum" in field.model_fields_set,
|
|
37
|
+
"exclusiveMinimum" in field.model_fields_set,
|
|
38
|
+
"exclusiveMaximum" in field.model_fields_set,
|
|
39
|
+
):
|
|
37
40
|
case (True, True, _, _) if min > max:
|
|
38
41
|
return LinterResult.erroneous(
|
|
39
|
-
f"Minimum {min} is greater than maximum {max} on "
|
|
40
|
-
|
|
42
|
+
f"Minimum {min} is greater than maximum {max} on " f"field '{field_name}' in model '{model_name}'."
|
|
43
|
+
)
|
|
41
44
|
case (_, _, True, True) if xmin >= xmax:
|
|
42
|
-
|
|
45
|
+
return LinterResult.erroneous(
|
|
43
46
|
f"Exclusive minimum {xmin} is greater than exclusive"
|
|
44
|
-
f" maximum {xmax} on field '{field_name}' in model '{model_name}'."
|
|
47
|
+
f" maximum {xmax} on field '{field_name}' in model '{model_name}'."
|
|
48
|
+
)
|
|
45
49
|
case (True, True, True, True):
|
|
46
50
|
return LinterResult.erroneous(
|
|
47
51
|
f"Both exclusive and non-exclusive minimum and maximum are "
|
|
48
|
-
f"defined on field '{field_name}' in model '{model_name}'."
|
|
52
|
+
f"defined on field '{field_name}' in model '{model_name}'."
|
|
53
|
+
)
|
|
49
54
|
case (True, _, True, _):
|
|
50
55
|
return LinterResult.erroneous(
|
|
51
56
|
f"Both exclusive and non-exclusive minimum are "
|
|
52
|
-
f"defined on field '{field_name}' in model '{model_name}'."
|
|
57
|
+
f"defined on field '{field_name}' in model '{model_name}'."
|
|
58
|
+
)
|
|
53
59
|
case (_, True, _, True):
|
|
54
60
|
return LinterResult.erroneous(
|
|
55
61
|
f"Both exclusive and non-exclusive maximum are "
|
|
56
|
-
f"defined on field '{field_name}' in model '{model_name}'."
|
|
62
|
+
f"defined on field '{field_name}' in model '{model_name}'."
|
|
63
|
+
)
|
|
57
64
|
return LinterResult()
|
|
58
65
|
|
|
59
66
|
def check_string_constraints(self, field: Field, field_name: str, model_name: str) -> LinterResult:
|
|
60
67
|
result = LinterResult()
|
|
61
68
|
if field.minLength and field.maxLength and field.minLength > field.maxLength:
|
|
62
69
|
result = result.with_error(
|
|
63
|
-
f"Minimum length is greater that maximum length on"
|
|
64
|
-
|
|
70
|
+
f"Minimum length is greater that maximum length on" f" field '{field_name}' in model '{model_name}'."
|
|
71
|
+
)
|
|
65
72
|
if field.pattern and field.format:
|
|
66
73
|
result = result.with_error(
|
|
67
|
-
f"Both a pattern and a format are defined for field"
|
|
68
|
-
|
|
74
|
+
f"Both a pattern and a format are defined for field" f" '{field_name}' in model '{model_name}'."
|
|
75
|
+
)
|
|
69
76
|
return result
|
|
70
77
|
|
|
71
78
|
@property
|
|
@@ -76,19 +83,17 @@ class ValidFieldConstraintsLinter(Linter):
|
|
|
76
83
|
def id(self):
|
|
77
84
|
return "field-constraints"
|
|
78
85
|
|
|
79
|
-
def lint_implementation(
|
|
80
|
-
self,
|
|
81
|
-
contract: DataContractSpecification
|
|
82
|
-
) -> LinterResult:
|
|
86
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
83
87
|
result = LinterResult()
|
|
84
|
-
for
|
|
85
|
-
for
|
|
86
|
-
for
|
|
88
|
+
for model_name, model in contract.models.items():
|
|
89
|
+
for field_name, field in model.fields.items():
|
|
90
|
+
for _property, allowed_types in self.valid_types_for_constraint.items():
|
|
87
91
|
if _property in field.model_fields_set and field.type not in allowed_types:
|
|
88
92
|
result = result.with_error(
|
|
89
93
|
f"Forbidden constraint '{_property}' defined on field "
|
|
90
94
|
f"'{field_name}' in model '{model_name}'. Field type "
|
|
91
|
-
f"is '{field.type}'."
|
|
95
|
+
f"is '{field.type}'."
|
|
96
|
+
)
|
|
92
97
|
result = result.combine(self.check_minimum_maximum(field, field_name, model_name))
|
|
93
98
|
result = result.combine(self.check_string_constraints(field, field_name, model_name))
|
|
94
99
|
return result
|
datacontract/lint/resolve.py
CHANGED
|
@@ -17,7 +17,7 @@ def resolve_data_contract(
|
|
|
17
17
|
data_contract_str: str = None,
|
|
18
18
|
data_contract: DataContractSpecification = None,
|
|
19
19
|
schema_location: str = None,
|
|
20
|
-
inline_definitions: bool = False
|
|
20
|
+
inline_definitions: bool = False,
|
|
21
21
|
) -> DataContractSpecification:
|
|
22
22
|
if data_contract_location is not None:
|
|
23
23
|
return resolve_data_contract_from_location(data_contract_location, schema_location, inline_definitions)
|
|
@@ -30,14 +30,13 @@ def resolve_data_contract(
|
|
|
30
30
|
type="lint",
|
|
31
31
|
result="failed",
|
|
32
32
|
name="Check that data contract YAML is valid",
|
|
33
|
-
reason=
|
|
33
|
+
reason="Data contract needs to be provided",
|
|
34
34
|
engine="datacontract",
|
|
35
35
|
)
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def resolve_data_contract_from_location(
|
|
39
|
-
location, schema_location: str = None,
|
|
40
|
-
inline_definitions: bool = False
|
|
39
|
+
location, schema_location: str = None, inline_definitions: bool = False
|
|
41
40
|
) -> DataContractSpecification:
|
|
42
41
|
if location.startswith("http://") or location.startswith("https://"):
|
|
43
42
|
data_contract_str = fetch_resource(location)
|
|
@@ -57,10 +56,9 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
|
57
56
|
field.ref_obj = definition
|
|
58
57
|
|
|
59
58
|
for field_name in field.model_fields.keys():
|
|
60
|
-
if
|
|
61
|
-
field_name
|
|
62
|
-
|
|
63
|
-
getattr(definition, field_name))
|
|
59
|
+
if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
|
|
60
|
+
setattr(field, field_name, getattr(definition, field_name))
|
|
61
|
+
|
|
64
62
|
|
|
65
63
|
def resolve_ref(ref, definitions) -> Definition:
|
|
66
64
|
if ref.startswith("http://") or ref.startswith("https://"):
|
|
@@ -82,8 +80,7 @@ def resolve_ref(ref, definitions) -> Definition:
|
|
|
82
80
|
|
|
83
81
|
|
|
84
82
|
def resolve_data_contract_from_str(
|
|
85
|
-
data_contract_str, schema_location: str = None,
|
|
86
|
-
inline_definitions: bool = False
|
|
83
|
+
data_contract_str, schema_location: str = None, inline_definitions: bool = False
|
|
87
84
|
) -> DataContractSpecification:
|
|
88
85
|
data_contract_yaml_dict = to_yaml(data_contract_str)
|
|
89
86
|
validate(data_contract_yaml_dict, schema_location)
|
datacontract/lint/schema.py
CHANGED
|
@@ -7,7 +7,6 @@ from datacontract.model.exceptions import DataContractException
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def fetch_schema(location: str = None):
|
|
10
|
-
|
|
11
10
|
if location is None:
|
|
12
11
|
location = "https://datacontract.com/datacontract.schema.json"
|
|
13
12
|
|
|
@@ -21,8 +20,8 @@ def fetch_schema(location: str = None):
|
|
|
21
20
|
name=f"Reading schema from {location}",
|
|
22
21
|
reason=f"The file '{location}' does not exist.",
|
|
23
22
|
engine="datacontract",
|
|
24
|
-
result="error"
|
|
23
|
+
result="error",
|
|
25
24
|
)
|
|
26
|
-
with open(location,
|
|
25
|
+
with open(location, "r") as file:
|
|
27
26
|
file_content = file.read()
|
|
28
27
|
return json.loads(file_content)
|
datacontract/lint/urls.py
CHANGED
|
@@ -20,22 +20,21 @@ def fetch_resource(url: str):
|
|
|
20
20
|
name=f"Reading data contract from {url}",
|
|
21
21
|
reason=f"Cannot read resource from URL {url}. Response status is {response.status_code}",
|
|
22
22
|
engine="datacontract",
|
|
23
|
-
result="error"
|
|
23
|
+
result="error",
|
|
24
24
|
)
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def _set_api_key(headers, url):
|
|
28
28
|
if ".datamesh-manager.com/" not in url:
|
|
29
29
|
return
|
|
30
|
-
datamesh_manager_api_key = os.getenv(
|
|
30
|
+
datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
31
31
|
if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
|
|
32
|
-
print(
|
|
32
|
+
print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
|
|
33
33
|
raise DataContractException(
|
|
34
34
|
type="lint",
|
|
35
35
|
name=f"Reading data contract from {url}",
|
|
36
36
|
reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
|
|
37
37
|
engine="datacontract",
|
|
38
|
-
result="error"
|
|
38
|
+
result="error",
|
|
39
39
|
)
|
|
40
40
|
headers["x-api-key"] = datamesh_manager_api_key
|
|
41
|
-
|