datacontract-cli 0.9.6.post2__py3-none-any.whl → 0.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/breaking/breaking.py +139 -63
- datacontract/breaking/breaking_rules.py +71 -54
- datacontract/cli.py +138 -45
- datacontract/data_contract.py +316 -78
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +46 -35
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +4 -4
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +3 -7
- datacontract/export/avro_idl_converter.py +280 -0
- datacontract/export/dbt_converter.py +55 -80
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +10 -12
- datacontract/export/protobuf_converter.py +99 -0
- datacontract/export/pydantic_converter.py +140 -0
- datacontract/export/rdf_converter.py +35 -12
- datacontract/export/sodacl_converter.py +24 -24
- datacontract/export/sql_converter.py +93 -0
- datacontract/export/sql_type_converter.py +131 -0
- datacontract/export/terraform_converter.py +71 -0
- datacontract/imports/avro_importer.py +106 -0
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +4 -9
- datacontract/integration/publish_opentelemetry.py +107 -0
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +46 -31
- datacontract/lint/linters/description_linter.py +34 -0
- datacontract/lint/linters/example_model_linter.py +67 -43
- datacontract/lint/linters/field_pattern_linter.py +34 -0
- datacontract/lint/linters/field_reference_linter.py +38 -0
- datacontract/lint/linters/notice_period_linter.py +55 -0
- datacontract/lint/linters/primary_field_linter.py +28 -0
- datacontract/lint/linters/quality_schema_linter.py +52 -0
- datacontract/lint/linters/valid_constraints_linter.py +99 -0
- datacontract/lint/resolve.py +53 -8
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +27 -5
- datacontract/model/data_contract_specification.py +45 -25
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +1 -1
- datacontract/web.py +5 -8
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +207 -35
- datacontract_cli-0.9.8.dist-info/RECORD +63 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +1 -1
- datacontract_cli-0.9.6.post2.dist-info/RECORD +0 -47
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
datacontract/lint/linters/example_model_linter.py
CHANGED

@@ -1,67 +1,91 @@
 import csv
-import yaml
-import json
 import io
+import json
+
+import yaml
 
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Example
 from ..lint import Linter, LinterResult
-from datacontract.model.data_contract_specification import DataContractSpecification, Example
 
 
 class ExampleModelLinter(Linter):
+    @property
     def name(self) -> str:
         return "Example(s) match model"
 
+    @property
+    def id(self) -> str:
+        return "example-model"
+
     @staticmethod
     def get_example_headers(example: Example) -> list[str]:
-        (old lines 16-27 removed; content not shown in this rendering)
+        if isinstance(example.data, str):
+            match example.type:
+                case "csv":
+                    dialect = csv.Sniffer().sniff(example.data)
+                    data = io.StringIO(example.data)
+                    reader = csv.reader(data, dialect=dialect)
+                    return next(reader)
+                case "yaml":
+                    data = yaml.safe_load(example.data)
+                    return data.keys()
+                case "json":
+                    data = json.loads(example.data)
+                    return data.keys()
+                case _:
+                    # This is checked in lint_implementation, so shouldn't happen.
+                    raise NotImplementedError(f"Unknown type {example.type}")
+        else:
+            # Checked in lint_implementation, shouldn't happen.
+            raise NotImplementedError("Can't lint object examples.")
+
+    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+        """Check whether the example(s) headers match the model.
 
-        (old lines 29-32 removed; content not shown in this rendering)
-        """Check whether the example(s) match the model."""
+        This linter checks whether the example's fields match the model
+        fields, and whether all required fields of the model are present in
+        the example.
+        """
         result = LinterResult()
-        examples = …
-        models = …
+        examples = contract.examples
+        models = contract.models
         examples_with_model = []
-        for …
+        for index, example in enumerate(examples):
             if example.model not in models:
-                result = result.with_error(
-                    f"Example {index + 1} has non-existent model '{example.model}'")
+                result = result.with_error(f"Example {index + 1} has non-existent model '{example.model}'")
             else:
-                examples_with_model.append( …
-
-        for (index, example, model) in examples_with_model:
+                examples_with_model.append((index, example, models.get(example.model)))
+        for index, example, model in examples_with_model:
             if example.type == "custom":
-                result = result.with_warning(
-                    (old lines 48-49 removed; content not shown in this rendering)
+                result = result.with_warning(
+                    f"Example {index + 1} has type" ' "custom", cannot check model' " conformance"
+                )
+            elif not isinstance(example.data, str):
+                result = result.with_warning(
+                    f"Example {index + 1} is not a " "string example, can only lint string examples for now."
+                )
             elif model.type == "object":
                 result = result.with_warning(
                     f"Example {index + 1} uses a "
                     f"model '{example.model}' with type 'object'. Linting is "
-                    "currently only supported for 'table' models"
+                    "currently only supported for 'table' models"
+                )
             else:
-                (old lines 56-66 removed; content not shown in this rendering)
+                if example.type in ("csv", "yaml", "json"):
+                    headers = self.get_example_headers(example)
+                    for example_header in headers:
+                        if example_header not in model.fields:
+                            result = result.with_error(
+                                f"Example {index + 1} has field '{example_header}'"
+                                f" that's not contained in model '{example.model}'"
+                            )
+                    for field_name, field_value in model.fields.items():
+                        if field_name not in headers and field_value.required:
+                            result = result.with_error(
+                                f"Example {index + 1} is missing field '{field_name}'"
+                                f" required by model '{example.model}'"
+                            )
+                else:
+                    result = result.with_error(f"Example {index + 1} has unknown type" f"{example.type}")
         return result
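For orientation, a small sketch of what the reworked get_example_headers does for a CSV example. Only the call itself is taken from the diff; constructing Example with these keyword arguments is an assumption based on it being a pydantic model with the data, type and model attributes used above.

from datacontract.model.data_contract_specification import Example
from datacontract.lint.linters.example_model_linter import ExampleModelLinter

# Hypothetical example object; csv.Sniffer detects the delimiter and the
# first CSV row is returned as the header list.
example = Example(type="csv", model="orders", data="order_id,amount\n1001,42.0\n1002,13.5\n")
print(ExampleModelLinter.get_example_headers(example))  # ['order_id', 'amount']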
datacontract/lint/linters/field_pattern_linter.py
ADDED

@@ -0,0 +1,34 @@
+import re
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification
+from ..lint import Linter, LinterResult
+
+
+class FieldPatternLinter(Linter):
+    """Checks that all patterns defined for fields are correct Python regex
+    syntax.
+
+    """
+
+    @property
+    def name(self):
+        return "Field pattern is correct regex"
+
+    @property
+    def id(self) -> str:
+        return "field-pattern"
+
+    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+        result = LinterResult()
+        for model_name, model in contract.models.items():
+            for field_name, field in model.fields.items():
+                if field.pattern:
+                    try:
+                        re.compile(field.pattern)
+                    except re.error as e:
+                        result = result.with_error(
+                            f"Failed to compile pattern regex '{field.pattern}' for "
+                            f"field '{field_name}' in model '{model_name}': {e.msg}"
+                        )
+        return result
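The linter above leans on the standard library: re.compile raises re.error for invalid syntax, and that message is what ends up in the lint error. A minimal standalone illustration of the same check:

import re

# An unterminated character class is invalid regex syntax, so re.compile raises re.error.
try:
    re.compile(r"[A-Z")
except re.error as e:
    print(f"invalid pattern: {e.msg}")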
datacontract/lint/linters/field_reference_linter.py
ADDED

@@ -0,0 +1,38 @@
+from datacontract.model.data_contract_specification import DataContractSpecification
+from ..lint import Linter, LinterResult
+
+
+class FieldReferenceLinter(Linter):
+    """Checks that all references definitions in fields refer to existing
+    fields.
+
+    """
+
+    @property
+    def name(self):
+        return "Field references existing field"
+
+    @property
+    def id(self) -> str:
+        return "field-reference"
+
+    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+        result = LinterResult()
+        for model_name, model in contract.models.items():
+            for field_name, field in model.fields.items():
+                if field.references:
+                    (ref_model, ref_field) = field.references.split(".", maxsplit=2)
+                    if ref_model not in contract.models:
+                        result = result.with_error(
+                            f"Field '{field_name}' in model '{model_name}'"
+                            f" references non-existing model '{ref_model}'."
+                        )
+                    else:
+                        ref_model_obj = contract.models[ref_model]
+                        if ref_field not in ref_model_obj.fields:
+                            result = result.with_error(
+                                f"Field '{field_name}' in model '{model_name}'"
+                                f" references non-existing field '{ref_field}'"
+                                f" in model '{ref_model}'."
+                            )
+        return result
datacontract/lint/linters/notice_period_linter.py
ADDED

@@ -0,0 +1,55 @@
+import re
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification
+from ..lint import Linter, LinterResult
+
+
+class NoticePeriodLinter(Linter):
+    @property
+    def name(self) -> str:
+        return "noticePeriod in ISO8601 format"
+
+    @property
+    def id(self) -> str:
+        return "notice-period"
+
+    # Regex matching the "simple" ISO8601 duration format
+    simple = re.compile(
+        r"""P                # Introduces period
+        (:?[0-9\.,]+Y)?      # Number of years
+        (:?[0-9\.,]+M)?      # Number of months
+        (:?[0-9\.,]+W)?      # Number of weeks
+        (:?[0-9\.,]+D)?      # Number of days
+        (:?                  # Time part (optional)
+        T                    # Always starts with T
+        (:?[0-9\.,]+H)?      # Number of hours
+        (:?[0-9\.,]+M)?      # Number of minutes
+        (:?[0-9\.,]+S)?      # Number of seconds
+        )?
+        """,
+        re.VERBOSE,
+    )
+    datetime_basic = re.compile(r"P\d{8}T\d{6}")
+    datetime_extended = re.compile(r"P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
+
+    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+        """Check whether the notice period is specified using ISO8601 duration syntax."""
+        if not contract.terms:
+            return LinterResult.cautious("No terms defined.")
+        period = contract.terms.noticePeriod
+        if not period:
+            return LinterResult.cautious("No notice period defined.")
+        if not period.startswith("P"):
+            return LinterResult.erroneous(f"Notice period '{period}' is not a valid" "ISO8601 duration.")
+        if period == "P":
+            return LinterResult.erroneous(
+                "Notice period 'P' is not a valid" "ISO8601 duration, requires at least one" "duration to be specified."
+            )
+        if (
+            not self.simple.fullmatch(period)
+            and not self.datetime_basic.fullmatch(period)
+            and not self.datetime_extended.fullmatch(period)
+        ):
+            return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.")
+        return LinterResult()
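Going by the patterns above, durations such as "P3M", "P1Y6M" or "PT24H" are accepted, while a bare "P" or a value that does not start with "P" is rejected. The sketch below condenses the same logic; is_valid_notice_period is a hypothetical helper for illustration only, and the basic/extended datetime forms are omitted.

import re

# The "simple" ISO8601 duration pattern from the linter, with the verbose-mode comments stripped.
simple = re.compile(
    r"P(:?[0-9\.,]+Y)?(:?[0-9\.,]+M)?(:?[0-9\.,]+W)?(:?[0-9\.,]+D)?"
    r"(:?T(:?[0-9\.,]+H)?(:?[0-9\.,]+M)?(:?[0-9\.,]+S)?)?"
)

def is_valid_notice_period(period: str) -> bool:
    # Mirrors the linter's order of checks: must start with "P", must not be a
    # bare "P", and must fully match the duration pattern.
    if not period.startswith("P") or period == "P":
        return False
    return bool(simple.fullmatch(period))

for period in ("P3M", "P1Y6M", "PT24H", "P", "3 months"):
    print(period, is_valid_notice_period(period))
# P3M True, P1Y6M True, PT24H True, P False, 3 months False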
datacontract/lint/linters/primary_field_linter.py
ADDED

@@ -0,0 +1,28 @@
+from datacontract.model.data_contract_specification import DataContractSpecification
+from ..lint import Linter, LinterResult
+
+
+class PrimaryFieldUniqueRequired(Linter):
+    """Checks that all fields defined as primary are also defined as unique and required."""
+
+    @property
+    def name(self) -> str:
+        return "Model primary fields unique and required"
+
+    @property
+    def id(self) -> str:
+        return "notice-period"
+
+    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+        if not contract.models:
+            return LinterResult.cautious("No models defined on contract.")
+        result = LinterResult()
+        for model_name, model in contract.models.items():
+            for field_name, field in model.fields.items():
+                if field.primary and not field.required and not field.unique:
+                    result = result.with_error(
+                        f"Field '{field_name}' in model '{model_name}'"
+                        " is marked as primary, but not as unique"
+                        " and required."
+                    )
+        return result
datacontract/lint/linters/quality_schema_linter.py
ADDED

@@ -0,0 +1,52 @@
+import yaml
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Model
+from ..lint import Linter, LinterResult
+
+
+class QualityUsesSchemaLinter(Linter):
+    @property
+    def name(self) -> str:
+        return "Quality check(s) use model"
+
+    @property
+    def id(self) -> str:
+        return "quality-schema"
+
+    def lint_sodacl(self, check, models: dict[str, Model]) -> LinterResult:
+        result = LinterResult()
+        for sodacl_check in check.keys():
+            table_name = sodacl_check[len("checks for ") :]
+            if table_name not in models:
+                result = result.with_error(f"Quality check on unknown model '{table_name}'")
+        return result
+
+    def lint_montecarlo(self, check, models: dict[str, Model]) -> LinterResult:
+        return LinterResult().with_warning("Linting montecarlo checks is not currently implemented")
+
+    def lint_great_expectations(self, check, models: dict[str, Model]) -> LinterResult:
+        return LinterResult().with_warning("Linting great expectations checks is not currently implemented")
+
+    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+        result = LinterResult()
+        models = contract.models
+        check = contract.quality
+        if not check:
+            return LinterResult()
+        if not check.specification:
+            return LinterResult.cautious("Quality check without specification.")
+        if isinstance(check.specification, str):
+            check_specification = yaml.safe_load(check.specification)
+        else:
+            check_specification = check.specification
+        match check.type:
+            case "SodaCL":
+                result = result.combine(self.lint_sodacl(check_specification, models))
+            case "montecarlo":
+                result = result.combine(self.lint_montecarlo(check_specification, models))
+            case "great-expectations":
+                result = result.combine(self.lint_great_expectations(check_specification, models))
+            case _:
+                result = result.with_warning("Can't lint quality check " f"with type '{check.type}'")
+        return result
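lint_sodacl follows the SodaCL convention that every top-level key reads "checks for <table>"; the table name is stripped out and compared against the contract's models. A hedged sketch with a hypothetical specification dict (Model() with all defaults is an assumption):

from datacontract.model.data_contract_specification import Model
from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter

models = {"orders": Model()}
specification = {
    "checks for orders": ["row_count > 0"],
    "checks for payments": ["row_count > 0"],
}
# "payments" is not a model in the contract, so one lint error is expected.
result = QualityUsesSchemaLinter().lint_sodacl(specification, models)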
datacontract/lint/linters/valid_constraints_linter.py
ADDED

@@ -0,0 +1,99 @@
+from datacontract.model.data_contract_specification import DataContractSpecification, Field
+from ..lint import Linter, LinterResult
+
+
+class ValidFieldConstraintsLinter(Linter):
+    """Check validity of field constraints.
+
+    More precisely, check that only numeric constraints are specified on
+    fields of numeric type and string constraints on fields of string type.
+    Additionally, the linter checks that defined constraints make sense.
+    Minimum values should not be greater than maximum values, exclusive and
+    non-exclusive minimum and maximum should not be combined and string
+    pattern and format should not be combined.
+
+    """
+
+    valid_types_for_constraint = {
+        "pattern": set(["string", "text", "varchar"]),
+        "format": set(["string", "text", "varchar"]),
+        "minLength": set(["string", "text", "varchar"]),
+        "maxLength": set(["string", "text", "varchar"]),
+        "minimum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
+        "exclusiveMinimum": set(
+            ["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
+        ),
+        "maximum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
+        "exclusiveMaximum": set(
+            ["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
+        ),
+    }
+
+    def check_minimum_maximum(self, field: Field, field_name: str, model_name: str) -> LinterResult:
+        (min, max, xmin, xmax) = (field.minimum, field.maximum, field.exclusiveMinimum, field.exclusiveMaximum)
+        match (
+            "minimum" in field.model_fields_set,
+            "maximum" in field.model_fields_set,
+            "exclusiveMinimum" in field.model_fields_set,
+            "exclusiveMaximum" in field.model_fields_set,
+        ):
+            case (True, True, _, _) if min > max:
+                return LinterResult.erroneous(
+                    f"Minimum {min} is greater than maximum {max} on " f"field '{field_name}' in model '{model_name}'."
+                )
+            case (_, _, True, True) if xmin >= xmax:
+                return LinterResult.erroneous(
+                    f"Exclusive minimum {xmin} is greater than exclusive"
+                    f" maximum {xmax} on field '{field_name}' in model '{model_name}'."
+                )
+            case (True, True, True, True):
+                return LinterResult.erroneous(
+                    f"Both exclusive and non-exclusive minimum and maximum are "
+                    f"defined on field '{field_name}' in model '{model_name}'."
+                )
+            case (True, _, True, _):
+                return LinterResult.erroneous(
+                    f"Both exclusive and non-exclusive minimum are "
+                    f"defined on field '{field_name}' in model '{model_name}'."
+                )
+            case (_, True, _, True):
+                return LinterResult.erroneous(
+                    f"Both exclusive and non-exclusive maximum are "
+                    f"defined on field '{field_name}' in model '{model_name}'."
+                )
+        return LinterResult()
+
+    def check_string_constraints(self, field: Field, field_name: str, model_name: str) -> LinterResult:
+        result = LinterResult()
+        if field.minLength and field.maxLength and field.minLength > field.maxLength:
+            result = result.with_error(
+                f"Minimum length is greater that maximum length on" f" field '{field_name}' in model '{model_name}'."
+            )
+        if field.pattern and field.format:
+            result = result.with_error(
+                f"Both a pattern and a format are defined for field" f" '{field_name}' in model '{model_name}'."
+            )
+        return result
+
+    @property
+    def name(self):
+        return "Fields use valid constraints"
+
+    @property
+    def id(self):
+        return "field-constraints"
+
+    def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
+        result = LinterResult()
+        for model_name, model in contract.models.items():
+            for field_name, field in model.fields.items():
+                for _property, allowed_types in self.valid_types_for_constraint.items():
+                    if _property in field.model_fields_set and field.type not in allowed_types:
+                        result = result.with_error(
+                            f"Forbidden constraint '{_property}' defined on field "
+                            f"'{field_name}' in model '{model_name}'. Field type "
+                            f"is '{field.type}'."
+                        )
+                result = result.combine(self.check_minimum_maximum(field, field_name, model_name))
+                result = result.combine(self.check_string_constraints(field, field_name, model_name))
+        return result
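To see what the minimum/maximum check reacts to, here is a hedged sketch; constructing Field with these keyword arguments is an assumption based on it being a pydantic model with the attributes used above.

from datacontract.model.data_contract_specification import Field
from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter

linter = ValidFieldConstraintsLinter()
# minimum > maximum on a numeric field, so check_minimum_maximum should flag it,
# roughly: "Minimum 10 is greater than maximum 5 on field 'age' in model 'users'."
field = Field(type="integer", minimum=10, maximum=5)
result = linter.check_minimum_maximum(field, "age", "users")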
datacontract/lint/resolve.py
CHANGED
@@ -8,7 +8,7 @@ from datacontract.lint.files import read_file
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
 from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+    DataContractSpecification, Definition
 from datacontract.model.exceptions import DataContractException
 
 
@@ -17,11 +17,12 @@ def resolve_data_contract(
     data_contract_str: str = None,
     data_contract: DataContractSpecification = None,
     schema_location: str = None,
+    inline_definitions: bool = False,
 ) -> DataContractSpecification:
     if data_contract_location is not None:
-        return resolve_data_contract_from_location(data_contract_location, schema_location)
+        return resolve_data_contract_from_location(data_contract_location, schema_location, inline_definitions)
     elif data_contract_str is not None:
-        return resolve_data_contract_from_str(data_contract_str, schema_location)
+        return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions)
     elif data_contract is not None:
         return data_contract
     else:
@@ -29,23 +30,67 @@ def resolve_data_contract(
             type="lint",
             result="failed",
             name="Check that data contract YAML is valid",
-            reason= …
+            reason="Data contract needs to be provided",
             engine="datacontract",
         )
 
 
-def resolve_data_contract_from_location( …
+def resolve_data_contract_from_location(
+    location, schema_location: str = None, inline_definitions: bool = False
+) -> DataContractSpecification:
     if location.startswith("http://") or location.startswith("https://"):
         data_contract_str = fetch_resource(location)
     else:
         data_contract_str = read_file(location)
-    return resolve_data_contract_from_str(data_contract_str, schema_location)
+    return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions)
+
+
+def inline_definitions_into_data_contract(spec: DataContractSpecification):
+    for model in spec.models.values():
+        for field in model.fields.values():
+            # If ref_obj is not empty, we've already inlined definitions.
+            if not field.ref and not field.ref_obj:
+                continue
+
+            definition = resolve_ref(field.ref, spec.definitions)
+            field.ref_obj = definition
+
+            for field_name in field.model_fields.keys():
+                if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
+                    setattr(field, field_name, getattr(definition, field_name))
 
 
-def …
+def resolve_ref(ref, definitions) -> Definition:
+    if ref.startswith("http://") or ref.startswith("https://"):
+        definition_str = fetch_resource(ref)
+        definition_dict = to_yaml(definition_str)
+        return Definition(**definition_dict)
+
+    elif ref.startswith("#/definitions/"):
+        definition_name = ref.split("#/definitions/")[1]
+        return definitions[definition_name]
+    else:
+        raise DataContractException(
+            type="lint",
+            result="failed",
+            name="Check that data contract YAML is valid",
+            reason=f"Cannot resolve reference {ref}",
+            engine="datacontract",
+        )
+
+
+def resolve_data_contract_from_str(
+    data_contract_str, schema_location: str = None, inline_definitions: bool = False
+) -> DataContractSpecification:
     data_contract_yaml_dict = to_yaml(data_contract_str)
     validate(data_contract_yaml_dict, schema_location)
-
+
+    spec = DataContractSpecification(**data_contract_yaml_dict)
+
+    if inline_definitions:
+        inline_definitions_into_data_contract(spec)
+
+    return spec
 
 
 def to_yaml(data_contract_str):
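The new inline_definitions flag is threaded through all three entry points. A hedged usage sketch (the file path and definition name are hypothetical):

from datacontract.lint.resolve import resolve_data_contract

# With inline_definitions=True, each field carrying a ref such as
# "#/definitions/order_id" gets the referenced Definition resolved and its
# attributes copied onto the field, unless the field already sets them itself.
spec = resolve_data_contract(
    data_contract_location="datacontract.yaml",
    inline_definitions=True,
)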
datacontract/lint/schema.py
CHANGED
@@ -7,7 +7,6 @@ from datacontract.model.exceptions import DataContractException
 
 
 def fetch_schema(location: str = None):
-
     if location is None:
         location = "https://datacontract.com/datacontract.schema.json"
 
@@ -21,8 +20,8 @@ def fetch_schema(location: str = None):
             name=f"Reading schema from {location}",
             reason=f"The file '{location}' does not exist.",
             engine="datacontract",
-            result="error"
+            result="error",
         )
-    with open(location, …
+    with open(location, "r") as file:
         file_content = file.read()
     return json.loads(file_content)
datacontract/lint/urls.py
CHANGED
@@ -20,22 +20,21 @@ def fetch_resource(url: str):
             name=f"Reading data contract from {url}",
             reason=f"Cannot read resource from URL {url}. Response status is {response.status_code}",
             engine="datacontract",
-            result="error"
+            result="error",
         )
 
 
 def _set_api_key(headers, url):
     if ".datamesh-manager.com/" not in url:
         return
-    datamesh_manager_api_key = os.getenv( …
+    datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
     if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
-        print( …
+        print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
         raise DataContractException(
             type="lint",
             name=f"Reading data contract from {url}",
             reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
             engine="datacontract",
-            result="error"
+            result="error",
         )
     headers["x-api-key"] = datamesh_manager_api_key
-
datacontract/model/breaking_change.py
CHANGED

@@ -1,8 +1,18 @@
+from enum import Enum
 from typing import List
 
 from pydantic import BaseModel
 
 
+class Severity(Enum):
+    ERROR = "error"
+    WARNING = "warning"
+    INFO = "info"
+
+    def __str__(self) -> str:
+        return self.value
+
+
 class Location(BaseModel):
     path: str
     composition: List[str]
@@ -10,7 +20,7 @@ class Location(BaseModel):
 
 class BreakingChange(BaseModel):
     description: str
-    severity: …
+    severity: Severity
     check_name: str
     location: Location
 
@@ -24,19 +34,31 @@ class BreakingChanges(BaseModel):
     breaking_changes: List[BreakingChange]
 
     def passed_checks(self) -> bool:
-        errors = len(list(filter(lambda x: x.severity == …
+        errors = len(list(filter(lambda x: x.severity == Severity.ERROR, self.breaking_changes)))
        return errors == 0
 
-    def …
+    def breaking_str(self) -> str:
         changes_amount = len(self.breaking_changes)
-        errors = len(list(filter(lambda x: x.severity == …
-        warnings = len(list(filter(lambda x: x.severity == …
+        errors = len(list(filter(lambda x: x.severity == Severity.ERROR, self.breaking_changes)))
+        warnings = len(list(filter(lambda x: x.severity == Severity.WARNING, self.breaking_changes)))
 
         headline = f"{changes_amount} breaking changes: {errors} error, {warnings} warning\n"
         content = str.join("\n\n", map(lambda x: str(x), self.breaking_changes))
 
         return headline + content
 
+    def changelog_str(self) -> str:
+        changes_amount = len(self.breaking_changes)
+        errors = len(list(filter(lambda x: x.severity == Severity.ERROR, self.breaking_changes)))
+        warnings = len(list(filter(lambda x: x.severity == Severity.WARNING, self.breaking_changes)))
+        infos = len(list(filter(lambda x: x.severity == Severity.INFO, self.breaking_changes)))
+
+        headline = f"{changes_amount} changes: {errors} error, {warnings} warning, {infos} info\n"
+        content = str.join("\n\n", map(lambda x: str(x), self.breaking_changes))
+
+        return headline + content
+
+
 #
 # [
 # {