datacontract-cli 0.9.7__py3-none-any.whl → 0.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +48 -57
- datacontract/cli.py +100 -80
- datacontract/data_contract.py +178 -128
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +58 -36
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +25 -8
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +9 -11
- datacontract/export/avro_idl_converter.py +65 -42
- datacontract/export/csv_type_converter.py +36 -0
- datacontract/export/dbt_converter.py +43 -32
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/html_export.py +46 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +5 -7
- datacontract/export/protobuf_converter.py +12 -10
- datacontract/export/pydantic_converter.py +131 -0
- datacontract/export/rdf_converter.py +34 -11
- datacontract/export/sodacl_converter.py +118 -21
- datacontract/export/sql_converter.py +30 -8
- datacontract/export/sql_type_converter.py +44 -4
- datacontract/export/terraform_converter.py +4 -3
- datacontract/imports/avro_importer.py +65 -18
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +6 -12
- datacontract/integration/publish_opentelemetry.py +30 -16
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +26 -31
- datacontract/lint/linters/description_linter.py +12 -21
- datacontract/lint/linters/example_model_linter.py +28 -29
- datacontract/lint/linters/field_pattern_linter.py +8 -8
- datacontract/lint/linters/field_reference_linter.py +11 -10
- datacontract/lint/linters/notice_period_linter.py +18 -22
- datacontract/lint/linters/quality_schema_linter.py +16 -20
- datacontract/lint/linters/valid_constraints_linter.py +42 -37
- datacontract/lint/resolve.py +50 -14
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +2 -1
- datacontract/model/data_contract_specification.py +8 -7
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +3 -2
- datacontract/web.py +3 -7
- datacontract_cli-0.9.9.dist-info/METADATA +951 -0
- datacontract_cli-0.9.9.dist-info/RECORD +64 -0
- datacontract/lint/linters/primary_field_linter.py +0 -30
- datacontract_cli-0.9.7.dist-info/METADATA +0 -603
- datacontract_cli-0.9.7.dist-info/RECORD +0 -61
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
|
+
from datacontract.model.data_contract_specification import \
|
|
4
|
+
DataContractSpecification
|
|
3
5
|
from ..lint import Linter, LinterResult
|
|
4
|
-
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class NoticePeriodLinter(Linter):
|
|
7
9
|
@property
|
|
@@ -25,17 +27,13 @@ class NoticePeriodLinter(Linter):
|
|
|
25
27
|
(:?[0-9\.,]+M)? # Number of minutes
|
|
26
28
|
(:?[0-9\.,]+S)? # Number of seconds
|
|
27
29
|
)?
|
|
28
|
-
""",
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
datetime_extended = re.compile(
|
|
33
|
-
r"P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
|
|
30
|
+
""",
|
|
31
|
+
re.VERBOSE,
|
|
32
|
+
)
|
|
33
|
+
datetime_basic = re.compile(r"P\d{8}T\d{6}")
|
|
34
|
+
datetime_extended = re.compile(r"P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
|
|
34
35
|
|
|
35
|
-
def lint_implementation(
|
|
36
|
-
self,
|
|
37
|
-
contract: DataContractSpecification
|
|
38
|
-
) -> LinterResult:
|
|
36
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
39
37
|
"""Check whether the notice period is specified using ISO8601 duration syntax."""
|
|
40
38
|
if not contract.terms:
|
|
41
39
|
return LinterResult.cautious("No terms defined.")
|
|
@@ -43,17 +41,15 @@ class NoticePeriodLinter(Linter):
|
|
|
43
41
|
if not period:
|
|
44
42
|
return LinterResult.cautious("No notice period defined.")
|
|
45
43
|
if not period.startswith("P"):
|
|
46
|
-
return LinterResult.erroneous(
|
|
47
|
-
f"Notice period '{period}' is not a valid"
|
|
48
|
-
"ISO8601 duration.")
|
|
44
|
+
return LinterResult.erroneous(f"Notice period '{period}' is not a valid" "ISO8601 duration.")
|
|
49
45
|
if period == "P":
|
|
50
46
|
return LinterResult.erroneous(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
not self.datetime_basic.fullmatch(period)
|
|
56
|
-
not self.datetime_extended.fullmatch(period)
|
|
57
|
-
|
|
58
|
-
|
|
47
|
+
"Notice period 'P' is not a valid" "ISO8601 duration, requires at least one" "duration to be specified."
|
|
48
|
+
)
|
|
49
|
+
if (
|
|
50
|
+
not self.simple.fullmatch(period)
|
|
51
|
+
and not self.datetime_basic.fullmatch(period)
|
|
52
|
+
and not self.datetime_extended.fullmatch(period)
|
|
53
|
+
):
|
|
54
|
+
return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.")
|
|
59
55
|
return LinterResult()
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import yaml
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from datacontract.model.data_contract_specification import\
|
|
3
|
+
from datacontract.model.data_contract_specification import \
|
|
5
4
|
DataContractSpecification, Model
|
|
5
|
+
from ..lint import Linter, LinterResult
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class QualityUsesSchemaLinter(Linter):
|
|
9
|
-
|
|
10
9
|
@property
|
|
11
10
|
def name(self) -> str:
|
|
12
11
|
return "Quality check(s) use model"
|
|
@@ -15,23 +14,21 @@ class QualityUsesSchemaLinter(Linter):
|
|
|
15
14
|
def id(self) -> str:
|
|
16
15
|
return "quality-schema"
|
|
17
16
|
|
|
18
|
-
def lint_sodacl(self, check, models: dict[str, Model])
|
|
19
|
-
LinterResult:
|
|
17
|
+
def lint_sodacl(self, check, models: dict[str, Model]) -> LinterResult:
|
|
20
18
|
result = LinterResult()
|
|
21
19
|
for sodacl_check in check.keys():
|
|
22
|
-
table_name = sodacl_check[len("checks for "):]
|
|
20
|
+
table_name = sodacl_check[len("checks for ") :]
|
|
23
21
|
if table_name not in models:
|
|
24
|
-
result = result.with_error(
|
|
25
|
-
f"Quality check on unknown model '{table_name}'")
|
|
22
|
+
result = result.with_error(f"Quality check on unknown model '{table_name}'")
|
|
26
23
|
return result
|
|
27
24
|
|
|
28
|
-
def lint_montecarlo(self, check, models: dict[str, Model])
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
25
|
+
def lint_montecarlo(self, check, models: dict[str, Model]) -> LinterResult:
|
|
26
|
+
return LinterResult().with_warning("Linting montecarlo checks is not currently implemented")
|
|
27
|
+
|
|
28
|
+
def lint_great_expectations(self, check, models: dict[str, Model]) -> LinterResult:
|
|
29
|
+
return LinterResult().with_warning("Linting great expectations checks is not currently implemented")
|
|
32
30
|
|
|
33
|
-
def lint_implementation(self, contract: DataContractSpecification)
|
|
34
|
-
LinterResult:
|
|
31
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
35
32
|
result = LinterResult()
|
|
36
33
|
models = contract.models
|
|
37
34
|
check = contract.quality
|
|
@@ -45,12 +42,11 @@ class QualityUsesSchemaLinter(Linter):
|
|
|
45
42
|
check_specification = check.specification
|
|
46
43
|
match check.type:
|
|
47
44
|
case "SodaCL":
|
|
48
|
-
result = result.combine(
|
|
49
|
-
self.lint_sodacl(check_specification, models))
|
|
45
|
+
result = result.combine(self.lint_sodacl(check_specification, models))
|
|
50
46
|
case "montecarlo":
|
|
51
|
-
result = result.combine(
|
|
52
|
-
|
|
47
|
+
result = result.combine(self.lint_montecarlo(check_specification, models))
|
|
48
|
+
case "great-expectations":
|
|
49
|
+
result = result.combine(self.lint_great_expectations(check_specification, models))
|
|
53
50
|
case _:
|
|
54
|
-
result = result.with_warning("Can't lint quality check "
|
|
55
|
-
f"with type '{check.type}'")
|
|
51
|
+
result = result.with_warning("Can't lint quality check " f"with type '{check.type}'")
|
|
56
52
|
return result
|
|
@@ -1,17 +1,16 @@
|
|
|
1
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field
|
|
1
2
|
from ..lint import Linter, LinterResult
|
|
2
|
-
from datacontract.model.data_contract_specification import\
|
|
3
|
-
DataContractSpecification, Field
|
|
4
3
|
|
|
5
4
|
|
|
6
5
|
class ValidFieldConstraintsLinter(Linter):
|
|
7
6
|
"""Check validity of field constraints.
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
8
|
+
More precisely, check that only numeric constraints are specified on
|
|
9
|
+
fields of numeric type and string constraints on fields of string type.
|
|
10
|
+
Additionally, the linter checks that defined constraints make sense.
|
|
11
|
+
Minimum values should not be greater than maximum values, exclusive and
|
|
12
|
+
non-exclusive minimum and maximum should not be combined and string
|
|
13
|
+
pattern and format should not be combined.
|
|
15
14
|
|
|
16
15
|
"""
|
|
17
16
|
|
|
@@ -20,52 +19,60 @@ class ValidFieldConstraintsLinter(Linter):
|
|
|
20
19
|
"format": set(["string", "text", "varchar"]),
|
|
21
20
|
"minLength": set(["string", "text", "varchar"]),
|
|
22
21
|
"maxLength": set(["string", "text", "varchar"]),
|
|
23
|
-
"minimum": set(["int", "integer", "number", "decimal", "numeric",
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"maximum": set(["int", "integer", "number", "decimal", "numeric",
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
22
|
+
"minimum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
|
|
23
|
+
"exclusiveMinimum": set(
|
|
24
|
+
["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
|
|
25
|
+
),
|
|
26
|
+
"maximum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
|
|
27
|
+
"exclusiveMaximum": set(
|
|
28
|
+
["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
|
|
29
|
+
),
|
|
31
30
|
}
|
|
32
31
|
|
|
33
32
|
def check_minimum_maximum(self, field: Field, field_name: str, model_name: str) -> LinterResult:
|
|
34
33
|
(min, max, xmin, xmax) = (field.minimum, field.maximum, field.exclusiveMinimum, field.exclusiveMaximum)
|
|
35
|
-
match (
|
|
36
|
-
|
|
34
|
+
match (
|
|
35
|
+
"minimum" in field.model_fields_set,
|
|
36
|
+
"maximum" in field.model_fields_set,
|
|
37
|
+
"exclusiveMinimum" in field.model_fields_set,
|
|
38
|
+
"exclusiveMaximum" in field.model_fields_set,
|
|
39
|
+
):
|
|
37
40
|
case (True, True, _, _) if min > max:
|
|
38
41
|
return LinterResult.erroneous(
|
|
39
|
-
f"Minimum {min} is greater than maximum {max} on "
|
|
40
|
-
|
|
42
|
+
f"Minimum {min} is greater than maximum {max} on " f"field '{field_name}' in model '{model_name}'."
|
|
43
|
+
)
|
|
41
44
|
case (_, _, True, True) if xmin >= xmax:
|
|
42
|
-
|
|
45
|
+
return LinterResult.erroneous(
|
|
43
46
|
f"Exclusive minimum {xmin} is greater than exclusive"
|
|
44
|
-
f" maximum {xmax} on field '{field_name}' in model '{model_name}'."
|
|
47
|
+
f" maximum {xmax} on field '{field_name}' in model '{model_name}'."
|
|
48
|
+
)
|
|
45
49
|
case (True, True, True, True):
|
|
46
50
|
return LinterResult.erroneous(
|
|
47
51
|
f"Both exclusive and non-exclusive minimum and maximum are "
|
|
48
|
-
f"defined on field '{field_name}' in model '{model_name}'."
|
|
52
|
+
f"defined on field '{field_name}' in model '{model_name}'."
|
|
53
|
+
)
|
|
49
54
|
case (True, _, True, _):
|
|
50
55
|
return LinterResult.erroneous(
|
|
51
56
|
f"Both exclusive and non-exclusive minimum are "
|
|
52
|
-
f"defined on field '{field_name}' in model '{model_name}'."
|
|
57
|
+
f"defined on field '{field_name}' in model '{model_name}'."
|
|
58
|
+
)
|
|
53
59
|
case (_, True, _, True):
|
|
54
60
|
return LinterResult.erroneous(
|
|
55
61
|
f"Both exclusive and non-exclusive maximum are "
|
|
56
|
-
f"defined on field '{field_name}' in model '{model_name}'."
|
|
62
|
+
f"defined on field '{field_name}' in model '{model_name}'."
|
|
63
|
+
)
|
|
57
64
|
return LinterResult()
|
|
58
65
|
|
|
59
66
|
def check_string_constraints(self, field: Field, field_name: str, model_name: str) -> LinterResult:
|
|
60
67
|
result = LinterResult()
|
|
61
68
|
if field.minLength and field.maxLength and field.minLength > field.maxLength:
|
|
62
69
|
result = result.with_error(
|
|
63
|
-
f"Minimum length is greater that maximum length on"
|
|
64
|
-
|
|
70
|
+
f"Minimum length is greater that maximum length on" f" field '{field_name}' in model '{model_name}'."
|
|
71
|
+
)
|
|
65
72
|
if field.pattern and field.format:
|
|
66
73
|
result = result.with_error(
|
|
67
|
-
f"Both a pattern and a format are defined for field"
|
|
68
|
-
|
|
74
|
+
f"Both a pattern and a format are defined for field" f" '{field_name}' in model '{model_name}'."
|
|
75
|
+
)
|
|
69
76
|
return result
|
|
70
77
|
|
|
71
78
|
@property
|
|
@@ -76,19 +83,17 @@ class ValidFieldConstraintsLinter(Linter):
|
|
|
76
83
|
def id(self):
|
|
77
84
|
return "field-constraints"
|
|
78
85
|
|
|
79
|
-
def lint_implementation(
|
|
80
|
-
self,
|
|
81
|
-
contract: DataContractSpecification
|
|
82
|
-
) -> LinterResult:
|
|
86
|
+
def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
|
|
83
87
|
result = LinterResult()
|
|
84
|
-
for
|
|
85
|
-
for
|
|
86
|
-
for
|
|
88
|
+
for model_name, model in contract.models.items():
|
|
89
|
+
for field_name, field in model.fields.items():
|
|
90
|
+
for _property, allowed_types in self.valid_types_for_constraint.items():
|
|
87
91
|
if _property in field.model_fields_set and field.type not in allowed_types:
|
|
88
92
|
result = result.with_error(
|
|
89
93
|
f"Forbidden constraint '{_property}' defined on field "
|
|
90
94
|
f"'{field_name}' in model '{model_name}'. Field type "
|
|
91
|
-
f"is '{field.type}'."
|
|
95
|
+
f"is '{field.type}'."
|
|
96
|
+
)
|
|
92
97
|
result = result.combine(self.check_minimum_maximum(field, field_name, model_name))
|
|
93
98
|
result = result.combine(self.check_string_constraints(field, field_name, model_name))
|
|
94
99
|
return result
|
datacontract/lint/resolve.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
2
3
|
|
|
3
4
|
import fastjsonschema
|
|
4
5
|
import yaml
|
|
@@ -8,7 +9,7 @@ from datacontract.lint.files import read_file
|
|
|
8
9
|
from datacontract.lint.schema import fetch_schema
|
|
9
10
|
from datacontract.lint.urls import fetch_resource
|
|
10
11
|
from datacontract.model.data_contract_specification import \
|
|
11
|
-
DataContractSpecification, Definition
|
|
12
|
+
DataContractSpecification, Definition, Quality
|
|
12
13
|
from datacontract.model.exceptions import DataContractException
|
|
13
14
|
|
|
14
15
|
|
|
@@ -17,7 +18,7 @@ def resolve_data_contract(
|
|
|
17
18
|
data_contract_str: str = None,
|
|
18
19
|
data_contract: DataContractSpecification = None,
|
|
19
20
|
schema_location: str = None,
|
|
20
|
-
inline_definitions: bool = False
|
|
21
|
+
inline_definitions: bool = False,
|
|
21
22
|
) -> DataContractSpecification:
|
|
22
23
|
if data_contract_location is not None:
|
|
23
24
|
return resolve_data_contract_from_location(data_contract_location, schema_location, inline_definitions)
|
|
@@ -30,20 +31,19 @@ def resolve_data_contract(
|
|
|
30
31
|
type="lint",
|
|
31
32
|
result="failed",
|
|
32
33
|
name="Check that data contract YAML is valid",
|
|
33
|
-
reason=
|
|
34
|
+
reason="Data contract needs to be provided",
|
|
34
35
|
engine="datacontract",
|
|
35
36
|
)
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
def resolve_data_contract_from_location(
|
|
39
|
-
location, schema_location: str = None,
|
|
40
|
-
inline_definitions: bool = False
|
|
40
|
+
location, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = True
|
|
41
41
|
) -> DataContractSpecification:
|
|
42
42
|
if location.startswith("http://") or location.startswith("https://"):
|
|
43
43
|
data_contract_str = fetch_resource(location)
|
|
44
44
|
else:
|
|
45
45
|
data_contract_str = read_file(location)
|
|
46
|
-
return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions)
|
|
46
|
+
return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, include_quality)
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
@@ -53,16 +53,15 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
|
53
53
|
if not field.ref and not field.ref_obj:
|
|
54
54
|
continue
|
|
55
55
|
|
|
56
|
-
definition =
|
|
56
|
+
definition = resolve_definition_ref(field.ref, spec.definitions)
|
|
57
57
|
field.ref_obj = definition
|
|
58
58
|
|
|
59
59
|
for field_name in field.model_fields.keys():
|
|
60
|
-
if
|
|
61
|
-
field_name
|
|
62
|
-
setattr(field, field_name,
|
|
63
|
-
getattr(definition, field_name))
|
|
60
|
+
if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
|
|
61
|
+
setattr(field, field_name, getattr(definition, field_name))
|
|
64
62
|
|
|
65
|
-
|
|
63
|
+
|
|
64
|
+
def resolve_definition_ref(ref, definitions) -> Definition:
|
|
66
65
|
if ref.startswith("http://") or ref.startswith("https://"):
|
|
67
66
|
definition_str = fetch_resource(ref)
|
|
68
67
|
definition_dict = to_yaml(definition_str)
|
|
@@ -81,9 +80,44 @@ def resolve_ref(ref, definitions) -> Definition:
|
|
|
81
80
|
)
|
|
82
81
|
|
|
83
82
|
|
|
83
|
+
def resolve_quality_ref(quality: Quality):
|
|
84
|
+
"""
|
|
85
|
+
Return the content of a ref file path
|
|
86
|
+
@param quality data contract quality specification
|
|
87
|
+
"""
|
|
88
|
+
if isinstance(quality.specification, dict):
|
|
89
|
+
specification = quality.specification
|
|
90
|
+
if quality.type == "great-expectations":
|
|
91
|
+
for model, model_quality in specification.items():
|
|
92
|
+
specification[model] = get_quality_ref_file(model_quality)
|
|
93
|
+
else:
|
|
94
|
+
if "$ref" in specification:
|
|
95
|
+
quality.specification = get_quality_ref_file(specification)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def get_quality_ref_file(quality_spec: str | object) -> str | object:
|
|
99
|
+
"""
|
|
100
|
+
Get the file associated with a quality reference
|
|
101
|
+
@param quality_spec quality specification
|
|
102
|
+
@returns: the content of the quality file
|
|
103
|
+
"""
|
|
104
|
+
if isinstance(quality_spec, dict) and "$ref" in quality_spec:
|
|
105
|
+
ref = quality_spec["$ref"]
|
|
106
|
+
if not os.path.exists(ref):
|
|
107
|
+
raise DataContractException(
|
|
108
|
+
type="export",
|
|
109
|
+
result="failed",
|
|
110
|
+
name="Check that data contract quality is valid",
|
|
111
|
+
reason=f"Cannot resolve reference {ref}",
|
|
112
|
+
engine="datacontract",
|
|
113
|
+
)
|
|
114
|
+
with open(ref, "r") as file:
|
|
115
|
+
quality_spec = file.read()
|
|
116
|
+
return quality_spec
|
|
117
|
+
|
|
118
|
+
|
|
84
119
|
def resolve_data_contract_from_str(
|
|
85
|
-
data_contract_str, schema_location: str = None,
|
|
86
|
-
inline_definitions: bool = False
|
|
120
|
+
data_contract_str, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = False
|
|
87
121
|
) -> DataContractSpecification:
|
|
88
122
|
data_contract_yaml_dict = to_yaml(data_contract_str)
|
|
89
123
|
validate(data_contract_yaml_dict, schema_location)
|
|
@@ -92,6 +126,8 @@ def resolve_data_contract_from_str(
|
|
|
92
126
|
|
|
93
127
|
if inline_definitions:
|
|
94
128
|
inline_definitions_into_data_contract(spec)
|
|
129
|
+
if spec.quality and include_quality:
|
|
130
|
+
resolve_quality_ref(spec.quality)
|
|
95
131
|
|
|
96
132
|
return spec
|
|
97
133
|
|
datacontract/lint/schema.py
CHANGED
|
@@ -7,7 +7,6 @@ from datacontract.model.exceptions import DataContractException
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def fetch_schema(location: str = None):
|
|
10
|
-
|
|
11
10
|
if location is None:
|
|
12
11
|
location = "https://datacontract.com/datacontract.schema.json"
|
|
13
12
|
|
|
@@ -21,8 +20,8 @@ def fetch_schema(location: str = None):
|
|
|
21
20
|
name=f"Reading schema from {location}",
|
|
22
21
|
reason=f"The file '{location}' does not exist.",
|
|
23
22
|
engine="datacontract",
|
|
24
|
-
result="error"
|
|
23
|
+
result="error",
|
|
25
24
|
)
|
|
26
|
-
with open(location,
|
|
25
|
+
with open(location, "r") as file:
|
|
27
26
|
file_content = file.read()
|
|
28
27
|
return json.loads(file_content)
|
datacontract/lint/urls.py
CHANGED
|
@@ -20,22 +20,21 @@ def fetch_resource(url: str):
|
|
|
20
20
|
name=f"Reading data contract from {url}",
|
|
21
21
|
reason=f"Cannot read resource from URL {url}. Response status is {response.status_code}",
|
|
22
22
|
engine="datacontract",
|
|
23
|
-
result="error"
|
|
23
|
+
result="error",
|
|
24
24
|
)
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def _set_api_key(headers, url):
|
|
28
28
|
if ".datamesh-manager.com/" not in url:
|
|
29
29
|
return
|
|
30
|
-
datamesh_manager_api_key = os.getenv(
|
|
30
|
+
datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
31
31
|
if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
|
|
32
|
-
print(
|
|
32
|
+
print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
|
|
33
33
|
raise DataContractException(
|
|
34
34
|
type="lint",
|
|
35
35
|
name=f"Reading data contract from {url}",
|
|
36
36
|
reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
|
|
37
37
|
engine="datacontract",
|
|
38
|
-
result="error"
|
|
38
|
+
result="error",
|
|
39
39
|
)
|
|
40
40
|
headers["x-api-key"] = datamesh_manager_api_key
|
|
41
|
-
|
|
@@ -22,13 +22,13 @@ class Server(pyd.BaseModel):
|
|
|
22
22
|
location: str = None
|
|
23
23
|
account: str = None
|
|
24
24
|
database: str = None
|
|
25
|
-
schema_: str = pyd.Field(default=None, alias=
|
|
25
|
+
schema_: str = pyd.Field(default=None, alias="schema")
|
|
26
26
|
host: str = None
|
|
27
27
|
port: int = None
|
|
28
28
|
catalog: str = None
|
|
29
29
|
topic: str = None
|
|
30
|
-
http_path: str = None
|
|
31
|
-
token: str = None
|
|
30
|
+
http_path: str = None # Use ENV variable
|
|
31
|
+
token: str = None # Use ENV variable
|
|
32
32
|
dataProductId: str = None
|
|
33
33
|
outputPortId: str = None
|
|
34
34
|
|
|
@@ -81,13 +81,14 @@ class Field(pyd.BaseModel):
|
|
|
81
81
|
exclusiveMaximum: int = None
|
|
82
82
|
enum: List[str] = []
|
|
83
83
|
tags: List[str] = []
|
|
84
|
-
fields: Dict[str,
|
|
85
|
-
items:
|
|
84
|
+
fields: Dict[str, "Field"] = {}
|
|
85
|
+
items: "Field" = None
|
|
86
86
|
|
|
87
87
|
|
|
88
88
|
class Model(pyd.BaseModel):
|
|
89
89
|
description: str = None
|
|
90
90
|
type: str = None
|
|
91
|
+
namespace: str = None
|
|
91
92
|
fields: Dict[str, Field] = {}
|
|
92
93
|
|
|
93
94
|
|
|
@@ -126,8 +127,8 @@ class DataContractSpecification(pyd.BaseModel):
|
|
|
126
127
|
@classmethod
|
|
127
128
|
def from_file(cls, file):
|
|
128
129
|
if not os.path.exists(file):
|
|
129
|
-
raise(f"The file '{file}' does not exist.")
|
|
130
|
-
with open(file,
|
|
130
|
+
raise (f"The file '{file}' does not exist.")
|
|
131
|
+
with open(file, "r") as file:
|
|
131
132
|
file_content = file.read()
|
|
132
133
|
return DataContractSpecification.from_string(file_content)
|
|
133
134
|
|
datacontract/model/exceptions.py
CHANGED
|
@@ -11,7 +11,17 @@ class DataContractException(Exception):
|
|
|
11
11
|
message (str): General message for the error.
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
-
def __init__(
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
type,
|
|
17
|
+
name,
|
|
18
|
+
reason,
|
|
19
|
+
engine="datacontract",
|
|
20
|
+
model=None,
|
|
21
|
+
original_exception=None,
|
|
22
|
+
result: str = "failed",
|
|
23
|
+
message="Run operation failed",
|
|
24
|
+
):
|
|
15
25
|
self.type = type
|
|
16
26
|
self.name = name
|
|
17
27
|
self.model = model
|
|
@@ -21,4 +31,5 @@ class DataContractException(Exception):
|
|
|
21
31
|
self.original_exception = original_exception
|
|
22
32
|
self.message = message
|
|
23
33
|
super().__init__(
|
|
24
|
-
f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}"
|
|
34
|
+
f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}"
|
|
35
|
+
)
|
datacontract/model/run.py
CHANGED
|
@@ -15,6 +15,7 @@ class Check(BaseModel):
|
|
|
15
15
|
model: Optional[str] = None
|
|
16
16
|
field: Optional[str] = None
|
|
17
17
|
details: Optional[str] = None
|
|
18
|
+
diagnostics: Optional[dict] = None
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class Log(BaseModel):
|
|
@@ -32,7 +33,7 @@ class Run(BaseModel):
|
|
|
32
33
|
server: Optional[str] = None
|
|
33
34
|
timestampStart: datetime
|
|
34
35
|
timestampEnd: datetime
|
|
35
|
-
result: str = "unknown"
|
|
36
|
+
result: str = "unknown" # passed, warning, failed, error, unknown
|
|
36
37
|
checks: List[Check]
|
|
37
38
|
logs: List[Log]
|
|
38
39
|
|
|
@@ -69,7 +70,7 @@ class Run(BaseModel):
|
|
|
69
70
|
self.logs.append(Log(level="ERROR", message=message, timestamp=datetime.now(timezone.utc)))
|
|
70
71
|
|
|
71
72
|
def pretty(self):
|
|
72
|
-
return self.model_dump_json()
|
|
73
|
+
return self.model_dump_json(indent=2)
|
|
73
74
|
|
|
74
75
|
@staticmethod
|
|
75
76
|
def create_run():
|
datacontract/web.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Annotated, Union
|
|
2
2
|
|
|
3
|
-
from fastapi import FastAPI, File
|
|
3
|
+
from fastapi import FastAPI, File
|
|
4
4
|
|
|
5
5
|
from datacontract.data_contract import DataContract
|
|
6
6
|
|
|
@@ -8,11 +8,7 @@ app = FastAPI()
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@app.post("/lint")
|
|
11
|
-
def lint(file: Annotated[bytes, File()],
|
|
12
|
-
linters: Union[str, set[str]]="all"):
|
|
11
|
+
def lint(file: Annotated[bytes, File()], linters: Union[str, set[str]] = "all"):
|
|
13
12
|
data_contract = DataContract(data_contract_str=str(file, encoding="utf-8"))
|
|
14
13
|
lint_result = data_contract.lint(enabled_linters=linters)
|
|
15
|
-
return {
|
|
16
|
-
"result": lint_result.result,
|
|
17
|
-
"checks": lint_result.checks
|
|
18
|
-
}
|
|
14
|
+
return {"result": lint_result.result, "checks": lint_result.checks}
|