datacontract-cli 0.10.34__py3-none-any.whl → 0.10.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (39)
  1. datacontract/api.py +10 -3
  2. datacontract/cli.py +5 -3
  3. datacontract/data_contract.py +18 -51
  4. datacontract/engines/data_contract_checks.py +280 -19
  5. datacontract/engines/fastjsonschema/check_jsonschema.py +29 -19
  6. datacontract/export/dbt_converter.py +30 -4
  7. datacontract/export/dqx_converter.py +126 -0
  8. datacontract/export/excel_exporter.py +3 -3
  9. datacontract/export/exporter.py +1 -0
  10. datacontract/export/exporter_factory.py +6 -0
  11. datacontract/export/markdown_converter.py +35 -16
  12. datacontract/export/mermaid_exporter.py +24 -11
  13. datacontract/export/rdf_converter.py +2 -2
  14. datacontract/export/spark_converter.py +28 -3
  15. datacontract/export/sql_type_converter.py +6 -4
  16. datacontract/imports/odcs_v3_importer.py +100 -19
  17. datacontract/imports/unity_importer.py +16 -11
  18. datacontract/init/init_template.py +1 -1
  19. datacontract/lint/resolve.py +1 -1
  20. datacontract/lint/schema.py +1 -1
  21. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  22. datacontract/schemas/datacontract-1.2.0.init.yaml +1 -1
  23. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  24. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  25. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  26. datacontract/templates/datacontract_odcs.html +60 -41
  27. {datacontract_cli-0.10.34.dist-info → datacontract_cli-0.10.36.dist-info}/METADATA +68 -56
  28. {datacontract_cli-0.10.34.dist-info → datacontract_cli-0.10.36.dist-info}/RECORD +32 -35
  29. datacontract/lint/lint.py +0 -142
  30. datacontract/lint/linters/__init__.py +0 -0
  31. datacontract/lint/linters/description_linter.py +0 -33
  32. datacontract/lint/linters/field_pattern_linter.py +0 -34
  33. datacontract/lint/linters/field_reference_linter.py +0 -47
  34. datacontract/lint/linters/notice_period_linter.py +0 -55
  35. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  36. {datacontract_cli-0.10.34.dist-info → datacontract_cli-0.10.36.dist-info}/WHEEL +0 -0
  37. {datacontract_cli-0.10.34.dist-info → datacontract_cli-0.10.36.dist-info}/entry_points.txt +0 -0
  38. {datacontract_cli-0.10.34.dist-info → datacontract_cli-0.10.36.dist-info}/licenses/LICENSE +0 -0
  39. {datacontract_cli-0.10.34.dist-info → datacontract_cli-0.10.36.dist-info}/top_level.txt +0 -0
datacontract/lint/lint.py DELETED
@@ -1,142 +0,0 @@
1
- import abc
2
- from dataclasses import dataclass, field
3
- from enum import Enum
4
- from typing import Any, Sequence, cast
5
-
6
- from datacontract.model.run import Check
7
-
8
- from ..model.data_contract_specification import DataContractSpecification
9
-
10
- """This module contains linter definitions for linting a data contract.
11
-
12
- Lints are quality checks that can succeed, fail, or warn. They are
13
- distinct from checks such as "valid yaml" or "file not found", which
14
- will cause the processing of the data contract to stop. Lints can be
15
- ignored, and are high-level requirements on the format of a data
16
- contract."""
17
-
18
-
19
- class LintSeverity(Enum):
20
- """The severity of a lint message. Generally, lint messages should be
21
- emitted with a severity of ERROR. WARNING should be used when the linter
22
- cannot determine a lint result, for example, when an unsupported model
23
- type is used.
24
- """
25
-
26
- ERROR = 2
27
- WARNING = 1
28
-
29
-
30
- @dataclass
31
- class LinterMessage:
32
- """A single linter message with attached severity and optional "model" that
33
- caused the message.
34
-
35
- Attributes:
36
- outcome: The outcome of the linting, either ERROR or WARNING. Linting outcomes with level WARNING are discarded for now.
37
- message: A message describing the error or warning in more detail.
38
- model: The model that caused the lint to fail. Is optional.
39
-
40
- """
41
-
42
- outcome: LintSeverity
43
- message: str
44
- model: Any = None
45
-
46
- @classmethod
47
- def error(cls, message: str, model=None):
48
- return LinterMessage(LintSeverity.ERROR, message, model)
49
-
50
- @classmethod
51
- def warning(cls, message: str, model=None):
52
- return LinterMessage(LintSeverity.WARNING, message, model)
53
-
54
-
55
- @dataclass
56
- class LinterResult:
57
- """Result of linting a contract. Contains multiple LinterResults from
58
- the same linter or lint phase.
59
-
60
- Attributes:
61
- linter: The linter that produced these results
62
- results: A list of linting results. Multiple identical linting
63
- results can be present in the list. An empty list means that
64
- the linter ran without producing warnings or errors.
65
- """
66
-
67
- results: Sequence[LinterMessage] = field(default_factory=list)
68
-
69
- @classmethod
70
- def erroneous(cls, message, model=None):
71
- return cls([LinterMessage.error(message, model)])
72
-
73
- @classmethod
74
- def cautious(cls, message, model=None):
75
- return cls([LinterMessage.warning(message, model)])
76
-
77
- def with_warning(self, message, model=None):
78
- result = LinterMessage.warning(message, model)
79
- return LinterResult(cast(list[LinterMessage], self.results) + [result])
80
-
81
- def with_error(self, message, model=None):
82
- result = LinterMessage.error(message, model)
83
- return LinterResult(cast(list[LinterMessage], self.results) + [result])
84
-
85
- def has_errors(self) -> bool:
86
- return any(map(lambda result: result.outcome == LintSeverity.ERROR, self.results))
87
-
88
- def has_warnings(self) -> bool:
89
- return any(map(lambda result: result.outcome == LintSeverity.WARNING, self.results))
90
-
91
- def error_results(self) -> Sequence[LinterMessage]:
92
- return [result for result in self.results if result.outcome == LintSeverity.ERROR]
93
-
94
- def warning_results(self) -> Sequence[LinterMessage]:
95
- return [result for result in self.results if result.outcome == LintSeverity.WARNING]
96
-
97
- def no_errors_or_warnings(self) -> bool:
98
- return len(self.results) == 0
99
-
100
- def combine(self, other: "LinterResult") -> "LinterResult":
101
- return LinterResult(cast(list[Any], self.results) + cast(list[Any], other.results))
102
-
103
-
104
- class Linter(abc.ABC):
105
- @property
106
- @abc.abstractmethod
107
- def name(self) -> str:
108
- """Human-readable name of the linter."""
109
- pass
110
-
111
- @property
112
- @abc.abstractmethod
113
- def id(self) -> str:
114
- """A linter ID for configuration (i.e. enabling and disabling)."""
115
- pass
116
-
117
- @abc.abstractmethod
118
- def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
119
- pass
120
-
121
- def lint(self, contract: DataContractSpecification) -> list[Check]:
122
- """Call with a data contract to get a list of check results from the linter."""
123
- result = self.lint_implementation(contract)
124
- checks = []
125
- if not result.error_results():
126
- checks.append(Check(type="lint", name=f"Linter '{self.name}'", result="passed", engine="datacontract"))
127
- else:
128
- # All linter messages are treated as warnings. Severity is
129
- # currently ignored, but could be used in filtering in the future
130
- # Linter messages with level WARNING are currently ignored, but might
131
- # be logged or printed in the future.
132
- for lint_error in result.error_results():
133
- checks.append(
134
- Check(
135
- type="lint",
136
- name=f"Linter '{self.name}'",
137
- result="warning",
138
- engine="datacontract",
139
- reason=lint_error.message,
140
- )
141
- )
142
- return checks
datacontract/lint/linters/__init__.py DELETED
File without changes
datacontract/lint/linters/description_linter.py DELETED
@@ -1,33 +0,0 @@
1
- from datacontract.model.data_contract_specification import DataContractSpecification
2
-
3
- from ..lint import Linter, LinterResult
4
-
5
-
6
- class DescriptionLinter(Linter):
7
- """Check for a description on contracts, models, model fields, definitions and examples."""
8
-
9
- @property
10
- def name(self) -> str:
11
- return "Objects have descriptions"
12
-
13
- @property
14
- def id(self) -> str:
15
- return "description"
16
-
17
- def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
18
- result = LinterResult()
19
- if not contract.info or not contract.info.description:
20
- result = result.with_error("Contract has empty description.")
21
- for model_name, model in contract.models.items():
22
- if not model.description:
23
- result = result.with_error(f"Model '{model_name}' has empty description.")
24
- for field_name, field in model.fields.items():
25
- if not field.description:
26
- result = result.with_error(f"Field '{field_name}' in model '{model_name}' has empty description.")
27
- for definition_name, definition in contract.definitions.items():
28
- if not definition.description:
29
- result = result.with_error(f"Definition '{definition_name}' has empty description.")
30
- for index, example in enumerate(contract.examples):
31
- if not example.description:
32
- result = result.with_error(f"Example {index + 1} has empty description.")
33
- return result
datacontract/lint/linters/field_pattern_linter.py DELETED
@@ -1,34 +0,0 @@
1
- import re
2
-
3
- from datacontract.model.data_contract_specification import DataContractSpecification
4
-
5
- from ..lint import Linter, LinterResult
6
-
7
-
8
- class FieldPatternLinter(Linter):
9
- """Checks that all patterns defined for fields are correct Python regex
10
- syntax.
11
-
12
- """
13
-
14
- @property
15
- def name(self):
16
- return "Field pattern is correct regex"
17
-
18
- @property
19
- def id(self) -> str:
20
- return "field-pattern"
21
-
22
- def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
23
- result = LinterResult()
24
- for model_name, model in contract.models.items():
25
- for field_name, field in model.fields.items():
26
- if field.pattern:
27
- try:
28
- re.compile(field.pattern)
29
- except re.error as e:
30
- result = result.with_error(
31
- f"Failed to compile pattern regex '{field.pattern}' for "
32
- f"field '{field_name}' in model '{model_name}': {e.msg}"
33
- )
34
- return result
datacontract/lint/linters/field_reference_linter.py DELETED
@@ -1,47 +0,0 @@
1
- from datacontract.model.data_contract_specification import DataContractSpecification
2
-
3
- from ..lint import Linter, LinterResult
4
-
5
-
6
- class FieldReferenceLinter(Linter):
7
- """Checks that all references definitions in fields refer to existing
8
- fields.
9
-
10
- """
11
-
12
- @property
13
- def name(self):
14
- return "Field references existing field"
15
-
16
- @property
17
- def id(self) -> str:
18
- return "field-reference"
19
-
20
- def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
21
- result = LinterResult()
22
- for model_name, model in contract.models.items():
23
- for field_name, field in model.fields.items():
24
- if field.references:
25
- reference_hierarchy = field.references.split(".")
26
- if len(reference_hierarchy) != 2:
27
- result = result.with_error(
28
- f"Field '{field_name}' in model '{model_name}'"
29
- f" references must follow the model.field syntax and refer to a field in a model in this data contract."
30
- )
31
- continue
32
- ref_model = reference_hierarchy[0]
33
- ref_field = reference_hierarchy[1]
34
-
35
- if ref_model not in contract.models:
36
- result = result.with_error(
37
- f"Field '{field_name}' in model '{model_name}' references non-existing model '{ref_model}'."
38
- )
39
- else:
40
- ref_model_obj = contract.models[ref_model]
41
- if ref_field not in ref_model_obj.fields:
42
- result = result.with_error(
43
- f"Field '{field_name}' in model '{model_name}'"
44
- f" references non-existing field '{ref_field}'"
45
- f" in model '{ref_model}'."
46
- )
47
- return result
datacontract/lint/linters/notice_period_linter.py DELETED
@@ -1,55 +0,0 @@
1
- import re
2
-
3
- from datacontract.model.data_contract_specification import DataContractSpecification
4
-
5
- from ..lint import Linter, LinterResult
6
-
7
-
8
- class NoticePeriodLinter(Linter):
9
- @property
10
- def name(self) -> str:
11
- return "noticePeriod in ISO8601 format"
12
-
13
- @property
14
- def id(self) -> str:
15
- return "notice-period"
16
-
17
- # Regex matching the "simple" ISO8601 duration format
18
- simple = re.compile(
19
- r"""P # Introduces period
20
- (:?[0-9\.,]+Y)? # Number of years
21
- (:?[0-9\.,]+M)? # Number of months
22
- (:?[0-9\.,]+W)? # Number of weeks
23
- (:?[0-9\.,]+D)? # Number of days
24
- (:? # Time part (optional)
25
- T # Always starts with T
26
- (:?[0-9\.,]+H)? # Number of hours
27
- (:?[0-9\.,]+M)? # Number of minutes
28
- (:?[0-9\.,]+S)? # Number of seconds
29
- )?
30
- """,
31
- re.VERBOSE,
32
- )
33
- datetime_basic = re.compile(r"P\d{8}T\d{6}")
34
- datetime_extended = re.compile(r"P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
35
-
36
- def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
37
- """Check whether the notice period is specified using ISO8601 duration syntax."""
38
- if not contract.terms:
39
- return LinterResult.cautious("No terms defined.")
40
- period = contract.terms.noticePeriod
41
- if not period:
42
- return LinterResult.cautious("No notice period defined.")
43
- if not period.startswith("P"):
44
- return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.")
45
- if period == "P":
46
- return LinterResult.erroneous(
47
- "Notice period 'P' is not a valid ISO8601 duration, requires at least one duration to be specified."
48
- )
49
- if (
50
- not self.simple.fullmatch(period)
51
- and not self.datetime_basic.fullmatch(period)
52
- and not self.datetime_extended.fullmatch(period)
53
- ):
54
- return LinterResult.erroneous(f"Notice period '{period}' is not a valid ISO8601 duration.")
55
- return LinterResult()
datacontract/lint/linters/valid_constraints_linter.py DELETED
@@ -1,100 +0,0 @@
1
- from datacontract.model.data_contract_specification import DataContractSpecification, Field
2
-
3
- from ..lint import Linter, LinterResult
4
-
5
-
6
- class ValidFieldConstraintsLinter(Linter):
7
- """Check validity of field constraints.
8
-
9
- More precisely, check that only numeric constraints are specified on
10
- fields of numeric type and string constraints on fields of string type.
11
- Additionally, the linter checks that defined constraints make sense.
12
- Minimum values should not be greater than maximum values, exclusive and
13
- non-exclusive minimum and maximum should not be combined and string
14
- pattern and format should not be combined.
15
-
16
- """
17
-
18
- valid_types_for_constraint = {
19
- "pattern": set(["string", "text", "varchar"]),
20
- "format": set(["string", "text", "varchar"]),
21
- "minLength": set(["string", "text", "varchar"]),
22
- "maxLength": set(["string", "text", "varchar"]),
23
- "minimum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
24
- "exclusiveMinimum": set(
25
- ["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
26
- ),
27
- "maximum": set(["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]),
28
- "exclusiveMaximum": set(
29
- ["int", "integer", "number", "decimal", "numeric", "long", "bigint", "float", "double"]
30
- ),
31
- }
32
-
33
- def check_minimum_maximum(self, field: Field, field_name: str, model_name: str) -> LinterResult:
34
- (min, max, xmin, xmax) = (field.minimum, field.maximum, field.exclusiveMinimum, field.exclusiveMaximum)
35
- match (
36
- "minimum" in field.model_fields_set,
37
- "maximum" in field.model_fields_set,
38
- "exclusiveMinimum" in field.model_fields_set,
39
- "exclusiveMaximum" in field.model_fields_set,
40
- ):
41
- case (True, True, _, _) if min > max:
42
- return LinterResult.erroneous(
43
- f"Minimum {min} is greater than maximum {max} on field '{field_name}' in model '{model_name}'."
44
- )
45
- case (_, _, True, True) if xmin >= xmax:
46
- return LinterResult.erroneous(
47
- f"Exclusive minimum {xmin} is greater than exclusive"
48
- f" maximum {xmax} on field '{field_name}' in model '{model_name}'."
49
- )
50
- case (True, True, True, True):
51
- return LinterResult.erroneous(
52
- f"Both exclusive and non-exclusive minimum and maximum are "
53
- f"defined on field '{field_name}' in model '{model_name}'."
54
- )
55
- case (True, _, True, _):
56
- return LinterResult.erroneous(
57
- f"Both exclusive and non-exclusive minimum are "
58
- f"defined on field '{field_name}' in model '{model_name}'."
59
- )
60
- case (_, True, _, True):
61
- return LinterResult.erroneous(
62
- f"Both exclusive and non-exclusive maximum are "
63
- f"defined on field '{field_name}' in model '{model_name}'."
64
- )
65
- return LinterResult()
66
-
67
- def check_string_constraints(self, field: Field, field_name: str, model_name: str) -> LinterResult:
68
- result = LinterResult()
69
- if field.minLength and field.maxLength and field.minLength > field.maxLength:
70
- result = result.with_error(
71
- f"Minimum length is greater that maximum length on field '{field_name}' in model '{model_name}'."
72
- )
73
- if field.pattern and field.format:
74
- result = result.with_error(
75
- f"Both a pattern and a format are defined for field '{field_name}' in model '{model_name}'."
76
- )
77
- return result
78
-
79
- @property
80
- def name(self):
81
- return "Fields use valid constraints"
82
-
83
- @property
84
- def id(self):
85
- return "field-constraints"
86
-
87
- def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
88
- result = LinterResult()
89
- for model_name, model in contract.models.items():
90
- for field_name, field in model.fields.items():
91
- for _property, allowed_types in self.valid_types_for_constraint.items():
92
- if _property in field.model_fields_set and field.type not in allowed_types:
93
- result = result.with_error(
94
- f"Forbidden constraint '{_property}' defined on field "
95
- f"'{field_name}' in model '{model_name}'. Field type "
96
- f"is '{field.type}'."
97
- )
98
- result = result.combine(self.check_minimum_maximum(field, field_name, model_name))
99
- result = result.combine(self.check_string_constraints(field, field_name, model_name))
100
- return result