datacontract-cli 0.10.21__py3-none-any.whl → 0.10.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic.

Files changed (29)
  1. datacontract/breaking/breaking.py +1 -1
  2. datacontract/breaking/breaking_rules.py +1 -1
  3. datacontract/cli.py +5 -5
  4. datacontract/data_contract.py +14 -100
  5. datacontract/engines/data_contract_checks.py +735 -0
  6. datacontract/engines/data_contract_test.py +51 -0
  7. datacontract/engines/soda/check_soda_execute.py +36 -30
  8. datacontract/engines/soda/connections/kafka.py +8 -3
  9. datacontract/export/avro_converter.py +2 -0
  10. datacontract/export/exporter.py +0 -2
  11. datacontract/export/exporter_factory.py +0 -12
  12. datacontract/export/sodacl_converter.py +22 -294
  13. datacontract/export/sql_type_converter.py +7 -2
  14. datacontract/imports/odcs_importer.py +6 -3
  15. datacontract/imports/odcs_v3_importer.py +2 -0
  16. datacontract/imports/sql_importer.py +229 -29
  17. datacontract/model/exceptions.py +4 -1
  18. datacontract/model/run.py +11 -4
  19. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/METADATA +139 -166
  20. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/RECORD +25 -27
  21. datacontract/engines/soda/connections/dask.py +0 -28
  22. datacontract/export/odcs_v2_exporter.py +0 -124
  23. datacontract/imports/odcs_v2_importer.py +0 -177
  24. datacontract/lint/linters/example_model_linter.py +0 -91
  25. /datacontract/{model → breaking}/breaking_change.py +0 -0
  26. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/LICENSE +0 -0
  27. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/WHEEL +0 -0
  28. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/entry_points.txt +0 -0
  29. {datacontract_cli-0.10.21.dist-info → datacontract_cli-0.10.22.dist-info}/top_level.txt +0 -0
datacontract/export/odcs_v2_exporter.py
@@ -1,124 +0,0 @@
- from typing import Dict
-
- import yaml
-
- from datacontract.export.exporter import Exporter
- from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
-
-
- class OdcsV2Exporter(Exporter):
-     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
-         return to_odcs_v2_yaml(data_contract)
-
-
- def to_odcs_v2_yaml(data_contract_spec: DataContractSpecification):
-     odcs = {
-         "kind": "DataContract",
-         "apiVersion": "2.3.0",
-         "uuid": data_contract_spec.id,
-         "version": data_contract_spec.info.version,
-         "datasetDomain": data_contract_spec.info.owner,
-         "quantumName": data_contract_spec.info.title,
-         "status": "unknown",
-     }
-
-     if data_contract_spec.info.contact is not None:
-         if data_contract_spec.info.contact.email is not None:
-             odcs["productDl"] = data_contract_spec.info.contact.email
-         if data_contract_spec.info.contact.url is not None:
-             odcs["productFeedbackUrl"] = data_contract_spec.info.contact.url
-
-     if data_contract_spec.terms is not None:
-         odcs["description"] = {
-             "purpose": data_contract_spec.terms.description.strip()
-             if data_contract_spec.terms.description is not None
-             else None,
-             "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
-             "limitations": data_contract_spec.terms.limitations.strip()
-             if data_contract_spec.terms.limitations is not None
-             else None,
-         }
-
-     if data_contract_spec.servicelevels is not None:
-         slas = []
-         if data_contract_spec.servicelevels.availability is not None:
-             slas.append(
-                 {
-                     "property": "generalAvailability",
-                     "value": data_contract_spec.servicelevels.availability.description,
-                 }
-             )
-         if data_contract_spec.servicelevels.retention is not None:
-             slas.append({"property": "retention", "value": data_contract_spec.servicelevels.retention.period})
-
-         if len(slas) > 0:
-             odcs["slaProperties"] = slas
-
-     odcs["type"] = "tables"  # required, TODO read from models.type?
-     odcs["dataset"] = []
-
-     for model_key, model_value in data_contract_spec.models.items():
-         odcs_table = to_odcs_table(model_key, model_value)
-         odcs["dataset"].append(odcs_table)
-     return yaml.dump(odcs, indent=2, sort_keys=False, allow_unicode=True)
-
-
- def to_odcs_table(model_key, model_value: Model) -> dict:
-     odcs_table = {
-         "table": model_key,
-         "physicalName": model_key,
-         "columns": [],
-     }
-     if model_value.description is not None:
-         odcs_table["description"] = model_value.description
-     columns = to_columns(model_value.fields)
-     if columns:
-         odcs_table["columns"] = columns
-     return odcs_table
-
-
- def to_columns(fields: Dict[str, Field]) -> list:
-     columns = []
-     for field_name, field in fields.items():
-         column = to_column(field_name, field)
-         columns.append(column)
-     return columns
-
-
- def to_column(field_name: str, field: Field) -> dict:
-     column = {"column": field_name}
-     if field.type is not None:
-         column["logicalType"] = field.type
-         column["physicalType"] = field.type
-     if field.description is not None:
-         column["description"] = field.description
-     if field.required is not None:
-         column["isNullable"] = not field.required
-     if field.unique is not None:
-         column["isUnique"] = field.unique
-     if field.classification is not None:
-         column["classification"] = field.classification
-     column["tags"] = []
-     if field.tags is not None:
-         column["tags"].extend(field.tags)
-     if field.pii is not None:
-         column["tags"].append(f"pii:{str(field.pii).lower()}")
-     if field.minLength is not None:
-         column["tags"].append(f"minLength:{field.minLength}")
-     if field.maxLength is not None:
-         column["tags"].append(f"maxLength:{field.maxLength}")
-     if field.pattern is not None:
-         column["tags"].append(f"pattern:{field.pattern}")
-     if field.minimum is not None:
-         column["tags"].append(f"minimum:{field.minimum}")
-     if field.maximum is not None:
-         column["tags"].append(f"maximum:{field.maximum}")
-     if field.exclusiveMinimum is not None:
-         column["tags"].append(f"exclusiveMinimum:{field.exclusiveMinimum}")
-     if field.exclusiveMaximum is not None:
-         column["tags"].append(f"exclusiveMaximum:{field.exclusiveMaximum}")
-     if not column["tags"]:
-         del column["tags"]
-
-     # todo enum
-     return column
datacontract/imports/odcs_v2_importer.py
@@ -1,177 +0,0 @@
- import datetime
- import logging
- from typing import Any, Dict, List
-
- import yaml
-
- from datacontract.imports.importer import Importer
- from datacontract.model.data_contract_specification import (
-     DATACONTRACT_TYPES,
-     Availability,
-     Contact,
-     DataContractSpecification,
-     Field,
-     Info,
-     Model,
-     Retention,
-     ServiceLevel,
-     Terms,
- )
- from datacontract.model.exceptions import DataContractException
-
-
- class OdcsImporter(Importer):
-     def import_source(
-         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-     ) -> DataContractSpecification:
-         return import_odcs_v2(data_contract_specification, source)
-
-
- def import_odcs_v2(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
-     try:
-         with open(source, "r") as file:
-             odcs_contract = yaml.safe_load(file.read())
-
-     except Exception as e:
-         raise DataContractException(
-             type="schema",
-             name="Parse ODCS contract",
-             reason=f"Failed to parse odcs contract from {source}",
-             engine="datacontract",
-             original_exception=e,
-         )
-
-     data_contract_specification.id = odcs_contract["uuid"]
-     data_contract_specification.info = import_info(odcs_contract)
-     data_contract_specification.terms = import_terms(odcs_contract)
-     data_contract_specification.servicelevels = import_servicelevels(odcs_contract)
-     data_contract_specification.models = import_models(odcs_contract)
-
-     return data_contract_specification
-
-
- def import_info(odcs_contract: Dict[str, Any]) -> Info:
-     info = Info(title=odcs_contract.get("quantumName"), version=odcs_contract.get("version"))
-
-     if odcs_contract.get("description").get("purpose") is not None:
-         info.description = odcs_contract.get("description").get("purpose")
-
-     if odcs_contract.get("datasetDomain") is not None:
-         info.owner = odcs_contract.get("datasetDomain")
-
-     if odcs_contract.get("productDl") is not None or odcs_contract.get("productFeedbackUrl") is not None:
-         contact = Contact()
-         if odcs_contract.get("productDl") is not None:
-             contact.name = odcs_contract.get("productDl")
-         if odcs_contract.get("productFeedbackUrl") is not None:
-             contact.url = odcs_contract.get("productFeedbackUrl")
-
-         info.contact = contact
-
-     return info
-
-
- def import_terms(odcs_contract: Dict[str, Any]) -> Terms | None:
-     if (
-         odcs_contract.get("description").get("usage") is not None
-         or odcs_contract.get("description").get("limitations") is not None
-         or odcs_contract.get("price") is not None
-     ):
-         terms = Terms()
-         if odcs_contract.get("description").get("usage") is not None:
-             terms.usage = odcs_contract.get("description").get("usage")
-         if odcs_contract.get("description").get("limitations") is not None:
-             terms.limitations = odcs_contract.get("description").get("limitations")
-         if odcs_contract.get("price") is not None:
-             terms.billing = f"{odcs_contract.get('price').get('priceAmount')} {odcs_contract.get('price').get('priceCurrency')} / {odcs_contract.get('price').get('priceUnit')}"
-
-         return terms
-     else:
-         return None
-
-
- def import_servicelevels(odcs_contract: Dict[str, Any]) -> ServiceLevel:
-     # find the two properties we can map (based on the examples)
-     sla_properties = odcs_contract.get("slaProperties") if odcs_contract.get("slaProperties") is not None else []
-     availability = next((p for p in sla_properties if p["property"] == "generalAvailability"), None)
-     retention = next((p for p in sla_properties if p["property"] == "retention"), None)
-
-     if availability is not None or retention is not None:
-         servicelevel = ServiceLevel()
-
-         if availability is not None:
-             value = availability.get("value")
-             if isinstance(value, datetime.datetime):
-                 value = value.isoformat()
-             servicelevel.availability = Availability(description=value)
-
-         if retention is not None:
-             servicelevel.retention = Retention(period=f"{retention.get('value')}{retention.get('unit')}")
-
-         return servicelevel
-     else:
-         return None
-
-
- def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
-     custom_type_mappings = get_custom_type_mappings(odcs_contract.get("customProperties"))
-
-     odcs_tables = odcs_contract.get("dataset") if odcs_contract.get("dataset") is not None else []
-     result = {}
-
-     for table in odcs_tables:
-         description = table.get("description") if table.get("description") is not None else ""
-         model = Model(description=" ".join(description.splitlines()), type="table")
-         model.fields = import_fields(table.get("columns"), custom_type_mappings)
-         result[table.get("table")] = model
-
-     return result
-
-
- def import_fields(odcs_columns: Dict[str, Any], custom_type_mappings: Dict[str, str]) -> Dict[str, Field]:
-     logger = logging.getLogger(__name__)
-     result = {}
-
-     for column in odcs_columns:
-         mapped_type = map_type(column.get("logicalType"), custom_type_mappings)
-         if mapped_type is not None:
-             description = column.get("description") if column.get("description") is not None else ""
-             field = Field(
-                 description=" ".join(description.splitlines()),
-                 type=mapped_type,
-                 title=column.get("businessName") if column.get("businessName") is not None else "",
-                 required=not column.get("isNullable") if column.get("isNullable") is not None else False,
-                 primaryKey=column.get("isPrimary") if column.get("isPrimary") is not None else False,
-                 unique=column.get("isUnique") if column.get("isUnique") is not None else False,
-                 classification=column.get("classification") if column.get("classification") is not None else "",
-                 tags=column.get("tags") if column.get("tags") is not None else [],
-             )
-             result[column["column"]] = field
-         else:
-             logger.info(
-                 f"Can't properly map {column.get('column')} to the Datacontract Mapping types, as there is no equivalent or special mapping. Consider introducing a customProperty 'dc_mapping_{column.get('logicalName')}' that defines your expected type as the 'value'"
-             )
-
-     return result
-
-
- def map_type(odcs_type: str, custom_mappings: Dict[str, str]) -> str | None:
-     t = odcs_type.lower()
-     if t in DATACONTRACT_TYPES:
-         return t
-     elif custom_mappings.get(t) is not None:
-         return custom_mappings.get(t)
-     else:
-         return None
-
-
- def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str]:
-     result = {}
-     if odcs_custom_properties is not None:
-         for prop in odcs_custom_properties:
-             if prop["property"].startswith("dc_mapping_"):
-                 odcs_type_name = prop["property"].substring(11)
-                 datacontract_type = prop["value"]
-                 result[odcs_type_name] = datacontract_type
-
-     return result
datacontract/lint/linters/example_model_linter.py
@@ -1,91 +0,0 @@
- import csv
- import io
- import json
-
- import yaml
-
- from datacontract.model.data_contract_specification import DataContractSpecification, Example
-
- from ..lint import Linter, LinterResult
-
-
- class ExampleModelLinter(Linter):
-     @property
-     def name(self) -> str:
-         return "Example(s) match model"
-
-     @property
-     def id(self) -> str:
-         return "example-model"
-
-     @staticmethod
-     def get_example_headers(example: Example) -> list[str]:
-         if isinstance(example.data, str):
-             match example.type:
-                 case "csv":
-                     dialect = csv.Sniffer().sniff(example.data)
-                     data = io.StringIO(example.data)
-                     reader = csv.reader(data, dialect=dialect)
-                     return next(reader)
-                 case "yaml":
-                     data = yaml.safe_load(example.data)
-                     return data.keys()
-                 case "json":
-                     data = json.loads(example.data)
-                     return data.keys()
-                 case _:
-                     # This is checked in lint_implementation, so shouldn't happen.
-                     raise NotImplementedError(f"Unknown type {example.type}")
-         else:
-             # Checked in lint_implementation, shouldn't happen.
-             raise NotImplementedError("Can't lint object examples.")
-
-     def lint_implementation(self, contract: DataContractSpecification) -> LinterResult:
-         """Check whether the example(s) headers match the model.
-
-         This linter checks whether the example's fields match the model
-         fields, and whether all required fields of the model are present in
-         the example.
-         """
-         result = LinterResult()
-         examples = contract.examples
-         models = contract.models
-         examples_with_model = []
-         for index, example in enumerate(examples):
-             if example.model not in models:
-                 result = result.with_error(f"Example {index + 1} has non-existent model '{example.model}'")
-             else:
-                 examples_with_model.append((index, example, models.get(example.model)))
-         for index, example, model in examples_with_model:
-             if example.type == "custom":
-                 result = result.with_warning(
-                     f"Example {index + 1} has type" ' "custom", cannot check model' " conformance"
-                 )
-             elif not isinstance(example.data, str):
-                 result = result.with_warning(
-                     f"Example {index + 1} is not a " "string example, can only lint string examples for now."
-                 )
-             elif model.type == "object":
-                 result = result.with_warning(
-                     f"Example {index + 1} uses a "
-                     f"model '{example.model}' with type 'object'. Linting is "
-                     "currently only supported for 'table' models"
-                 )
-             else:
-                 if example.type in ("csv", "yaml", "json"):
-                     headers = self.get_example_headers(example)
-                     for example_header in headers:
-                         if example_header not in model.fields:
-                             result = result.with_error(
-                                 f"Example {index + 1} has field '{example_header}'"
-                                 f" that's not contained in model '{example.model}'"
-                             )
-                     for field_name, field_value in model.fields.items():
-                         if field_name not in headers and field_value.required:
-                             result = result.with_error(
-                                 f"Example {index + 1} is missing field '{field_name}'"
-                                 f" required by model '{example.model}'"
-                             )
-                 else:
-                     result = result.with_error(f"Example {index + 1} has unknown type" f"{example.type}")
-         return result