datacontract-cli 0.10.14__py3-none-any.whl → 0.10.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +227 -9
- datacontract/breaking/breaking_rules.py +24 -0
- datacontract/catalog/catalog.py +1 -1
- datacontract/cli.py +99 -32
- datacontract/data_contract.py +26 -4
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
- datacontract/engines/soda/check_soda_execute.py +5 -3
- datacontract/engines/soda/connections/duckdb.py +1 -0
- datacontract/engines/soda/connections/kafka.py +12 -12
- datacontract/export/avro_idl_converter.py +1 -2
- datacontract/export/bigquery_converter.py +4 -3
- datacontract/export/data_caterer_converter.py +1 -1
- datacontract/export/dbml_converter.py +2 -4
- datacontract/export/dbt_converter.py +2 -3
- datacontract/export/exporter.py +1 -1
- datacontract/export/exporter_factory.py +3 -2
- datacontract/export/go_converter.py +3 -2
- datacontract/export/great_expectations_converter.py +202 -40
- datacontract/export/html_export.py +1 -1
- datacontract/export/jsonschema_converter.py +3 -2
- datacontract/export/odcs_v2_exporter.py +1 -1
- datacontract/export/odcs_v3_exporter.py +1 -1
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +1 -1
- datacontract/export/rdf_converter.py +4 -5
- datacontract/export/sodacl_converter.py +6 -2
- datacontract/export/spark_converter.py +7 -6
- datacontract/export/sql_converter.py +1 -2
- datacontract/export/sqlalchemy_converter.py +1 -2
- datacontract/export/terraform_converter.py +1 -1
- datacontract/imports/avro_importer.py +1 -1
- datacontract/imports/bigquery_importer.py +1 -1
- datacontract/imports/dbml_importer.py +2 -2
- datacontract/imports/dbt_importer.py +3 -2
- datacontract/imports/glue_importer.py +5 -3
- datacontract/imports/iceberg_importer.py +5 -6
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +7 -1
- datacontract/imports/jsonschema_importer.py +3 -2
- datacontract/imports/odcs_v2_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +2 -2
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/spark_importer.py +2 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/imports/unity_importer.py +3 -3
- datacontract/integration/opentelemetry.py +0 -1
- datacontract/lint/lint.py +2 -1
- datacontract/lint/linters/description_linter.py +1 -0
- datacontract/lint/linters/example_model_linter.py +1 -0
- datacontract/lint/linters/field_pattern_linter.py +1 -0
- datacontract/lint/linters/field_reference_linter.py +1 -0
- datacontract/lint/linters/notice_period_linter.py +1 -0
- datacontract/lint/linters/quality_schema_linter.py +1 -0
- datacontract/lint/linters/valid_constraints_linter.py +1 -0
- datacontract/lint/resolve.py +1 -1
- datacontract/lint/schema.py +1 -1
- datacontract/model/data_contract_specification.py +11 -5
- datacontract/model/run.py +21 -12
- datacontract/templates/index.html +6 -6
- datacontract/web.py +2 -3
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/METADATA +97 -52
- datacontract_cli-0.10.15.dist-info/RECORD +105 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/WHEEL +1 -1
- datacontract_cli-0.10.14.dist-info/RECORD +0 -103
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/top_level.txt +0 -0
|
@@ -3,7 +3,7 @@ import json
|
|
|
3
3
|
import fastjsonschema
|
|
4
4
|
|
|
5
5
|
from datacontract.imports.importer import Importer
|
|
6
|
-
from datacontract.model.data_contract_specification import DataContractSpecification,
|
|
6
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Definition, Field, Model
|
|
7
7
|
from datacontract.model.exceptions import DataContractException
|
|
8
8
|
|
|
9
9
|
|
|
@@ -111,7 +111,8 @@ def schema_to_args(property_schema, is_required: bool = None) -> dict:
|
|
|
111
111
|
nested_properties = property_schema.get("properties")
|
|
112
112
|
if nested_properties is not None:
|
|
113
113
|
# recursive call for complex nested properties
|
|
114
|
-
|
|
114
|
+
required = property_schema.get("required", [])
|
|
115
|
+
field_kwargs["fields"] = jsonschema_to_args(nested_properties, required)
|
|
115
116
|
|
|
116
117
|
return field_kwargs
|
|
117
118
|
|
|
@@ -6,16 +6,16 @@ import yaml
|
|
|
6
6
|
|
|
7
7
|
from datacontract.imports.importer import Importer
|
|
8
8
|
from datacontract.model.data_contract_specification import (
|
|
9
|
+
DATACONTRACT_TYPES,
|
|
9
10
|
Availability,
|
|
10
11
|
Contact,
|
|
11
12
|
DataContractSpecification,
|
|
13
|
+
Field,
|
|
12
14
|
Info,
|
|
13
15
|
Model,
|
|
14
|
-
Field,
|
|
15
16
|
Retention,
|
|
16
17
|
ServiceLevel,
|
|
17
18
|
Terms,
|
|
18
|
-
DATACONTRACT_TYPES,
|
|
19
19
|
)
|
|
20
20
|
from datacontract.model.exceptions import DataContractException
|
|
21
21
|
|
|
@@ -8,16 +8,16 @@ import yaml
|
|
|
8
8
|
from datacontract.imports.importer import Importer
|
|
9
9
|
from datacontract.lint.resources import read_resource
|
|
10
10
|
from datacontract.model.data_contract_specification import (
|
|
11
|
+
DATACONTRACT_TYPES,
|
|
11
12
|
Availability,
|
|
12
13
|
DataContractSpecification,
|
|
14
|
+
Field,
|
|
13
15
|
Info,
|
|
14
16
|
Model,
|
|
15
|
-
Field,
|
|
16
17
|
Retention,
|
|
17
18
|
Server,
|
|
18
19
|
ServiceLevel,
|
|
19
20
|
Terms,
|
|
20
|
-
DATACONTRACT_TYPES,
|
|
21
21
|
)
|
|
22
22
|
from datacontract.model.exceptions import DataContractException
|
|
23
23
|
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import os.path
|
|
2
|
+
|
|
3
|
+
import pyarrow
|
|
4
|
+
from pyarrow import parquet
|
|
5
|
+
|
|
6
|
+
from datacontract.imports.importer import Importer
|
|
7
|
+
from datacontract.model.data_contract_specification import (
|
|
8
|
+
DataContractSpecification,
|
|
9
|
+
Field,
|
|
10
|
+
Model,
|
|
11
|
+
)
|
|
12
|
+
from datacontract.model.exceptions import DataContractException
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ParquetImporter(Importer):
    """Importer that adds a model built from a Parquet file's schema."""

    def import_source(
        self,
        data_contract_specification: DataContractSpecification,
        source: str,
        import_args: dict,
    ) -> DataContractSpecification:
        """Delegate to ``import_parquet`` for the file at ``source``.

        ``import_args`` is part of the importer signature but is not read here.
        """
        updated_specification = import_parquet(data_contract_specification, source)
        return updated_specification
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def import_parquet(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
    """Read the schema of the Parquet file at ``source`` and store it as a model.

    The model key is the file name without its ``.parquet`` suffix; any remaining
    dots are replaced by underscores so the name does not break the YAML output.
    Columns that are not nullable are marked as required.

    Returns the same specification object that was passed in, after mutation.
    """
    file_name = os.path.basename(source)
    schema_name = file_name.removesuffix(".parquet").replace(".", "_")

    arrow_schema = parquet.read_schema(source)

    def to_specification_field(column_name: str) -> Field:
        # Look the column up by name, convert it, then carry over nullability.
        column = arrow_schema.field(column_name)
        converted = map_pyarrow_field_to_specification_field(column, "parquet")
        if not column.nullable:
            converted.required = True
        return converted

    fields: dict[str, Field] = {name: to_specification_field(name) for name in arrow_schema.names}

    data_contract_specification.models[schema_name] = Model(fields=fields)
    return data_contract_specification
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def map_pyarrow_field_to_specification_field(pyarrow_field: pyarrow.Field, file_format: str) -> Field:
    """Translate a pyarrow field into a data contract ``Field``.

    Only the Arrow data type is inspected; nullability is left to the caller.

    Args:
        pyarrow_field: The Arrow field whose ``type`` is mapped.
        file_format: Name of the source format, used only in the error message.

    Returns:
        A ``Field`` with ``type`` set; decimals also carry ``precision`` and ``scale``.

    Raises:
        DataContractException: If no mapping exists for the Arrow type.
    """
    arrow_type = pyarrow_field.type
    checks = pyarrow.types

    # Decimals are the only types that need extra attributes, so handle them apart
    # from the table below (no other predicate in the table matches a decimal).
    if checks.is_decimal(arrow_type):
        return Field(type="decimal", precision=arrow_type.precision, scale=arrow_type.scale)

    # Ordered (predicate, contract type) pairs. Order matters: the specific int32 /
    # int64 tests must run before the generic is_integer catch-all.
    type_table = (
        (checks.is_boolean, "boolean"),
        (checks.is_int32, "int"),
        (checks.is_int64, "long"),
        (checks.is_integer, "number"),
        (checks.is_float16, "float"),
        (checks.is_float32, "float"),
        (checks.is_float64, "double"),
        (checks.is_timestamp, "timestamp"),
        (checks.is_date, "date"),
        (checks.is_null, "null"),
        # The large / fixed-size variants differ only in physical layout, so they
        # map to the same contract types as their regular counterparts.
        (checks.is_binary, "bytes"),
        (checks.is_large_binary, "bytes"),
        (checks.is_fixed_size_binary, "bytes"),
        (checks.is_string, "string"),
        (checks.is_large_string, "string"),
        (checks.is_map, "map"),
        # NOTE(review): dictionary is Arrow's dictionary-encoded (categorical) type,
        # not a key/value map; kept as "map" to preserve existing output — confirm.
        (checks.is_dictionary, "map"),
        (checks.is_struct, "struct"),
        (checks.is_list, "array"),
        (checks.is_large_list, "array"),
        (checks.is_fixed_size_list, "array"),
    )
    for matches, contract_type in type_table:
        if matches(arrow_type):
            return Field(type=contract_type)

    raise DataContractException(
        type="schema",
        name=f"Parse {file_format} schema",
        reason=f"{pyarrow_field.type} currently not supported.",
        engine="datacontract",
    )
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from simple_ddl_parser import parse_from_file
|
|
2
2
|
|
|
3
3
|
from datacontract.imports.importer import Importer
|
|
4
|
-
from datacontract.model.data_contract_specification import DataContractSpecification,
|
|
4
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class SqlImporter(Importer):
|
|
@@ -2,13 +2,13 @@ import json
|
|
|
2
2
|
import os
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from pyspark.sql import types
|
|
6
5
|
from databricks.sdk import WorkspaceClient
|
|
7
|
-
from databricks.sdk.service.catalog import
|
|
6
|
+
from databricks.sdk.service.catalog import ColumnInfo, TableInfo
|
|
7
|
+
from pyspark.sql import types
|
|
8
8
|
|
|
9
9
|
from datacontract.imports.importer import Importer
|
|
10
10
|
from datacontract.imports.spark_importer import _field_from_struct_type
|
|
11
|
-
from datacontract.model.data_contract_specification import DataContractSpecification,
|
|
11
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
12
12
|
from datacontract.model.exceptions import DataContractException
|
|
13
13
|
|
|
14
14
|
|
datacontract/lint/lint.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import abc
|
|
2
2
|
from dataclasses import dataclass, field
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Sequence, cast
|
|
5
5
|
|
|
6
6
|
from datacontract.model.run import Check
|
|
7
|
+
|
|
7
8
|
from ..model.data_contract_specification import DataContractSpecification
|
|
8
9
|
|
|
9
10
|
"""This module contains linter definitions for linting a data contract.
|
datacontract/lint/resolve.py
CHANGED
|
@@ -199,7 +199,7 @@ def _resolve_data_contract_from_str(
|
|
|
199
199
|
|
|
200
200
|
if is_open_data_contract_standard(yaml_dict):
|
|
201
201
|
# if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
|
|
202
|
-
data_contract_specification = DataContractSpecification(dataContractSpecification="
|
|
202
|
+
data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
|
|
203
203
|
return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
|
|
204
204
|
|
|
205
205
|
_validate_data_contract_specification_schema(yaml_dict, schema_location)
|
datacontract/lint/schema.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Any, Dict, List, Optional
|
|
3
3
|
|
|
4
4
|
import pydantic as pyd
|
|
5
5
|
import yaml
|
|
@@ -147,6 +147,7 @@ class Field(pyd.BaseModel):
|
|
|
147
147
|
format: str = None
|
|
148
148
|
required: bool = None
|
|
149
149
|
primary: bool = None
|
|
150
|
+
primaryKey: bool = None
|
|
150
151
|
unique: bool | None = None
|
|
151
152
|
references: str = None
|
|
152
153
|
description: str | None = None
|
|
@@ -282,9 +283,14 @@ class DataContractSpecification(pyd.BaseModel):
|
|
|
282
283
|
terms: Terms = None
|
|
283
284
|
models: Dict[str, Model] = {}
|
|
284
285
|
definitions: Dict[str, Definition] = {}
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
286
|
+
examples: List[Example] = pyd.Field(
|
|
287
|
+
default_factory=list,
|
|
288
|
+
deprecated="Removed in Data Contract Specification " "v1.1.0. Use models.examples instead.",
|
|
289
|
+
)
|
|
290
|
+
quality: Quality = pyd.Field(
|
|
291
|
+
default=None,
|
|
292
|
+
deprecated="Removed in Data Contract Specification v1.1.0. Use " "model-level and field-level quality instead.",
|
|
293
|
+
)
|
|
288
294
|
servicelevels: Optional[ServiceLevel] = None
|
|
289
295
|
links: Dict[str, str] = {}
|
|
290
296
|
tags: List[str] = []
|
|
@@ -292,7 +298,7 @@ class DataContractSpecification(pyd.BaseModel):
|
|
|
292
298
|
@classmethod
def from_file(cls, file):
    """Load a data contract specification from a file on disk.

    Args:
        file: Path of the file to read.

    Returns:
        Whatever ``DataContractSpecification.from_string`` returns for the file's text.

    Raises:
        FileNotFoundError: If ``file`` does not exist.
    """
    if not os.path.exists(file):
        raise FileNotFoundError(f"The file '{file}' does not exist.")
    # Decode as UTF-8 explicitly instead of relying on the platform default, and
    # bind the handle to its own name so the path argument is not shadowed.
    with open(file, "r", encoding="utf-8") as handle:
        file_content = handle.read()
    return DataContractSpecification.from_string(file_content)
|
datacontract/model/run.py
CHANGED
|
@@ -1,15 +1,24 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from datetime import datetime, timezone
|
|
3
|
+
from enum import Enum
|
|
3
4
|
from typing import List, Optional
|
|
4
5
|
from uuid import UUID, uuid4
|
|
5
6
|
|
|
6
7
|
from pydantic import BaseModel
|
|
7
8
|
|
|
8
9
|
|
|
10
|
+
class ResultEnum(str, Enum):
    """Closed set of result values.

    Members are ``str`` instances, so each compares equal to its plain string value.
    """

    passed = "passed"
    warning = "warning"
    failed = "failed"
    error = "error"
    unknown = "unknown"
|
|
16
|
+
|
|
17
|
+
|
|
9
18
|
class Check(BaseModel):
|
|
10
19
|
type: str
|
|
11
20
|
name: Optional[str]
|
|
12
|
-
result:
|
|
21
|
+
result: ResultEnum
|
|
13
22
|
engine: str
|
|
14
23
|
reason: Optional[str] = None
|
|
15
24
|
model: Optional[str] = None
|
|
@@ -33,29 +42,29 @@ class Run(BaseModel):
|
|
|
33
42
|
server: Optional[str] = None
|
|
34
43
|
timestampStart: datetime
|
|
35
44
|
timestampEnd: datetime
|
|
36
|
-
result:
|
|
45
|
+
result: ResultEnum = ResultEnum.unknown
|
|
37
46
|
checks: List[Check]
|
|
38
47
|
logs: List[Log]
|
|
39
48
|
|
|
40
49
|
def has_passed(self):
    """Recompute the result, then report whether it is ``ResultEnum.passed``."""
    self.calculate_result()
    outcome = self.result
    return outcome == ResultEnum.passed
|
|
43
52
|
|
|
44
53
|
def finish(self):
    """Stamp the end time with the current UTC time and recompute the result."""
    finished_at = datetime.now(timezone.utc)
    self.timestampEnd = finished_at
    self.calculate_result()
|
|
47
56
|
|
|
48
57
|
def calculate_result(self):
    """Set ``self.result`` to the most severe outcome found among ``self.checks``.

    Severity order is error, failed, warning, passed. If no check carries any of
    these outcomes (including when there are no checks), the result is unknown.
    """
    by_severity = (
        ResultEnum.error,
        ResultEnum.failed,
        ResultEnum.warning,
        ResultEnum.passed,
    )
    for outcome in by_severity:
        if any(check.result == outcome for check in self.checks):
            self.result = outcome
            break
    else:
        # The loop finished without a match.
        self.result = ResultEnum.unknown
|
|
59
68
|
|
|
60
69
|
def log_info(self, message: str):
|
|
61
70
|
logging.info(message)
|
|
@@ -78,17 +78,17 @@
|
|
|
78
78
|
|
|
79
79
|
<li class="col-span-1 rounded-lg bg-white shadow hover:bg-gray-50"
|
|
80
80
|
data-search="{{
|
|
81
|
-
contract.spec.info.title|lower }} {{
|
|
82
|
-
contract.spec.info.owner|lower if contract.spec.info.owner else '' }} {{
|
|
83
|
-
contract.spec.info.description|lower }} {%
|
|
81
|
+
contract.spec.info.title|lower|e }} {{
|
|
82
|
+
contract.spec.info.owner|lower|e if contract.spec.info.owner else '' }} {{
|
|
83
|
+
contract.spec.info.description|lower|e }} {%
|
|
84
84
|
for model_name, model in contract.spec.models.items() %}
|
|
85
|
-
{{ model.description|lower }} {%
|
|
85
|
+
{{ model.description|lower|e }} {%
|
|
86
86
|
for field_name, field in model.fields.items() %}
|
|
87
|
-
{{ field_name|lower }} {{ field.description|lower if field.description else '' }} {%
|
|
87
|
+
{{ field_name|lower|e }} {{ field.description|lower|e if field.description else '' }} {%
|
|
88
88
|
endfor %}
|
|
89
89
|
{% endfor %}
|
|
90
90
|
">
|
|
91
|
-
<a href="{{contract.html_link}}" >
|
|
91
|
+
<a href="{{contract.html_link|e}}" >
|
|
92
92
|
<div class="flex w-full justify-between space-x-1 p-6 pb-4">
|
|
93
93
|
<div class="flex-1 truncate">
|
|
94
94
|
<div class="flex items-center space-x-3">
|
datacontract/web.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
from typing import Annotated,
|
|
1
|
+
from typing import Annotated, Optional, Union
|
|
2
2
|
|
|
3
3
|
import typer
|
|
4
4
|
from fastapi import FastAPI, File
|
|
5
|
-
from fastapi.responses import HTMLResponse
|
|
5
|
+
from fastapi.responses import HTMLResponse, PlainTextResponse
|
|
6
6
|
|
|
7
7
|
from datacontract.data_contract import DataContract, ExportFormat
|
|
8
|
-
from fastapi.responses import PlainTextResponse
|
|
9
8
|
|
|
10
9
|
app = FastAPI()
|
|
11
10
|
|