datacontract-cli 0.9.6.post2__py3-none-any.whl → 0.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +139 -63
- datacontract/breaking/breaking_rules.py +71 -54
- datacontract/cli.py +138 -45
- datacontract/data_contract.py +316 -78
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +46 -35
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +4 -4
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +3 -7
- datacontract/export/avro_idl_converter.py +280 -0
- datacontract/export/dbt_converter.py +55 -80
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +10 -12
- datacontract/export/protobuf_converter.py +99 -0
- datacontract/export/pydantic_converter.py +140 -0
- datacontract/export/rdf_converter.py +35 -12
- datacontract/export/sodacl_converter.py +24 -24
- datacontract/export/sql_converter.py +93 -0
- datacontract/export/sql_type_converter.py +131 -0
- datacontract/export/terraform_converter.py +71 -0
- datacontract/imports/avro_importer.py +106 -0
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +4 -9
- datacontract/integration/publish_opentelemetry.py +107 -0
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +46 -31
- datacontract/lint/linters/description_linter.py +34 -0
- datacontract/lint/linters/example_model_linter.py +67 -43
- datacontract/lint/linters/field_pattern_linter.py +34 -0
- datacontract/lint/linters/field_reference_linter.py +38 -0
- datacontract/lint/linters/notice_period_linter.py +55 -0
- datacontract/lint/linters/primary_field_linter.py +28 -0
- datacontract/lint/linters/quality_schema_linter.py +52 -0
- datacontract/lint/linters/valid_constraints_linter.py +99 -0
- datacontract/lint/resolve.py +53 -8
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +27 -5
- datacontract/model/data_contract_specification.py +45 -25
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +1 -1
- datacontract/web.py +5 -8
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +207 -35
- datacontract_cli-0.9.8.dist-info/RECORD +63 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +1 -1
- datacontract_cli-0.9.6.post2.dist-info/RECORD +0 -47
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import List, Dict
|
|
3
3
|
|
|
4
|
-
import pydantic
|
|
4
|
+
import pydantic as pyd
|
|
5
5
|
import yaml
|
|
6
|
-
from pydantic import BaseModel
|
|
7
6
|
|
|
8
7
|
|
|
9
|
-
class Contact(BaseModel):
|
|
8
|
+
class Contact(pyd.BaseModel):
|
|
10
9
|
name: str = None
|
|
11
10
|
url: str = None
|
|
12
11
|
email: str = None
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
class Server(BaseModel):
|
|
14
|
+
class Server(pyd.BaseModel):
|
|
16
15
|
type: str = None
|
|
17
16
|
format: str = None
|
|
18
17
|
project: str = None
|
|
@@ -23,31 +22,53 @@ class Server(BaseModel):
|
|
|
23
22
|
location: str = None
|
|
24
23
|
account: str = None
|
|
25
24
|
database: str = None
|
|
26
|
-
schema_: str =
|
|
25
|
+
schema_: str = pyd.Field(default=None, alias="schema")
|
|
27
26
|
host: str = None
|
|
28
27
|
port: int = None
|
|
29
28
|
catalog: str = None
|
|
30
29
|
topic: str = None
|
|
31
|
-
http_path: str = None
|
|
32
|
-
token: str = None
|
|
30
|
+
http_path: str = None # Use ENV variable
|
|
31
|
+
token: str = None # Use ENV variable
|
|
33
32
|
dataProductId: str = None
|
|
34
33
|
outputPortId: str = None
|
|
35
34
|
|
|
36
35
|
|
|
37
|
-
class Terms(BaseModel):
|
|
36
|
+
class Terms(pyd.BaseModel):
|
|
38
37
|
usage: str = None
|
|
39
38
|
limitations: str = None
|
|
40
39
|
billing: str = None
|
|
41
40
|
noticePeriod: str = None
|
|
42
41
|
|
|
43
42
|
|
|
44
|
-
class
|
|
45
|
-
|
|
43
|
+
class Definition(pyd.BaseModel):
|
|
44
|
+
domain: str = None
|
|
45
|
+
name: str = None
|
|
46
|
+
title: str = None
|
|
47
|
+
description: str = None
|
|
48
|
+
type: str = None
|
|
49
|
+
enum: List[str] = []
|
|
50
|
+
format: str = None
|
|
51
|
+
minLength: int = None
|
|
52
|
+
maxLength: int = None
|
|
53
|
+
pattern: str = None
|
|
54
|
+
minimum: int = None
|
|
55
|
+
exclusiveMinimum: int = None
|
|
56
|
+
maximum: int = None
|
|
57
|
+
exclusiveMaximum: int = None
|
|
58
|
+
pii: bool = None
|
|
59
|
+
classification: str = None
|
|
60
|
+
tags: List[str] = []
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Field(pyd.BaseModel):
|
|
64
|
+
ref: str = pyd.Field(default=None, alias="$ref")
|
|
65
|
+
ref_obj: Definition = pyd.Field(default=None, exclude=True)
|
|
46
66
|
type: str = None
|
|
47
67
|
format: str = None
|
|
48
68
|
required: bool = None
|
|
49
69
|
primary: bool = None
|
|
50
70
|
unique: bool = None
|
|
71
|
+
references: str = None
|
|
51
72
|
description: str = None
|
|
52
73
|
pii: bool = None
|
|
53
74
|
classification: str = None
|
|
@@ -55,25 +76,23 @@ class Field(BaseModel):
|
|
|
55
76
|
minLength: int = None
|
|
56
77
|
maxLength: int = None
|
|
57
78
|
minimum: int = None
|
|
58
|
-
|
|
79
|
+
exclusiveMinimum: int = None
|
|
59
80
|
maximum: int = None
|
|
60
|
-
|
|
81
|
+
exclusiveMaximum: int = None
|
|
61
82
|
enum: List[str] = []
|
|
62
83
|
tags: List[str] = []
|
|
63
|
-
fields: Dict[str,
|
|
64
|
-
|
|
65
|
-
@property
|
|
66
|
-
def ref(self):
|
|
67
|
-
return self.schema.get("$ref")
|
|
84
|
+
fields: Dict[str, "Field"] = {}
|
|
85
|
+
items: "Field" = None
|
|
68
86
|
|
|
69
87
|
|
|
70
|
-
class Model(BaseModel):
|
|
88
|
+
class Model(pyd.BaseModel):
|
|
71
89
|
description: str = None
|
|
72
90
|
type: str = None
|
|
91
|
+
namespace: str = None
|
|
73
92
|
fields: Dict[str, Field] = {}
|
|
74
93
|
|
|
75
94
|
|
|
76
|
-
class Info(BaseModel):
|
|
95
|
+
class Info(pyd.BaseModel):
|
|
77
96
|
title: str = None
|
|
78
97
|
version: str = None
|
|
79
98
|
description: str = None
|
|
@@ -81,25 +100,26 @@ class Info(BaseModel):
|
|
|
81
100
|
contact: Contact = None
|
|
82
101
|
|
|
83
102
|
|
|
84
|
-
class Example(BaseModel):
|
|
103
|
+
class Example(pyd.BaseModel):
|
|
85
104
|
type: str = None
|
|
86
105
|
description: str = None
|
|
87
106
|
model: str = None
|
|
88
107
|
data: str | object = None
|
|
89
108
|
|
|
90
109
|
|
|
91
|
-
class Quality(BaseModel):
|
|
110
|
+
class Quality(pyd.BaseModel):
|
|
92
111
|
type: str = None
|
|
93
112
|
specification: str | object = None
|
|
94
113
|
|
|
95
114
|
|
|
96
|
-
class DataContractSpecification(BaseModel):
|
|
115
|
+
class DataContractSpecification(pyd.BaseModel):
|
|
97
116
|
dataContractSpecification: str = None
|
|
98
117
|
id: str = None
|
|
99
118
|
info: Info = None
|
|
100
119
|
servers: Dict[str, Server] = {}
|
|
101
120
|
terms: Terms = None
|
|
102
121
|
models: Dict[str, Model] = {}
|
|
122
|
+
definitions: Dict[str, Definition] = {}
|
|
103
123
|
# schema: Dict[str, str]
|
|
104
124
|
examples: List[Example] = []
|
|
105
125
|
quality: Quality = None
|
|
@@ -107,8 +127,8 @@ class DataContractSpecification(BaseModel):
|
|
|
107
127
|
@classmethod
|
|
108
128
|
def from_file(cls, file):
|
|
109
129
|
if not os.path.exists(file):
|
|
110
|
-
raise(f"The file '{file}' does not exist.")
|
|
111
|
-
with open(file,
|
|
130
|
+
raise (f"The file '{file}' does not exist.")
|
|
131
|
+
with open(file, "r") as file:
|
|
112
132
|
file_content = file.read()
|
|
113
133
|
return DataContractSpecification.from_string(file_content)
|
|
114
134
|
|
|
@@ -118,4 +138,4 @@ class DataContractSpecification(BaseModel):
|
|
|
118
138
|
return DataContractSpecification(**data)
|
|
119
139
|
|
|
120
140
|
def to_yaml(self):
|
|
121
|
-
return yaml.dump(self.model_dump(exclude_defaults=True, exclude_none=True), sort_keys=False)
|
|
141
|
+
return yaml.dump(self.model_dump(exclude_defaults=True, exclude_none=True), sort_keys=False, allow_unicode=True)
|
datacontract/model/exceptions.py
CHANGED
|
@@ -11,7 +11,17 @@ class DataContractException(Exception):
|
|
|
11
11
|
message (str): General message for the error.
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
-
def __init__(
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
type,
|
|
17
|
+
name,
|
|
18
|
+
reason,
|
|
19
|
+
engine="datacontract",
|
|
20
|
+
model=None,
|
|
21
|
+
original_exception=None,
|
|
22
|
+
result: str = "failed",
|
|
23
|
+
message="Run operation failed",
|
|
24
|
+
):
|
|
15
25
|
self.type = type
|
|
16
26
|
self.name = name
|
|
17
27
|
self.model = model
|
|
@@ -21,4 +31,5 @@ class DataContractException(Exception):
|
|
|
21
31
|
self.original_exception = original_exception
|
|
22
32
|
self.message = message
|
|
23
33
|
super().__init__(
|
|
24
|
-
f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}"
|
|
34
|
+
f"{self.message}: [{self.type}] {self.name} - {self.model} - {self.result} - {self.reason} - {self.engine}"
|
|
35
|
+
)
|
datacontract/model/run.py
CHANGED
datacontract/web.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from typing import Annotated
|
|
1
|
+
from typing import Annotated, Union
|
|
2
2
|
|
|
3
|
-
from fastapi import FastAPI, File
|
|
3
|
+
from fastapi import FastAPI, File
|
|
4
4
|
|
|
5
5
|
from datacontract.data_contract import DataContract
|
|
6
6
|
|
|
@@ -8,10 +8,7 @@ app = FastAPI()
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@app.post("/lint")
|
|
11
|
-
def lint(file: Annotated[bytes, File()]):
|
|
11
|
+
def lint(file: Annotated[bytes, File()], linters: Union[str, set[str]] = "all"):
|
|
12
12
|
data_contract = DataContract(data_contract_str=str(file, encoding="utf-8"))
|
|
13
|
-
lint_result = data_contract.lint()
|
|
14
|
-
return {
|
|
15
|
-
"result": lint_result.result,
|
|
16
|
-
"checks": lint_result.checks
|
|
17
|
-
}
|
|
13
|
+
lint_result = data_contract.lint(enabled_linters=linters)
|
|
14
|
+
return {"result": lint_result.result, "checks": lint_result.checks}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datacontract-cli
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.8
|
|
4
4
|
Summary: Test data contracts
|
|
5
5
|
Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>
|
|
6
6
|
Project-URL: Homepage, https://cli.datacontract.com
|
|
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: typer[all]
|
|
14
|
+
Requires-Dist: typer[all] <0.13,>=0.9
|
|
15
15
|
Requires-Dist: pydantic <2.7.0,>=2.5.3
|
|
16
16
|
Requires-Dist: pyyaml ~=6.0.1
|
|
17
17
|
Requires-Dist: requests ~=2.31.0
|
|
@@ -19,22 +19,27 @@ Requires-Dist: fastapi ==0.110.0
|
|
|
19
19
|
Requires-Dist: fastparquet ==2024.2.0
|
|
20
20
|
Requires-Dist: python-multipart ==0.0.9
|
|
21
21
|
Requires-Dist: rich ~=13.7.0
|
|
22
|
-
Requires-Dist: simple-ddl-parser ==1.0.
|
|
23
|
-
Requires-Dist: soda-core-bigquery
|
|
24
|
-
Requires-Dist: soda-core-duckdb
|
|
25
|
-
Requires-Dist: soda-core-postgres
|
|
26
|
-
Requires-Dist: soda-core-snowflake
|
|
27
|
-
Requires-Dist: soda-core-spark[databricks]
|
|
28
|
-
Requires-Dist: soda-core-spark-df
|
|
22
|
+
Requires-Dist: simple-ddl-parser ==1.0.4
|
|
23
|
+
Requires-Dist: soda-core-bigquery <3.4.0,>=3.3.1
|
|
24
|
+
Requires-Dist: soda-core-duckdb <3.4.0,>=3.3.1
|
|
25
|
+
Requires-Dist: soda-core-postgres <3.4.0,>=3.3.1
|
|
26
|
+
Requires-Dist: soda-core-snowflake <3.4.0,>=3.3.1
|
|
27
|
+
Requires-Dist: soda-core-spark[databricks] <3.4.0,>=3.3.1
|
|
28
|
+
Requires-Dist: soda-core-spark-df <3.4.0,>=3.3.1
|
|
29
29
|
Requires-Dist: snowflake-connector-python[pandas] <3.8,>=3.6
|
|
30
|
-
Requires-Dist: duckdb ==0.10.
|
|
30
|
+
Requires-Dist: duckdb ==0.10.1
|
|
31
31
|
Requires-Dist: fastjsonschema ~=2.19.1
|
|
32
32
|
Requires-Dist: python-dotenv ~=1.0.0
|
|
33
|
-
Requires-Dist: s3fs ==2024.
|
|
33
|
+
Requires-Dist: s3fs ==2024.3.1
|
|
34
34
|
Requires-Dist: rdflib ==7.0.0
|
|
35
|
+
Requires-Dist: avro ==1.11.3
|
|
36
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc ~=1.16.0
|
|
37
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http ~=1.16.0
|
|
35
38
|
Provides-Extra: dev
|
|
36
39
|
Requires-Dist: httpx ==0.27.0 ; extra == 'dev'
|
|
40
|
+
Requires-Dist: ruff ; extra == 'dev'
|
|
37
41
|
Requires-Dist: pytest ; extra == 'dev'
|
|
42
|
+
Requires-Dist: testcontainers <4.0 ; extra == 'dev'
|
|
38
43
|
Requires-Dist: testcontainers-minio ; extra == 'dev'
|
|
39
44
|
Requires-Dist: testcontainers-postgres ; extra == 'dev'
|
|
40
45
|
Requires-Dist: testcontainers-kafka ; extra == 'dev'
|
|
@@ -52,6 +57,16 @@ Requires-Dist: testcontainers-kafka ; extra == 'dev'
|
|
|
52
57
|
The `datacontract` CLI is an open source command-line tool for working with [Data Contracts](https://datacontract.com/).
|
|
53
58
|
It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
|
|
54
59
|
|
|
60
|
+

|
|
61
|
+
|
|
62
|
+
<div align="center">
|
|
63
|
+
<a href="https://www.youtube.com/watch?v=B1dixhgO2vQ">
|
|
64
|
+
<img
|
|
65
|
+
src="https://img.youtube.com/vi/B1dixhgO2vQ/0.jpg"
|
|
66
|
+
alt="Demo of Data Contract CLI"
|
|
67
|
+
style="width:100%;">
|
|
68
|
+
</a>
|
|
69
|
+
</div>
|
|
55
70
|
|
|
56
71
|
## Getting started
|
|
57
72
|
|
|
@@ -123,20 +138,20 @@ $ datacontract test --examples datacontract.yaml
|
|
|
123
138
|
# find differences between two data contracts (Coming Soon)
|
|
124
139
|
$ datacontract diff datacontract-v1.yaml datacontract-v2.yaml
|
|
125
140
|
|
|
126
|
-
#
|
|
141
|
+
# find differences between two data contracts, categorized into error, warning, and info.
|
|
142
|
+
$ datacontract changelog datacontract-v1.yaml datacontract-v2.yaml
|
|
143
|
+
|
|
144
|
+
# fail pipeline on breaking changes. Uses changelog internally and shows only errors and warnings.
|
|
127
145
|
$ datacontract breaking datacontract-v1.yaml datacontract-v2.yaml
|
|
128
146
|
|
|
129
|
-
# export model as jsonschema
|
|
147
|
+
# export model as jsonschema (other formats: avro, dbt, dbt-sources, dbt-staging-sql, jsonschema, odcs, rdf, sql (coming soon), sodacl, terraform)
|
|
130
148
|
$ datacontract export --format jsonschema datacontract.yaml
|
|
131
149
|
|
|
132
|
-
# export model as dbt
|
|
133
|
-
$ datacontract export --format dbt datacontract.yaml
|
|
134
|
-
|
|
135
150
|
# import sql
|
|
136
151
|
$ datacontract import --format sql --source my_ddl.sql
|
|
137
152
|
|
|
138
|
-
# import
|
|
139
|
-
$ datacontract import --format
|
|
153
|
+
# import avro
|
|
154
|
+
$ datacontract import --format avro --source avro_schema.avsc
|
|
140
155
|
```
|
|
141
156
|
|
|
142
157
|
## Programmatic (Python)
|
|
@@ -150,7 +165,15 @@ if not run.has_passed():
|
|
|
150
165
|
# Abort pipeline, alert, or take corrective actions...
|
|
151
166
|
```
|
|
152
167
|
|
|
153
|
-
##
|
|
168
|
+
## Integrations
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
| Integration | Option | Description |
|
|
172
|
+
|-------------------|------------------------------|-------------------------------------------------------------------------------------------------------|
|
|
173
|
+
| Data Mesh Manager | `--publish` | Push full results to the [Data Mesh Manager API](https://api.datamesh-manager.com/swagger/index.html) |
|
|
174
|
+
| OpenTelemetry | `--publish-to-opentelemetry` | Push result as gauge metrics (logs are planned) |
|
|
175
|
+
|
|
176
|
+
### Integration with Data Mesh Manager
|
|
154
177
|
|
|
155
178
|
If you use [Data Mesh Manager](https://datamesh-manager.com/), you can use the data contract URL and append the `--publish` option to send and display the test results. Set an environment variable for your API key.
|
|
156
179
|
|
|
@@ -160,9 +183,34 @@ $ EXPORT DATAMESH_MANAGER_API_KEY=xxx
|
|
|
160
183
|
$ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml --server production --publish
|
|
161
184
|
```
|
|
162
185
|
|
|
186
|
+
### Integration with OpenTelemetry
|
|
163
187
|
|
|
188
|
+
If you use OpenTelemetry, you can use the data contract URL and append the `--publish-to-opentelemetry` option to send the test results to your OTLP-compatible instance, e.g., Prometheus.
|
|
164
189
|
|
|
190
|
+
The metric name is "datacontract.cli.test.result" and it uses the following encoding for the result:
|
|
165
191
|
|
|
192
|
+
| datacontract.cli.test.result | Description |
|
|
193
|
+
|-------|---------------------------------------|
|
|
194
|
+
| 0 | test run passed, no warnings |
|
|
195
|
+
| 1 | test run has warnings |
|
|
196
|
+
| 2 | test run failed |
|
|
197
|
+
| 3 | test run not possible due to an error |
|
|
198
|
+
| 4 | test status unknown |
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# Fetch current data contract, execute tests on production, and publish result to OpenTelemetry
|
|
203
|
+
$ EXPORT OTEL_SERVICE_NAME=datacontract-cli
|
|
204
|
+
$ EXPORT OTEL_EXPORTER_OTLP_ENDPOINT=https://YOUR_ID.apm.westeurope.azure.elastic-cloud.com:443
|
|
205
|
+
$ EXPORT OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20secret # Optional, when using SaaS Products
|
|
206
|
+
$ EXPORT OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf # Optional, default is http/protobuf - use value grpc to use the gRPC protocol instead
|
|
207
|
+
# Send to OpenTelemetry
|
|
208
|
+
$ datacontract test https://demo.datamesh-manager.com/demo279750347121/datacontracts/4df9d6ee-e55d-4088-9598-b635b2fdcbbc/datacontract.yaml --server production --publish-to-opentelemetry
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Current limitations:
|
|
212
|
+
- currently, only ConsoleExporter and OTLP Exporter
|
|
213
|
+
- Metrics only, no logs yet (but loosely planned)
|
|
166
214
|
|
|
167
215
|
## Installation
|
|
168
216
|
|
|
@@ -451,20 +499,36 @@ datacontract export --format dbt
|
|
|
451
499
|
|
|
452
500
|
Available export options:
|
|
453
501
|
|
|
454
|
-
| Type
|
|
455
|
-
|
|
456
|
-
| `jsonschema`
|
|
457
|
-
| `odcs`
|
|
458
|
-
| `sodacl`
|
|
459
|
-
| `dbt`
|
|
460
|
-
| `dbt-sources`
|
|
461
|
-
| `dbt-staging-sql`
|
|
462
|
-
| `rdf`
|
|
463
|
-
| `avro`
|
|
464
|
-
| `
|
|
465
|
-
| `
|
|
466
|
-
| `
|
|
467
|
-
|
|
|
502
|
+
| Type | Description | Status |
|
|
503
|
+
|----------------------|---------------------------------------------------------|--------|
|
|
504
|
+
| `jsonschema` | Export to JSON Schema | ✅ |
|
|
505
|
+
| `odcs` | Export to Open Data Contract Standard (ODCS) | ✅ |
|
|
506
|
+
| `sodacl` | Export to SodaCL quality checks in YAML format | ✅ |
|
|
507
|
+
| `dbt` | Export to dbt models in YAML format | ✅ |
|
|
508
|
+
| `dbt-sources` | Export to dbt sources in YAML format | ✅ |
|
|
509
|
+
| `dbt-staging-sql` | Export to dbt staging SQL models | ✅ |
|
|
510
|
+
| `rdf` | Export data contract to RDF representation in N3 format | ✅ |
|
|
511
|
+
| `avro` | Export to AVRO models | ✅ |
|
|
512
|
+
| `protobuf` | Export to Protobuf | ✅ |
|
|
513
|
+
| `terraform` | Export to terraform resources | ✅ |
|
|
514
|
+
| `sql` | Export to SQL DDL | ✅ |
|
|
515
|
+
| `sql-query` | Export to SQL Query | ✅ |
|
|
516
|
+
| `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅ |
|
|
517
|
+
| `bigquery` | Export to BigQuery Schemas | TBD |
|
|
518
|
+
| `pydantic` | Export to pydantic models | TBD |
|
|
519
|
+
| `html` | Export to HTML page | TBD |
|
|
520
|
+
| Missing something? | Please create an issue on GitHub | TBD |
|
|
521
|
+
|
|
522
|
+
#### Great Expectations
|
|
523
|
+
The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
|
|
524
|
+
If the contract includes multiple models, you need to specify the name of the model you wish to export.
|
|
525
|
+
```shell
|
|
526
|
+
datacontract export datacontract.yaml --format great-expectations --model orders
|
|
527
|
+
```
|
|
528
|
+
The export creates a list of expectations by utilizing:
|
|
529
|
+
|
|
530
|
+
- The data from the Model definition with a fixed mapping
|
|
531
|
+
- The expectations provided in the quality field for each model (find here the expectations gallery https://greatexpectations.io/expectations/)
|
|
468
532
|
|
|
469
533
|
#### RDF
|
|
470
534
|
|
|
@@ -502,13 +566,120 @@ Available import options:
|
|
|
502
566
|
| Type | Description | Status |
|
|
503
567
|
|--------------------|------------------------------------------------|---------|
|
|
504
568
|
| `sql` | Import from SQL DDL | ✅ |
|
|
569
|
+
| `avro` | Import from AVRO schemas | ✅ |
|
|
505
570
|
| `protobuf` | Import from Protobuf schemas | TBD |
|
|
506
|
-
| `avro` | Import from AVRO schemas | TBD |
|
|
507
571
|
| `jsonschema` | Import from JSON Schemas | TBD |
|
|
572
|
+
| `bigquery` | Import from BigQuery Schemas | TBD |
|
|
508
573
|
| `dbt` | Import from dbt models | TBD |
|
|
509
574
|
| `odcs` | Import from Open Data Contract Standard (ODCS) | TBD |
|
|
510
575
|
| Missing something? | Please create an issue on GitHub | TBD |
|
|
511
576
|
|
|
577
|
+
## Best Practices
|
|
578
|
+
|
|
579
|
+
We share best practices in using the Data Contract CLI.
|
|
580
|
+
|
|
581
|
+
### Data-first Approach
|
|
582
|
+
|
|
583
|
+
Create a data contract based on the actual data. This is the fastest way to get started and to get feedback from the data consumers.
|
|
584
|
+
|
|
585
|
+
1. Use an existing physical schema (e.g., SQL DDL) as a starting point to define your logical data model in the contract. Double check right after the import whether the actual data meets the imported logical data model. Just to be sure.
|
|
586
|
+
```bash
|
|
587
|
+
$ datacontract import --format sql ddl.sql
|
|
588
|
+
$ datacontract test
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
2. Add examples to the `datacontract.yaml`. If you can, use actual data and anonymize. Make sure that the examples match the imported logical data model.
|
|
592
|
+
```bash
|
|
593
|
+
$ datacontract test --examples
|
|
594
|
+
```
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
3. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adhere to the contract. Check against examples for a very fast feedback loop.
|
|
598
|
+
```bash
|
|
599
|
+
$ datacontract test --examples
|
|
600
|
+
$ datacontract test
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
4. Make sure that all the best practices for a `datacontract.yaml` are met using the linter. You probably forgot to document some fields and add the terms and conditions.
|
|
604
|
+
```bash
|
|
605
|
+
$ datacontract lint
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
5. Set up a CI pipeline that executes daily and reports the results to the [Data Mesh Manager](https://datamesh-manager.com). Or to some place else. You can even publish to any opentelemetry compatible system.
|
|
609
|
+
```bash
|
|
610
|
+
$ datacontract test --publish https://api.datamesh-manager.com/api/runs
|
|
611
|
+
```
|
|
612
|
+
|
|
613
|
+
### Contract-First
|
|
614
|
+
|
|
615
|
+
Create a data contract based on the requirements from use cases.
|
|
616
|
+
|
|
617
|
+
1. Start with a `datacontract.yaml` template.
|
|
618
|
+
```bash
|
|
619
|
+
$ datacontract init
|
|
620
|
+
```
|
|
621
|
+
|
|
622
|
+
2. Add examples to the `datacontract.yaml`. Do not start with the data model, although you are probably tempted to do that. Examples are the fastest way to get feedback from everybody and not lose someone in the discussion.
|
|
623
|
+
|
|
624
|
+
3. Create the model based on the examples. Test the model against the examples to double-check whether the model matches the examples.
|
|
625
|
+
```bash
|
|
626
|
+
$ datacontract test --examples
|
|
627
|
+
```
|
|
628
|
+
|
|
629
|
+
4. Add quality checks and additional type constraints one by one to the contract and make sure the examples and the actual data still adhere to the contract. Check against examples for a very fast feedback loop.
|
|
630
|
+
```bash
|
|
631
|
+
$ datacontract test --examples
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
5. Fill in the terms, descriptions, etc. Make sure you follow all best practices for a `datacontract.yaml` using the linter.
|
|
635
|
+
```bash
|
|
636
|
+
$ datacontract lint
|
|
637
|
+
```
|
|
638
|
+
|
|
639
|
+
6. Set up a CI pipeline that lints and tests the examples so you make sure that any changes later do not decrease the quality of the contract.
|
|
640
|
+
```bash
|
|
641
|
+
$ datacontract lint
|
|
642
|
+
$ datacontract test --examples
|
|
643
|
+
```
|
|
644
|
+
|
|
645
|
+
7. Use the export function to start building the providing data product as well as the integration into the consuming data products.
|
|
646
|
+
```bash
|
|
647
|
+
# data provider
|
|
648
|
+
$ datacontract export --format dbt
|
|
649
|
+
# data consumer
|
|
650
|
+
$ datacontract export --format dbt-sources
|
|
651
|
+
$ datacontract export --format dbt-staging-sql
|
|
652
|
+
```
|
|
653
|
+
|
|
654
|
+
### Schema Evolution
|
|
655
|
+
|
|
656
|
+
#### Non-breaking Changes
|
|
657
|
+
Examples: adding models or fields
|
|
658
|
+
|
|
659
|
+
- Add the models or fields in the datacontract.yaml
|
|
660
|
+
- Increment the minor version of the datacontract.yaml on any change. Simply edit the datacontract.yaml for this.
|
|
661
|
+
- You need a policy that these changes are non-breaking. That means that one cannot use the star expression in SQL to query a table under contract. Make the consequences known.
|
|
662
|
+
- Fail the build in the Pull Request if a datacontract.yaml accidentally adds a breaking change despite only a minor version change
|
|
663
|
+
```bash
|
|
664
|
+
$ datacontract breaking datacontract-from-pr.yaml datacontract-from-main.yaml
|
|
665
|
+
```
|
|
666
|
+
- Create a changelog of this minor change.
|
|
667
|
+
```bash
|
|
668
|
+
$ datacontract changelog datacontract-from-pr.yaml datacontract-from-main.yaml
|
|
669
|
+
```
|
|
670
|
+
#### Breaking Changes
|
|
671
|
+
Examples: Removing or renaming models and fields.
|
|
672
|
+
|
|
673
|
+
- Remove or rename models and fields in the datacontract.yaml, and any other change that might be part of this new major version of this data contract.
|
|
674
|
+
- Increment the major version of the datacontract.yaml for this and create a new file for the major version. The reason is that one needs to offer an upgrade path for the data consumers from the old to the new major version.
|
|
675
|
+
- As data consumers need to migrate, try to reduce the frequency of major versions by making multiple breaking changes together if possible.
|
|
676
|
+
- Be aware of the notice period in the data contract as this is the minimum amount of time you have to offer both the old and the new version for a migration path.
|
|
677
|
+
- Do not fear making breaking changes with data contracts. It's okay to do them in this controlled way. Really!
|
|
678
|
+
- Create a changelog of this major change.
|
|
679
|
+
```bash
|
|
680
|
+
$ datacontract changelog datacontract-from-pr.yaml datacontract-from-main.yaml
|
|
681
|
+
```
|
|
682
|
+
|
|
512
683
|
## Development Setup
|
|
513
684
|
|
|
514
685
|
Python base interpreter should be 3.11.x (unless working on 3.12 release candidate).
|
|
@@ -521,7 +692,8 @@ source venv/bin/activate
|
|
|
521
692
|
# Install Requirements
|
|
522
693
|
pip install --upgrade pip setuptools wheel
|
|
523
694
|
pip install -e '.[dev]'
|
|
524
|
-
|
|
695
|
+
ruff check --fix
|
|
696
|
+
ruff format --check
|
|
525
697
|
pytest
|
|
526
698
|
```
|
|
527
699
|
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
datacontract/cli.py,sha256=NTM6W9J8WvnzheQnaL5W1JukEdXhvfPOtyv2T8eFnkE,10515
|
|
3
|
+
datacontract/data_contract.py,sha256=HMKFAKCN5rlrSwPzrrnGiybttS_-W9ipctsulTlqocE,21656
|
|
4
|
+
datacontract/web.py,sha256=SWglmbqy3NV5h4VDsG0OpwhASJT9uve7w6FPwkBjIYM,457
|
|
5
|
+
datacontract/breaking/breaking.py,sha256=Abw59Xd_CypS6Us-hvsGnPvCUo-a-cl8RzDCmjG4yfk,11719
|
|
6
|
+
datacontract/breaking/breaking_rules.py,sha256=PzjCojAwteiIP3AKHfzeHAZG5b8WNKTeFl8FbFAWOzo,2913
|
|
7
|
+
datacontract/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py,sha256=TXO47ON3NjwYI4Y2eBYklMOCo7vAtYzqLPAhZhii6dg,1565
|
|
9
|
+
datacontract/engines/datacontract/check_that_datacontract_file_exists.py,sha256=XHvxX6BrTdZk2wN55zd6ER4k7ILDyGzjbCbZxQAJ2iE,665
|
|
10
|
+
datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py,sha256=CsxFGyInjpsylmoofByV1b-wpFhG1jtRWxSUpp-BXk8,1525
|
|
11
|
+
datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=YkVC0KrJlSMXTvrgopGseRBtJKZf-T9Nxk5oKZwLYWk,5754
|
|
12
|
+
datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=iupiyqBa1dzgT2BtVGna-BjC5rqe6MTLs2QRp8GTs7M,665
|
|
13
|
+
datacontract/engines/soda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
datacontract/engines/soda/check_soda_execute.py,sha256=4zd-E1-3RpqXHLdAVodVZm-tYMBfZORAyPDTObIfodo,6507
|
|
15
|
+
datacontract/engines/soda/connections/bigquery.py,sha256=Ao0KaJe4R28auU_4umxvVaLB6ZHEbKaNoYZ-RfAUmeo,662
|
|
16
|
+
datacontract/engines/soda/connections/dask.py,sha256=Yy6Et2n_vDVsdjtqyBWDSZt7mnjPzPk_MZ-92VZHfnY,1496
|
|
17
|
+
datacontract/engines/soda/connections/databricks.py,sha256=lpMju-o_TzLZeF0EEVwePPr8JahqvFnj5xRYjF15fc8,561
|
|
18
|
+
datacontract/engines/soda/connections/duckdb.py,sha256=xKMs_EVqzG4ynachRw9Xx_syA27wDCBLC15ar48k0aY,2340
|
|
19
|
+
datacontract/engines/soda/connections/kafka.py,sha256=AgAQxm_NgPUXQShqLS81PAjcQ9hJI1fy8CDFdGa_luI,5971
|
|
20
|
+
datacontract/engines/soda/connections/postgres.py,sha256=9GTF4Es3M5vb7ocSGqAxXmslvkS5CjsPQGIuo020CFc,626
|
|
21
|
+
datacontract/engines/soda/connections/snowflake.py,sha256=y1t2a1DWY4_tr5k-X5_nhLE6v1rfCwTahzhtHR91x9A,719
|
|
22
|
+
datacontract/export/avro_converter.py,sha256=hbw3d9FalqsjalXK2XqZbL9ecqnbCMs6o-kdDyeG0ZU,2202
|
|
23
|
+
datacontract/export/avro_idl_converter.py,sha256=pbDel_DdvakGOtxFzP-WKnHO1sshCCaLZeNkz3Dyvh8,9582
|
|
24
|
+
datacontract/export/dbt_converter.py,sha256=QF1PYh-UAZEnFYq2D70iT7KUIWCMd1tOVt8lfXmb0Ho,8549
|
|
25
|
+
datacontract/export/great_expectations_converter.py,sha256=yxYSyYOKow_5P6zqzcYlt3zoCAFMoDgEpFvrzXr_2G8,4908
|
|
26
|
+
datacontract/export/jsonschema_converter.py,sha256=6Y0vsIx8GtmrZt7tjivru0Qc6IAZ5IApmDwav-VJinQ,3153
|
|
27
|
+
datacontract/export/odcs_converter.py,sha256=l4fgXUHewtxfJWqIxMNU4y7apwvl_SYpb479IeZPI1A,3732
|
|
28
|
+
datacontract/export/protobuf_converter.py,sha256=K40yEdfRLYDLUx8bn3_an-cwdHfWv8_SBgzGLJT6mX4,2974
|
|
29
|
+
datacontract/export/pydantic_converter.py,sha256=dES_NpeXTMBPX-GASZmsRO9pONVVVKQVEQAM5maTfJ8,5670
|
|
30
|
+
datacontract/export/rdf_converter.py,sha256=dfMicvoOt2bloJZMjkIY6wPR9ymBa9jAlDDwQpiHfAc,6108
|
|
31
|
+
datacontract/export/sodacl_converter.py,sha256=a4CYzTRbWaVP4pom2JkH9A1VkVWsrWzewEamj1e_5fA,3197
|
|
32
|
+
datacontract/export/sql_converter.py,sha256=qmvyVLtFk5MVtNUDF5YmZbrbRz7fsQ4v3kdJHvrq_PI,3343
|
|
33
|
+
datacontract/export/sql_type_converter.py,sha256=kiFFX54dfSgXXqbUCtFz0ujEt0Ie5JnuuWbcvJc0kos,4627
|
|
34
|
+
datacontract/export/terraform_converter.py,sha256=3gwfKHp1QtF2bg_iXmWuV2QSNeZNy5i_AQVe1REXSbc,1949
|
|
35
|
+
datacontract/imports/avro_importer.py,sha256=rQ9lHrTV8_dgjRB2acDDdnWKH8y9DxC4TYv9Rx_CeEQ,3823
|
|
36
|
+
datacontract/imports/sql_importer.py,sha256=kaQMKQLuHFo5uSl8j_g9PBXPdP4KruMh_O_egkbcqxM,2027
|
|
37
|
+
datacontract/init/download_datacontract_file.py,sha256=pj_4mhWKlEtfueWohDgkb1nyuG5ERDipUDszxKwpZUs,413
|
|
38
|
+
datacontract/integration/publish_datamesh_manager.py,sha256=Ul6enuo5l3sFoNIFU0XRg6etzu8a15IQp8jUovB5ynY,1316
|
|
39
|
+
datacontract/integration/publish_opentelemetry.py,sha256=eyR2VQOUii-IvcaBMyru7IDrqU8OuxYvbB24poX4t58,3830
|
|
40
|
+
datacontract/lint/files.py,sha256=tg0vq_w4LQsEr_8A5qr4hUJmHeGalUpsXJXC1t-OGC0,471
|
|
41
|
+
datacontract/lint/lint.py,sha256=GhgBwfDgihXfWIu1WUfYeJoxfYacVJrwq3L2KO4Z2Io,5076
|
|
42
|
+
datacontract/lint/resolve.py,sha256=7EhNiG-040NXKJqliWs3u5vJg6KzfgAfgnq_s9dXGhc,4863
|
|
43
|
+
datacontract/lint/schema.py,sha256=FIzubF1C9TnhEQBeoDsLuhc6HymCMx_v435Hjbcz4U8,838
|
|
44
|
+
datacontract/lint/urls.py,sha256=NHTbwadnm1VCxsiF7clfDvsxJ8-D3Mi5AzDj029E6wQ,1325
|
|
45
|
+
datacontract/lint/linters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
+
datacontract/lint/linters/description_linter.py,sha256=Cm99NTWo75kDRIW63qz5uEa8xsug_8dFURPRC1twodw,1554
|
|
47
|
+
datacontract/lint/linters/example_model_linter.py,sha256=YpPWymPIKvcL6jsA8rM1nVMOHaMifmrqlLf65Dtglr8,3978
|
|
48
|
+
datacontract/lint/linters/field_pattern_linter.py,sha256=hdzcY_MM6MsJeag0HQ1YwgzXlgK2IQ2wpDKNDkNwkvk,1089
|
|
49
|
+
datacontract/lint/linters/field_reference_linter.py,sha256=fRNI483JpWV_ewWoNqfuVkNqYwQ-7z68q_xhHQZPO7g,1550
|
|
50
|
+
datacontract/lint/linters/notice_period_linter.py,sha256=_w3lp9qfEkbMMSTV38IlTVWnG-kyJY-S-7dex_JSpwc,2135
|
|
51
|
+
datacontract/lint/linters/primary_field_linter.py,sha256=HoNhk-wmoR0JERZB6e8jZnIx7Sl5-N8QdU4Ctz6scqU,1128
|
|
52
|
+
datacontract/lint/linters/quality_schema_linter.py,sha256=fwqOoT15kDXLw_I7LMDWh4Y-Z9xA83FHPooIz7cnDac,2187
|
|
53
|
+
datacontract/lint/linters/valid_constraints_linter.py,sha256=GeeE2hojMt24EoHuSHE1lURA2v2p5Dmxc7Qq8IvZQH0,4915
|
|
54
|
+
datacontract/model/breaking_change.py,sha256=BIDEUo1U2CQLVT2-I5PyFttxAj6zQPI1UUkEoOOQXMY,2249
|
|
55
|
+
datacontract/model/data_contract_specification.py,sha256=Oo-hL_FVjsN0xX8vi4qZ5gjxNYsqRmUMg0i7fbqSk-o,3480
|
|
56
|
+
datacontract/model/exceptions.py,sha256=zW9NoyzwsND-c9UqgyTVuezUVGEc6KK1Uc2zl12loyo,1178
|
|
57
|
+
datacontract/model/run.py,sha256=mm1cZxjJVg0w1qzN_WV9TcWrAXsACBDSChW39Kpa-K4,2563
|
|
58
|
+
datacontract_cli-0.9.8.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
|
|
59
|
+
datacontract_cli-0.9.8.dist-info/METADATA,sha256=P_OZ5eN2G4_4lXzHnQFfNYB70IzwOd-BHALFLQ47Gr8,35543
|
|
60
|
+
datacontract_cli-0.9.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
61
|
+
datacontract_cli-0.9.8.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
|
|
62
|
+
datacontract_cli-0.9.8.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
|
|
63
|
+
datacontract_cli-0.9.8.dist-info/RECORD,,
|