datacontract-cli 0.10.14__py3-none-any.whl → 0.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release. This version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +229 -11
- datacontract/breaking/breaking_rules.py +24 -0
- datacontract/catalog/catalog.py +1 -1
- datacontract/cli.py +100 -33
- datacontract/data_contract.py +26 -4
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
- datacontract/engines/soda/check_soda_execute.py +7 -5
- datacontract/engines/soda/connections/duckdb.py +1 -0
- datacontract/engines/soda/connections/kafka.py +12 -12
- datacontract/export/avro_idl_converter.py +1 -2
- datacontract/export/bigquery_converter.py +4 -3
- datacontract/export/data_caterer_converter.py +1 -1
- datacontract/export/dbml_converter.py +2 -4
- datacontract/export/dbt_converter.py +45 -39
- datacontract/export/exporter.py +2 -1
- datacontract/export/exporter_factory.py +7 -2
- datacontract/export/go_converter.py +3 -2
- datacontract/export/great_expectations_converter.py +202 -40
- datacontract/export/html_export.py +1 -1
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/jsonschema_converter.py +3 -2
- datacontract/export/odcs_v2_exporter.py +1 -1
- datacontract/export/odcs_v3_exporter.py +44 -30
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +1 -1
- datacontract/export/rdf_converter.py +4 -5
- datacontract/export/sodacl_converter.py +9 -4
- datacontract/export/spark_converter.py +7 -6
- datacontract/export/sql_converter.py +1 -2
- datacontract/export/sqlalchemy_converter.py +1 -2
- datacontract/export/terraform_converter.py +1 -1
- datacontract/imports/avro_importer.py +1 -1
- datacontract/imports/bigquery_importer.py +1 -1
- datacontract/imports/dbml_importer.py +2 -2
- datacontract/imports/dbt_importer.py +80 -15
- datacontract/imports/glue_importer.py +5 -3
- datacontract/imports/iceberg_importer.py +17 -7
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +7 -1
- datacontract/imports/jsonschema_importer.py +3 -2
- datacontract/imports/odcs_v2_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +7 -2
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/spark_importer.py +2 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/imports/unity_importer.py +3 -3
- datacontract/integration/opentelemetry.py +0 -1
- datacontract/lint/lint.py +2 -1
- datacontract/lint/linters/description_linter.py +1 -0
- datacontract/lint/linters/example_model_linter.py +1 -0
- datacontract/lint/linters/field_pattern_linter.py +1 -0
- datacontract/lint/linters/field_reference_linter.py +1 -0
- datacontract/lint/linters/notice_period_linter.py +1 -0
- datacontract/lint/linters/quality_schema_linter.py +1 -0
- datacontract/lint/linters/valid_constraints_linter.py +1 -0
- datacontract/lint/resolve.py +7 -3
- datacontract/lint/schema.py +1 -1
- datacontract/model/data_contract_specification.py +13 -6
- datacontract/model/run.py +21 -12
- datacontract/templates/index.html +6 -6
- datacontract/web.py +2 -3
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA +163 -60
- datacontract_cli-0.10.16.dist-info/RECORD +106 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/WHEEL +1 -1
- datacontract_cli-0.10.14.dist-info/RECORD +0 -103
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/top_level.txt +0 -0
datacontract/lint/resolve.py
CHANGED
@@ -9,7 +9,11 @@ from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
 from datacontract.lint.resources import read_resource
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
-from datacontract.model.data_contract_specification import
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Definition,
+    DeprecatedQuality,
+)
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.odcs import is_open_data_contract_standard

@@ -156,7 +160,7 @@ def _fetch_file(path) -> str:
     return file.read()


-def _resolve_quality_ref(quality:
+def _resolve_quality_ref(quality: DeprecatedQuality):
     """
     Return the content of a ref file path
     @param quality data contract quality specification

@@ -199,7 +203,7 @@ def _resolve_data_contract_from_str(

     if is_open_data_contract_standard(yaml_dict):
         # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
-        data_contract_specification = DataContractSpecification(dataContractSpecification="
+        data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
         return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)

     _validate_data_contract_specification_schema(yaml_dict, schema_location)
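The last hunk pins the specification version that an imported ODCS document is mapped onto. A minimal sketch of that construction (the field name is taken from the diff itself; error handling and the surrounding import flow are omitted):

```python
# Minimal sketch: seed a fresh specification with version 1.1.0, as the new
# code does before importing ODCS content onto it.
from datacontract.model.data_contract_specification import DataContractSpecification

spec = DataContractSpecification(dataContractSpecification="1.1.0")
print(spec.dataContractSpecification)  # -> 1.1.0
```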
datacontract/lint/schema.py
CHANGED
datacontract/model/data_contract_specification.py
CHANGED

@@ -1,5 +1,5 @@
 import os
-from typing import
+from typing import Any, Dict, List, Optional

 import pydantic as pyd
 import yaml

@@ -147,6 +147,7 @@ class Field(pyd.BaseModel):
     format: str = None
     required: bool = None
     primary: bool = None
+    primaryKey: bool = None
     unique: bool | None = None
     references: str = None
     description: str | None = None

@@ -213,7 +214,8 @@ class Example(pyd.BaseModel):
     data: str | object = None


-
+# Deprecated Quality class
+class DeprecatedQuality(pyd.BaseModel):
     type: str = None
     specification: str | object = None

@@ -282,9 +284,14 @@ class DataContractSpecification(pyd.BaseModel):
     terms: Terms = None
     models: Dict[str, Model] = {}
     definitions: Dict[str, Definition] = {}
-
-
-
+    examples: List[Example] = pyd.Field(
+        default_factory=list,
+        deprecated="Removed in Data Contract Specification v1.1.0. Use models.examples instead.",
+    )
+    quality: DeprecatedQuality = pyd.Field(
+        default=None,
+        deprecated="Removed in Data Contract Specification v1.1.0. Use model-level and field-level quality instead.",
+    )
     servicelevels: Optional[ServiceLevel] = None
     links: Dict[str, str] = {}
     tags: List[str] = []

@@ -292,7 +299,7 @@ class DataContractSpecification(pyd.BaseModel):
     @classmethod
     def from_file(cls, file):
         if not os.path.exists(file):
-            raise (f"The file '{file}' does not exist.")
+            raise FileNotFoundError(f"The file '{file}' does not exist.")
         with open(file, "r") as file:
             file_content = file.read()
         return DataContractSpecification.from_string(file_content)
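The new `examples` and `quality` fields lean on pydantic's field-level deprecation support (available since pydantic 2.7, within the `pydantic<2.11.0,>=2.8.2` pin in the METADATA diff below): reading such a field emits a `DeprecationWarning` with the given message. A standalone sketch, using an illustrative model rather than the package's class:

```python
# Illustrative model (not the package's class): pydantic emits a
# DeprecationWarning when a Field(deprecated=...) attribute is read.
import warnings
from typing import List

import pydantic as pyd


class Spec(pyd.BaseModel):
    examples: List[str] = pyd.Field(
        default_factory=list,
        deprecated="Removed in Data Contract Specification v1.1.0. Use models.examples instead.",
    )


spec = Spec(examples=["order_id: 1001"])
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = spec.examples  # reading the field triggers the warning
print(caught[0].message)
```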
datacontract/model/run.py
CHANGED
@@ -1,15 +1,24 @@
 import logging
 from datetime import datetime, timezone
+from enum import Enum
 from typing import List, Optional
 from uuid import UUID, uuid4

 from pydantic import BaseModel


+class ResultEnum(str, Enum):
+    passed = "passed"
+    warning = "warning"
+    failed = "failed"
+    error = "error"
+    unknown = "unknown"
+
+
 class Check(BaseModel):
     type: str
     name: Optional[str]
-    result:
+    result: ResultEnum
     engine: str
     reason: Optional[str] = None
     model: Optional[str] = None

@@ -33,29 +42,29 @@ class Run(BaseModel):
     server: Optional[str] = None
     timestampStart: datetime
     timestampEnd: datetime
-    result:
+    result: ResultEnum = ResultEnum.unknown
     checks: List[Check]
     logs: List[Log]

     def has_passed(self):
         self.calculate_result()
-        return self.result ==
+        return self.result == ResultEnum.passed

     def finish(self):
         self.timestampEnd = datetime.now(timezone.utc)
         self.calculate_result()

     def calculate_result(self):
-        if any(check.result ==
-            self.result =
-        elif any(check.result ==
-            self.result =
-        elif any(check.result ==
-            self.result =
-        elif any(check.result ==
-            self.result =
+        if any(check.result == ResultEnum.error for check in self.checks):
+            self.result = ResultEnum.error
+        elif any(check.result == ResultEnum.failed for check in self.checks):
+            self.result = ResultEnum.failed
+        elif any(check.result == ResultEnum.warning for check in self.checks):
+            self.result = ResultEnum.warning
+        elif any(check.result == ResultEnum.passed for check in self.checks):
+            self.result = ResultEnum.passed
         else:
-            self.result =
+            self.result = ResultEnum.unknown

     def log_info(self, message: str):
         logging.info(message)
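The `ResultEnum` change makes the aggregation order in `calculate_result` explicit: `error` outranks `failed`, which outranks `warning`, which outranks `passed`; a run with no matching check results stays `unknown`. A standalone sketch of that precedence (the `overall_result` helper is hypothetical, not part of the package):

```python
# Hypothetical helper mirroring calculate_result's precedence (not part of
# the package): error > failed > warning > passed, else unknown.
from enum import Enum


class ResultEnum(str, Enum):
    passed = "passed"
    warning = "warning"
    failed = "failed"
    error = "error"
    unknown = "unknown"


def overall_result(check_results: list[ResultEnum]) -> ResultEnum:
    for level in (ResultEnum.error, ResultEnum.failed, ResultEnum.warning, ResultEnum.passed):
        if any(result == level for result in check_results):
            return level  # the most severe level present wins
    return ResultEnum.unknown


assert overall_result([ResultEnum.passed, ResultEnum.warning]) == ResultEnum.warning
assert overall_result([]) == ResultEnum.unknown
```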
datacontract/templates/index.html
CHANGED

@@ -78,17 +78,17 @@

     <li class="col-span-1 rounded-lg bg-white shadow hover:bg-gray-50"
         data-search="{{
-        contract.spec.info.title|lower }} {{
-        contract.spec.info.owner|lower if contract.spec.info.owner else '' }} {{
-        contract.spec.info.description|lower }} {%
+        contract.spec.info.title|lower|e }} {{
+        contract.spec.info.owner|lower|e if contract.spec.info.owner else '' }} {{
+        contract.spec.info.description|lower|e }} {%
         for model_name, model in contract.spec.models.items() %}
-        {{ model.description|lower }} {%
+        {{ model.description|lower|e }} {%
         for field_name, field in model.fields.items() %}
-        {{ field_name|lower }} {{ field.description|lower if field.description else '' }} {%
+        {{ field_name|lower|e }} {{ field.description|lower|e if field.description else '' }} {%
         endfor %}
         {% endfor %}
         ">
-        <a href="{{contract.html_link}}" >
+        <a href="{{contract.html_link|e}}" >
         <div class="flex w-full justify-between space-x-1 p-6 pb-4">
           <div class="flex-1 truncate">
             <div class="flex items-center space-x-3">
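The added `|e` filters apply Jinja2's HTML escaping, so contract titles, owners, and descriptions cannot inject markup into the `data-search` attribute or the link `href`. A minimal sketch of the filter's effect (jinja2 is assumed to be available, as the HTML export already renders these templates):

```python
# Minimal sketch of Jinja2's |e (escape) filter, as added in the template above.
from jinja2 import Environment

env = Environment()  # autoescape is off by default, hence the explicit |e
template = env.from_string('{{ title|lower|e }}')
print(template.render(title='<b>Orders</b> & "Shipments"'))
# -> &lt;b&gt;orders&lt;/b&gt; &amp; &#34;shipments&#34;
```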
datacontract/web.py
CHANGED
@@ -1,11 +1,10 @@
-from typing import Annotated,
+from typing import Annotated, Optional, Union

 import typer
 from fastapi import FastAPI, File
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse, PlainTextResponse

 from datacontract.data_contract import DataContract, ExportFormat
-from fastapi.responses import PlainTextResponse

 app = FastAPI()

{datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datacontract-cli
-Version: 0.10.
+Version: 0.10.16
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com

@@ -11,71 +11,72 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: typer<0.
-Requires-Dist: pydantic<2.
+Requires-Dist: typer<0.15,>=0.12
+Requires-Dist: pydantic<2.11.0,>=2.8.2
 Requires-Dist: pyyaml~=6.0.1
 Requires-Dist: requests<2.33,>=2.31
-Requires-Dist: fastapi==0.115.
-Requires-Dist: uvicorn==0.
-Requires-Dist: fastjsonschema<2.
-Requires-Dist: fastparquet==2024.
-Requires-Dist: python-multipart==0.0.
+Requires-Dist: fastapi==0.115.6
+Requires-Dist: uvicorn==0.32.1
+Requires-Dist: fastjsonschema<2.22.0,>=2.19.1
+Requires-Dist: fastparquet==2024.11.0
+Requires-Dist: python-multipart==0.0.19
 Requires-Dist: rich<13.10,>=13.7
 Requires-Dist: simple-ddl-parser==1.7.1
 Requires-Dist: duckdb==1.1.2
-Requires-Dist: soda-core-duckdb<3.
+Requires-Dist: soda-core-duckdb<3.5.0,>=3.3.1
 Requires-Dist: setuptools>=60
 Requires-Dist: python-dotenv~=1.0.0
 Requires-Dist: rdflib==7.0.0
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
 Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
-Requires-Dist: boto3<1.35.
-Requires-Dist:
-Provides-Extra: all
-Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,iceberg,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
+Requires-Dist: boto3<1.35.69,>=1.34.41
+Requires-Dist: jinja_partials>=0.2.1
 Provides-Extra: avro
 Requires-Dist: avro==1.12.0; extra == "avro"
 Provides-Extra: bigquery
-Requires-Dist: soda-core-bigquery<3.
+Requires-Dist: soda-core-bigquery<3.5.0,>=3.3.1; extra == "bigquery"
 Provides-Extra: databricks
-Requires-Dist: soda-core-spark-df<3.
+Requires-Dist: soda-core-spark-df<3.5.0,>=3.3.1; extra == "databricks"
 Requires-Dist: databricks-sql-connector<3.6.0,>=3.1.2; extra == "databricks"
-Requires-Dist: databricks-sdk<0.
-Requires-Dist: soda-core-spark[databricks]<3.
-Provides-Extra:
-Requires-Dist:
+Requires-Dist: databricks-sdk<0.39.0,>=0.32.0; extra == "databricks"
+Requires-Dist: soda-core-spark[databricks]<3.5.0,>=3.3.1; extra == "databricks"
+Provides-Extra: iceberg
+Requires-Dist: pyiceberg==0.8.1; extra == "iceberg"
+Provides-Extra: kafka
+Requires-Dist: datacontract-cli[avro]; extra == "kafka"
+Requires-Dist: soda-core-spark-df<3.5.0,>=3.3.1; extra == "kafka"
+Provides-Extra: postgres
+Requires-Dist: soda-core-postgres<3.5.0,>=3.3.1; extra == "postgres"
+Provides-Extra: s3
+Requires-Dist: s3fs==2024.10.0; extra == "s3"
+Provides-Extra: snowflake
+Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
+Requires-Dist: soda-core-snowflake<3.5.0,>=3.3.1; extra == "snowflake"
+Provides-Extra: sqlserver
+Requires-Dist: soda-core-sqlserver<3.5.0,>=3.3.1; extra == "sqlserver"
+Provides-Extra: trino
+Requires-Dist: soda-core-trino<3.5.0,>=3.3.1; extra == "trino"
 Provides-Extra: dbt
 Requires-Dist: dbt-core>=1.8.0; extra == "dbt"
+Provides-Extra: dbml
+Requires-Dist: pydbml>=1.1.1; extra == "dbml"
+Provides-Extra: parquet
+Requires-Dist: pyarrow>=12.0.0; extra == "parquet"
+Provides-Extra: all
+Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
 Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
-Requires-Dist: httpx==0.
+Requires-Dist: httpx==0.28.1; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.0.
+Requires-Dist: moto==5.0.22; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
-Requires-Dist: pre-commit<
-Requires-Dist: pyarrow>=12.0.0; extra == "dev"
+Requires-Dist: pre-commit<4.1.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: pymssql==2.3.
+Requires-Dist: pymssql==2.3.2; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
-Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.
+Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.9.0; extra == "dev"
 Requires-Dist: trino==0.330.0; extra == "dev"
-Provides-Extra: iceberg
-Requires-Dist: pyiceberg==0.7.1; extra == "iceberg"
-Provides-Extra: kafka
-Requires-Dist: datacontract-cli[avro]; extra == "kafka"
-Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
-Provides-Extra: postgres
-Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
-Provides-Extra: s3
-Requires-Dist: s3fs==2024.9.0; extra == "s3"
-Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
-Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"
-Provides-Extra: sqlserver
-Requires-Dist: soda-core-sqlserver<3.4.0,>=3.3.1; extra == "sqlserver"
-Provides-Extra: trino
-Requires-Dist: soda-core-trino<3.4.0,>=3.3.1; extra == "trino"

 # Data Contract CLI

@@ -273,6 +274,7 @@ A list of available extras:
 | Avro Support           | `pip install datacontract-cli[avro]`       |
 | Google BigQuery        | `pip install datacontract-cli[bigquery]`   |
 | Databricks Integration | `pip install datacontract-cli[databricks]` |
+| Iceberg                | `pip install datacontract-cli[iceberg]`    |
 | Kafka Integration      | `pip install datacontract-cli[kafka]`      |
 | PostgreSQL Integration | `pip install datacontract-cli[postgres]`   |
 | S3 Integration         | `pip install datacontract-cli[s3]`         |

@@ -280,6 +282,8 @@ A list of available extras:
 | Microsoft SQL Server   | `pip install datacontract-cli[sqlserver]`  |
 | Trino                  | `pip install datacontract-cli[trino]`      |
 | Dbt                    | `pip install datacontract-cli[dbt]`        |
+| Dbml                   | `pip install datacontract-cli[dbml]`       |
+| Parquet                | `pip install datacontract-cli[parquet]`    |

@@ -825,19 +829,19 @@ models:
 ### export

 ```
-
-
-
-
-
+ Usage: datacontract export [OPTIONS] [LOCATION]
+
+ Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
+
 ╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │   location      [LOCATION]  The location (url or path) of the data contract yaml.  [default: datacontract.yaml]                 │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ *  --format        [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db  The export format.  [default: None]  [required]        │
-│                    t-staging-sql|odcs|rdf|avro|protobuf
-│
-│
+│                    t-staging-sql|odcs|odcs_v2|odcs_v3|rdf|avro|protobuf                                                         │
+│                    |great-expectations|terraform|avro-idl|sql|sql-query                                                         │
+│                    |html|go|bigquery|dbml|spark|sqlalchemy|data-caterer                                                         │
+│                    |dcs|iceberg]                                                                                                │
 │    --output        PATH                                                  Specify the file path where the exported data will be  │
 │                                                                          saved. If no path is provided, the output will be      │
 │                                                                          printed to stdout.                                     │

@@ -847,6 +851,12 @@ models:
 │                                file to refer to a model, e.g., `orders`, or `all`                                               │
 │                                for all models (default).                                                                        │
 │                                [default: all]                                                                                   │
+│    --schema        TEXT        The location (url or path) of the Data Contract                                                  │
+│                                Specification JSON Schema                                                                        │
+│                                [default:                                                                                        │
+│                                https://datacontract.com/datacontract.schema.json]                                               │
+│    --engine        TEXT        [engine] The engine used for great expection run.                                                │
+│                                [default: None]                                                                                  │
 │    --help                      Show this message and exit.                                                                      │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ RDF Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮

@@ -857,12 +867,11 @@ models:
 │ detect the sql dialect via the specified servers in the data contract.                                                          │
 │ [default: auto]                                                                                                                  │
 ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-
 ```

 ```bash
 # Example export data contract as HTML
-datacontract export --format html
+datacontract export --format html --output datacontract.html
 ```

 Available export options:

@@ -893,11 +902,13 @@ Available export options:
 | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
 | `data-caterer` | Export to Data Caterer in YAML format | ✅ |
 | `dcs` | Export to Data Contract Specification in YAML format | ✅ |
+| `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
 | Missing something? | Please create an issue on GitHub | TBD |

+
 #### Great Expectations

-The export function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
+The `export` function transforms a specified data contract into a comprehensive Great Expectations JSON suite.
 If the contract includes multiple models, you need to specify the names of the model you wish to export.

 ```shell
@@ -907,7 +918,22 @@ datacontract export datacontract.yaml --format great-expectations --model order
 The export creates a list of expectations by utilizing:

 - The data from the Model definition with a fixed mapping
-- The expectations provided in the quality field for each model (find here the expectations gallery https://greatexpectations.io/expectations/)
+- The expectations provided in the quality field for each model (find here the expectations gallery: [Great Expectations Gallery](https://greatexpectations.io/expectations/))
+
+### Additional Arguments
+
+To further customize the export, the following optional arguments are available:
+
+- **`suite_name`**: The name of the expectation suite. This suite groups all generated expectations and provides a convenient identifier within Great Expectations. If not provided, a default suite name will be generated based on the model name(s).
+
+- **`engine`**: Specifies the engine used to run Great Expectations checks. Accepted values are:
+  - `pandas` — Use this when working with in-memory data frames through the Pandas library.
+  - `spark` — Use this for working with Spark dataframes.
+  - `sql` — Use this for working with SQL databases.
+
+- **`sql_server_type`**: Specifies the type of SQL server to connect with when `engine` is set to `sql`.
+
+Providing `sql_server_type` ensures that the appropriate SQL dialect and connection settings are applied during the expectation validation.

 #### RDF

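The arguments added above also reach the exporter when it is driven from Python. A hypothetical sketch (the `DataContract` import path appears in the `web.py` hunk above; the `export()` keyword arguments are assumed to mirror the CLI flags, not confirmed signatures):

```python
# Hypothetical sketch: keyword arguments mirror the CLI flags above and are
# assumptions, not confirmed signatures.
from datacontract.data_contract import DataContract

data_contract = DataContract(data_contract_file="datacontract.yaml")
suite = data_contract.export(
    export_format="great-expectations",
    model="orders",             # required when the contract defines several models
    suite_name="orders_suite",  # groups the generated expectations
    engine="pandas",            # pandas | spark | sql
)
print(suite)
```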
@@ -1000,18 +1026,81 @@ models:
 - **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
 - **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.

+#### Iceberg
+
+Exports to an [Iceberg Table Json Schema Definition](https://iceberg.apache.org/spec/#appendix-c-json-serialization).

-
+This export only supports a single model export at a time because Iceberg's schema definition is for a single table and the exporter maps 1 model to 1 table, use the `--model` flag
+to limit your contract export to a single model.

+```bash
+$ datacontract export --format iceberg --model orders https://datacontract.com/examples/orders-latest/datacontract.yaml --output /tmp/orders_iceberg.json
+
+$ cat /tmp/orders_iceberg.json | jq '.'
+{
+  "type": "struct",
+  "fields": [
+    {
+      "id": 1,
+      "name": "order_id",
+      "type": "string",
+      "required": true
+    },
+    {
+      "id": 2,
+      "name": "order_timestamp",
+      "type": "timestamptz",
+      "required": true
+    },
+    {
+      "id": 3,
+      "name": "order_total",
+      "type": "long",
+      "required": true
+    },
+    {
+      "id": 4,
+      "name": "customer_id",
+      "type": "string",
+      "required": false
+    },
+    {
+      "id": 5,
+      "name": "customer_email_address",
+      "type": "string",
+      "required": true
+    },
+    {
+      "id": 6,
+      "name": "processed_timestamp",
+      "type": "timestamptz",
+      "required": true
+    }
+  ],
+  "schema-id": 0,
+  "identifier-field-ids": [
+    1
+  ]
+}
 ```
-Usage: datacontract import [OPTIONS]

-
+### import
+
+```
+Usage: datacontract import [OPTIONS]
+
+ Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise
+ prints to stdout.

 ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│ *  --format        [sql|avro|dbt|glue|jsonschema|bigquery
-│                    |unity|spark|iceberg]
+│ *  --format        [sql|avro|dbt|dbml|glue|jsonschema|bigquery   The format of the source file.                              │
+│                    |odcs|unity|spark|iceberg|parquet]            [default: None]                                             │
 │                                                                  [required]                                                  │
+│    --output        PATH                                          Specify the file path where the Data                        │
+│                                                                  Contract will be saved. If no path is                       │
+│                                                                  provided, the output will be printed to                     │
+│                                                                  stdout.                                                     │
+│                                                                  [default: None]                                             │
 │    --source        TEXT                                          The path to the file or Glue Database that                  │
 │                                                                  should be imported.                                         │
 │                                                                  [default: None]                                             │
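Since the Iceberg export shown above emits plain JSON, the schema can be inspected without any Iceberg tooling. A small stdlib sketch against the example output (the `/tmp` path matches the export command above):

```python
# Read the exported Iceberg schema back and list the columns the table
# treats as required (path taken from the export example above).
import json

with open("/tmp/orders_iceberg.json") as f:
    schema = json.load(f)

required = [field["name"] for field in schema["fields"] if field["required"]]
print(required)
# -> ['order_id', 'order_timestamp', 'order_total',
#     'customer_email_address', 'processed_timestamp']
```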
@@ -1041,7 +1130,8 @@ models:
 │                            empty for all tables in the file).                                                                │
 │                            [default: None]                                                                                   │
 │    --iceberg-table   TEXT  Table name to assign to the model created                                                         │
-│                            from the Iceberg schema.
+│                            from the Iceberg schema.                                                                          │
+│                            [default: None]                                                                                   │
 │    --help                  Show this message and exit.                                                                       │
 ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
@@ -1050,6 +1140,8 @@ Example:
 ```bash
 # Example import from SQL DDL
 datacontract import --format sql --source my_ddl.sql
+# To save to file
+datacontract import --format sql --source my_ddl.sql --output datacontract.yaml
 ```

 Available import options:

@@ -1068,6 +1160,7 @@ Available import options:
 | `dbml` | Import from DBML models | ✅ |
 | `protobuf` | Import from Protobuf schemas | TBD |
 | `iceberg` | Import from an Iceberg JSON Schema Definition | partial |
+| `parquet` | Import from Parquet File Metadta | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |

@@ -1270,6 +1363,16 @@ datacontract import --format iceberg --source ./tests/fixtures/iceberg/simple_sc
 ╰──────────────────────────────────────────────────────────────────────────╯
 ```

+Examples:
+
+```
+# create a catalog right in the current folder
+datacontract catalog --output "."
+
+# Create a catalog based on a filename convention
+datacontract catalog --files "*.odcs.yaml"
+```
+
 ### Publish

 ```
@@ -1587,7 +1690,7 @@ if __name__ == "__main__":
 Output

 ```yaml
-dataContractSpecification:
+dataContractSpecification: 1.1.0
 id: uuid-custom
 info:
   title: my_custom_imported_data