datacontract-cli 0.10.32__py3-none-any.whl → 0.10.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/cli.py +20 -5
- datacontract/data_contract.py +8 -2
- datacontract/engines/data_contract_checks.py +102 -59
- datacontract/engines/data_contract_test.py +37 -0
- datacontract/engines/fastjsonschema/check_jsonschema.py +8 -0
- datacontract/engines/soda/check_soda_execute.py +6 -0
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/duckdb_connection.py +3 -0
- datacontract/export/avro_converter.py +12 -2
- datacontract/export/excel_exporter.py +922 -0
- datacontract/export/exporter.py +1 -0
- datacontract/export/exporter_factory.py +4 -0
- datacontract/export/markdown_converter.py +115 -5
- datacontract/export/sql_type_converter.py +4 -0
- datacontract/imports/avro_importer.py +33 -7
- datacontract/imports/excel_importer.py +13 -5
- datacontract/imports/odcs_v3_importer.py +1 -0
- datacontract/imports/spark_importer.py +12 -1
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/METADATA +111 -12
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/RECORD +24 -22
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/licenses/LICENSE +1 -1
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/top_level.txt +0 -0
datacontract/export/exporter.py
CHANGED
@@ -204,3 +204,7 @@ exporter_factory.register_lazy_exporter(
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.custom, module_path="datacontract.export.custom_converter", class_name="CustomExporter"
 )
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.excel, module_path="datacontract.export.excel_exporter", class_name="ExcelExporter"
+)
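The registration above only records where the Excel exporter lives; the module is imported the first time the `excel` format is requested, so its dependencies are not loaded for other commands. A minimal, self-contained sketch of that lazy-loading pattern (illustrative names, not the package's internals):

```python
import importlib

# Simplified stand-in for the exporter factory's lazy registration.
_lazy_exporters: dict[str, tuple[str, str]] = {}


def register_lazy_exporter(name: str, module_path: str, class_name: str) -> None:
    # Only remember where the exporter class lives; do not import it yet.
    _lazy_exporters[name] = (module_path, class_name)


def load_exporter(name: str):
    # The import happens on first use, so optional dependencies stay optional.
    module_path, class_name = _lazy_exporters[name]
    return getattr(importlib.import_module(module_path), class_name)


register_lazy_exporter("excel", "datacontract.export.excel_exporter", "ExcelExporter")
# load_exporter("excel") would import the Excel exporter module on demand.
```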
datacontract/export/markdown_converter.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, List
 
 from pydantic import BaseModel
 
@@ -12,6 +12,9 @@ from datacontract.model.data_contract_specification import (
     ServiceLevel,
 )
 
+TAB = " "
+ARROW = "↳"
+
 
 class MarkdownExporter(Exporter):
     """Exporter implementation for converting data contracts to Markdown."""
@@ -70,7 +73,8 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
     else:
         bullet_char = "-"
         newline_char = "\n"
-
+    model_attributes_to_include = set(obj.__class__.model_fields.keys())
+    obj_model = obj.model_dump(exclude_unset=True, include=model_attributes_to_include, exclude=excluded_fields)
     description_value = obj_model.pop("description", None)
     attributes = [
         (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
@@ -78,7 +82,8 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
         if value
     ]
     description = f"*{description_to_markdown(description_value)}*"
-
+    extra = [extra_to_markdown(obj)] if obj.model_extra else []
+    return newline_char.join([description] + attributes + extra)
 
 
 def servers_to_markdown(servers: Dict[str, Server]) -> str:
@@ -153,8 +158,8 @@ def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
     Returns:
         str: A Markdown table rows for the field.
     """
-    tabs =
-    arrow =
+    tabs = TAB * level
+    arrow = ARROW if level > 0 else ""
     column_name = f"{tabs}{arrow} {field_name}"
 
     attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
@@ -206,3 +211,108 @@ def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
 
 def description_to_markdown(description: str | None) -> str:
     return (description or "No description.").replace("\n", "<br>")
+
+
+def array_of_dict_to_markdown(array: List[Dict[str, str]]) -> str:
+    """
+    Convert a list of dictionaries to a Markdown table.
+
+    Args:
+        array (List[Dict[str, str]]): A list of dictionaries where each dictionary represents a row in the table.
+
+    Returns:
+        str: A Markdown formatted table.
+    """
+    if not array:
+        return ""
+
+    headers = []
+
+    for item in array:
+        headers += item.keys()
+    headers = list(dict.fromkeys(headers))  # Preserve order and remove duplicates
+
+    markdown_parts = [
+        "| " + " | ".join(headers) + " |",
+        "| " + " | ".join(["---"] * len(headers)) + " |",
+    ]
+
+    for row in array:
+        element = row
+        markdown_parts.append(
+            "| "
+            + " | ".join(
+                f"{str(element.get(header, ''))}".replace("\n", "<br>").replace("\t", TAB) for header in headers
+            )
+            + " |"
+        )
+
+    return "\n".join(markdown_parts) + "\n"
+
+
+def array_to_markdown(array: List[str]) -> str:
+    """
+    Convert a list of strings to a Markdown formatted list.
+
+    Args:
+        array (List[str]): A list of strings to convert.
+
+    Returns:
+        str: A Markdown formatted list.
+    """
+    if not array:
+        return ""
+    return "\n".join(f"- {item}" for item in array) + "\n"
+
+
+def dict_to_markdown(dictionary: Dict[str, str]) -> str:
+    """
+    Convert a dictionary to a Markdown formatted list.
+
+    Args:
+        dictionary (Dict[str, str]): A dictionary where keys are item names and values are item descriptions.
+
+    Returns:
+        str: A Markdown formatted list of items.
+    """
+    if not dictionary:
+        return ""
+
+    markdown_parts = []
+    for key, value in dictionary.items():
+        if isinstance(value, dict):
+            markdown_parts.append(f"- {key}")
+            nested_markdown = dict_to_markdown(value)
+            if nested_markdown:
+                nested_lines = nested_markdown.split("\n")
+                for line in nested_lines:
+                    if line.strip():
+                        markdown_parts.append(f" {line}")
+        else:
+            markdown_parts.append(f"- {key}: {value}")
+    return "\n".join(markdown_parts) + "\n"
+
+
+def extra_to_markdown(obj: BaseModel) -> str:
+    """
+    Convert the extra attributes of a data contract to Markdown format.
+    Args:
+        obj (BaseModel): The data contract object containing extra attributes.
+    Returns:
+        str: A Markdown formatted string representing the extra attributes of the data contract.
+    """
+    markdown_part = ""
+    extra = obj.model_extra
+    if extra:
+        for key_extra, value_extra in extra.items():
+            markdown_part += f"\n### {key_extra.capitalize()}\n"
+            if isinstance(value_extra, list) and len(value_extra):
+                if isinstance(value_extra[0], dict):
+                    markdown_part += array_of_dict_to_markdown(value_extra)
+                elif isinstance(value_extra[0], str):
+                    markdown_part += array_to_markdown(value_extra)
+            elif isinstance(value_extra, dict):
+                markdown_part += dict_to_markdown(value_extra)
+            else:
+                markdown_part += f"{str(value_extra)}\n"
+    return markdown_part
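Most of the markdown_converter change adds helpers for rendering extra (non-spec) attributes. A quick illustration of what two of them produce, based on the function bodies above (the sample values are made up):

```python
from datacontract.export.markdown_converter import array_of_dict_to_markdown, array_to_markdown

rows = [
    {"name": "completeness", "threshold": "99%"},
    {"name": "freshness", "threshold": "24h"},
]
print(array_of_dict_to_markdown(rows))
# | name | threshold |
# | --- | --- |
# | completeness | 99% |
# | freshness | 24h |

print(array_to_markdown(["orders", "payments"]))
# - orders
# - payments
```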
datacontract/export/sql_type_converter.py
CHANGED
@@ -3,6 +3,9 @@ from datacontract.model.data_contract_specification import Field
 
 
 def convert_to_sql_type(field: Field, server_type: str) -> str:
+    if field.config and "physicalType" in field.config:
+        return field.config["physicalType"]
+
     if server_type == "snowflake":
         return convert_to_snowflake(field)
     elif server_type == "postgres":
@@ -19,6 +22,7 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
         return convert_type_to_bigquery(field)
     elif server_type == "trino":
         return convert_type_to_trino(field)
+
     return field.type
 
 
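The new guard lets a field's `config.physicalType` override the per-dialect type mapping entirely. A small sketch of the effect (the `Field` keyword arguments follow the data contract model; treat the exact constructor call as an assumption):

```python
from datacontract.export.sql_type_converter import convert_to_sql_type
from datacontract.model.data_contract_specification import Field

# With an explicit physical type, the converter returns it verbatim.
field = Field(type="string", config={"physicalType": "varchar"})
print(convert_to_sql_type(field, "snowflake"))  # -> "varchar"

# Without the override, the logical type is mapped per server type
# (or returned as-is for server types without a dedicated mapping).
plain = Field(type="string")
print(convert_to_sql_type(plain, "unknown"))  # -> "string"
```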
datacontract/imports/avro_importer.py
CHANGED
@@ -130,13 +130,23 @@ def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Fi
             imported_field.fields = import_record_fields(field.type.fields)
         elif field.type.type == "union":
             imported_field.required = False
-
-
-            if
-            imported_field.
-
-            imported_field.
-            imported_field.
+            # Check for enum in union first, since it needs special handling
+            enum_schema = get_enum_from_union_field(field)
+            if enum_schema:
+                imported_field.type = "string"
+                imported_field.enum = enum_schema.symbols
+                imported_field.title = enum_schema.name
+                if not imported_field.config:
+                    imported_field.config = {}
+                imported_field.config["avroType"] = "enum"
+            else:
+                type = import_type_of_optional_field(field)
+                imported_field.type = type
+                if type == "record":
+                    imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
+                elif type == "array":
+                    imported_field.type = "array"
+                    imported_field.items = import_avro_array_items(get_array_from_union_field(field))
         elif field.type.type == "array":
             imported_field.type = "array"
             imported_field.items = import_avro_array_items(field.type)
@@ -277,6 +287,22 @@ def get_array_from_union_field(field: avro.schema.Field) -> avro.schema.ArraySch
     return None
 
 
+def get_enum_from_union_field(field: avro.schema.Field) -> avro.schema.EnumSchema | None:
+    """
+    Get the enum schema from a union field.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        The enum schema if found, None otherwise.
+    """
+    for field_type in field.type.schemas:
+        if field_type.type == "enum":
+            return field_type
+    return None
+
+
 def map_type_from_avro(avro_type_str: str) -> str:
     """
     Map Avro type strings to data contract type strings.
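The new union handling targets Avro schemas where a field is a union of `null` and an `enum`. A small, hypothetical schema showing what the importer now picks up (assuming the `avro` package's `avro.schema.parse`; the record and symbol names are made up):

```python
import avro.schema

schema = avro.schema.parse("""
{
  "type": "record",
  "name": "Order",
  "fields": [
    {"name": "status", "type": ["null", {"type": "enum", "name": "OrderStatus",
      "symbols": ["PLACED", "SHIPPED", "DELIVERED"]}]}
  ]
}
""")

status_field = schema.fields[0]
enum_branch = next(s for s in status_field.type.schemas if s.type == "enum")
print(enum_branch.name)     # "OrderStatus" -> used as the field title
print(enum_branch.symbols)  # ["PLACED", "SHIPPED", "DELIVERED"] -> used as the enum list
```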
datacontract/imports/excel_importer.py
CHANGED
@@ -415,7 +415,8 @@ def get_cell_value_by_name(workbook: Workbook, name: str) -> str | None:
     try:
         cell = get_cell_by_name_in_workbook(workbook, name)
         if cell.value is not None:
-
+            value = str(cell.value).strip()
+            return value if value else None
     except Exception as e:
         logger.warning(f"Error getting cell value by name {name}: {str(e)}")
     return None
@@ -431,7 +432,8 @@ def get_cell_value_by_name_in_sheet(sheet: Worksheet, name: str) -> str | None:
             if sheet_title == sheet.title:
                 cell = sheet[coordinate]
                 if cell.value is not None:
-
+                    value = str(cell.value).strip()
+                    return value if value else None
     except Exception as e:
         logger.warning(f"Error getting cell value by name {name} in sheet {sheet.title}: {str(e)}")
     return None
@@ -443,7 +445,10 @@ def get_cell_value(row, col_idx):
         return None
     try:
         cell = row[col_idx]
-
+        if cell.value is not None:
+            value = str(cell.value).strip()
+            return value if value else None
+        return None
     except (IndexError, AttributeError):
         return None
 
@@ -452,7 +457,10 @@ def get_cell_value_by_position(sheet, row_idx, col_idx):
     """Get cell value by row and column indices (0-based)"""
     try:
         cell = sheet.cell(row=row_idx + 1, column=col_idx + 1)  # Convert to 1-based indices
-
+        if cell.value is not None:
+            value = str(cell.value).strip()
+            return value if value else None
+        return None
     except Exception as e:
         logger.warning(f"Error getting cell value by position ({row_idx}, {col_idx}): {str(e)}")
     return None
@@ -823,7 +831,7 @@ def import_custom_properties(workbook: Workbook) -> List[CustomProperty]:
     except Exception as e:
         logger.warning(f"Error importing custom properties: {str(e)}")
 
-    return custom_properties
+    return custom_properties if custom_properties else None
 
 
 def parse_property_value(value: str) -> Any:
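All four cell accessors in the Excel importer now normalize values the same way: cast to string, strip whitespace, and treat empty or whitespace-only cells as missing. The pattern, extracted as a standalone illustration (hypothetical helper name):

```python
def normalize_cell_value(raw) -> str | None:
    # None stays None, other values are stringified and stripped,
    # and whitespace-only cells are treated as missing.
    if raw is None:
        return None
    value = str(raw).strip()
    return value if value else None


assert normalize_cell_value("  Order Events  ") == "Order Events"
assert normalize_cell_value("   ") is None
assert normalize_cell_value(None) is None
assert normalize_cell_value(42) == "42"
```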
datacontract/imports/odcs_v3_importer.py
CHANGED
@@ -131,6 +131,7 @@ def import_servers(odcs: OpenDataContractStandard) -> Dict[str, Server] | None:
        server.host = odcs_server.host
        server.port = odcs_server.port
        server.catalog = odcs_server.catalog
+       server.stagingDir = odcs_server.stagingDir
        server.topic = getattr(odcs_server, "topic", None)
        server.http_path = getattr(odcs_server, "http_path", None)
        server.token = getattr(odcs_server, "token", None)
datacontract/imports/spark_importer.py
CHANGED
@@ -1,4 +1,6 @@
+import atexit
 import logging
+import tempfile
 
 from databricks.sdk import WorkspaceClient
 from pyspark.sql import DataFrame, SparkSession, types
@@ -54,7 +56,16 @@ def import_spark(
     Returns:
         DataContractSpecification: The updated contract spec with imported models.
     """
-
+
+    tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
+    atexit.register(tmp_dir.cleanup)
+
+    spark = (
+        SparkSession.builder.config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
+        .config("spark.streaming.stopGracefullyOnShutdown", "true")
+        .config("spark.ui.enabled", "false")
+        .getOrCreate()
+    )
     data_contract_specification.servers["local"] = Server(type="dataframe")
 
     if dataframe is not None:
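The Spark import now creates its own local session backed by a throwaway warehouse directory that is removed at process exit. The lifecycle pattern in isolation (a generic sketch, not the importer itself):

```python
import atexit
import tempfile

# Create a scratch directory and guarantee it is cleaned up when the process
# exits, even if the caller never reaches an explicit cleanup step.
tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
atexit.register(tmp_dir.cleanup)

warehouse_dir = f"{tmp_dir.name}/spark-warehouse"  # .name holds the filesystem path
print(warehouse_dir)
```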
{datacontract_cli-0.10.32.dist-info → datacontract_cli-0.10.34.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datacontract-cli
-Version: 0.10.32
+Version: 0.10.34
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 License-Expression: MIT
@@ -20,7 +20,7 @@ Requires-Dist: fastparquet<2025.0.0,>=2024.5.0
 Requires-Dist: numpy<2.0.0,>=1.26.4
 Requires-Dist: python-multipart<1.0.0,>=0.0.20
 Requires-Dist: rich<15.0,>=13.7
-Requires-Dist: sqlglot<
+Requires-Dist: sqlglot<28.0.0,>=26.6.0
 Requires-Dist: duckdb<2.0.0,>=1.0.0
 Requires-Dist: soda-core-duckdb<3.6.0,>=3.3.20
 Requires-Dist: setuptools>=60
@@ -42,7 +42,7 @@ Provides-Extra: databricks
 Requires-Dist: soda-core-spark-df<3.6.0,>=3.3.20; extra == "databricks"
 Requires-Dist: soda-core-spark[databricks]<3.6.0,>=3.3.20; extra == "databricks"
 Requires-Dist: databricks-sql-connector<4.1.0,>=3.7.0; extra == "databricks"
-Requires-Dist: databricks-sdk<0.
+Requires-Dist: databricks-sdk<0.61.0; extra == "databricks"
 Requires-Dist: pyspark<4.0.0,>=3.5.5; extra == "databricks"
 Provides-Extra: iceberg
 Requires-Dist: pyiceberg==0.9.1; extra == "iceberg"
@@ -56,10 +56,12 @@ Provides-Extra: s3
 Requires-Dist: s3fs<2026.0.0,>=2025.2.0; extra == "s3"
 Requires-Dist: aiobotocore<2.24.0,>=2.17.0; extra == "s3"
 Provides-Extra: snowflake
-Requires-Dist: snowflake-connector-python[pandas]<3.
+Requires-Dist: snowflake-connector-python[pandas]<3.17,>=3.6; extra == "snowflake"
 Requires-Dist: soda-core-snowflake<3.6.0,>=3.3.20; extra == "snowflake"
 Provides-Extra: sqlserver
 Requires-Dist: soda-core-sqlserver<3.6.0,>=3.3.20; extra == "sqlserver"
+Provides-Extra: athena
+Requires-Dist: soda-core-athena<3.6.0,>=3.3.20; extra == "athena"
 Provides-Extra: trino
 Requires-Dist: soda-core-trino<3.6.0,>=3.3.20; extra == "trino"
 Provides-Extra: dbt
@@ -76,19 +78,19 @@ Requires-Dist: uvicorn==0.35.0; extra == "api"
 Provides-Extra: protobuf
 Requires-Dist: grpcio-tools>=1.53; extra == "protobuf"
 Provides-Extra: all
-Requires-Dist: datacontract-cli[api,bigquery,csv,databricks,dbml,dbt,excel,iceberg,kafka,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
+Requires-Dist: datacontract-cli[api,athena,bigquery,csv,databricks,dbml,dbt,excel,iceberg,kafka,parquet,postgres,protobuf,rdf,s3,snowflake,sqlserver,trino]; extra == "all"
 Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
 Requires-Dist: httpx==0.28.1; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.1.
+Requires-Dist: moto==5.1.8; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<4.3.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
 Requires-Dist: pymssql==2.3.7; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
-Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.
+Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.12.0; extra == "dev"
 Requires-Dist: trino==0.335.0; extra == "dev"
 Dynamic: license-file
 
@@ -214,9 +216,15 @@ $ datacontract export --format odcs datacontract.yaml --output odcs.yaml
 # import ODCS to data contract
 $ datacontract import --format odcs odcs.yaml --output datacontract.yaml
 
-# import sql (other formats: avro, glue, bigquery, jsonschema ...)
+# import sql (other formats: avro, glue, bigquery, jsonschema, excel ...)
 $ datacontract import --format sql --source my-ddl.sql --dialect postgres --output datacontract.yaml
 
+# import from Excel template
+$ datacontract import --format excel --source odcs.xlsx --output datacontract.yaml
+
+# export to Excel template
+$ datacontract export --format excel --output odcs.xlsx datacontract.yaml
+
 # find differences between two data contracts
 $ datacontract diff datacontract-v1.yaml datacontract-v2.yaml
 
@@ -310,6 +318,7 @@ A list of available extras:
 
 | Dependency | Installation Command |
 |-------------------------|--------------------------------------------|
+| Amazon Athena | `pip install datacontract-cli[athena]` |
 | Avro Support | `pip install datacontract-cli[avro]` |
 | Google BigQuery | `pip install datacontract-cli[bigquery]` |
 | Databricks Integration | `pip install datacontract-cli[databricks]` |
@@ -454,6 +463,7 @@ Credentials are provided with environment variables.
 Supported server types:
 
 - [s3](#S3)
+- [athena](#athena)
 - [bigquery](#bigquery)
 - [azure](#azure)
 - [sqlserver](#sqlserver)
@@ -464,6 +474,7 @@ Supported server types:
 - [kafka](#kafka)
 - [postgres](#postgres)
 - [trino](#trino)
+- [api](#api)
 - [local](#local)
 
 Supported formats:
@@ -523,6 +534,41 @@ servers:
 | `DATACONTRACT_S3_SESSION_TOKEN` | `AQoDYXdzEJr...` | AWS temporary session token (optional) |
 
 
+#### Athena
+
+Data Contract CLI can test data in AWS Athena stored in S3.
+Supports different file formats, such as Iceberg, Parquet, JSON, CSV...
+
+##### Example
+
+datacontract.yaml
+```yaml
+servers:
+  athena:
+    type: athena
+    catalog: awsdatacatalog # awsdatacatalog is the default setting
+    schema: icebergdemodb # in Athena, this is called "database"
+    regionName: eu-central-1
+    stagingDir: s3://my-bucket/athena-results/
+models:
+  my_table: # corresponds to a table of view name
+    type: table
+    fields:
+      my_column_1: # corresponds to a column
+        type: string
+        config:
+          physicalType: varchar
+```
+
+##### Environment Variables
+
+| Environment Variable | Example | Description |
+|-------------------------------------|---------------------------------|----------------------------------------|
+| `DATACONTRACT_S3_REGION` | `eu-central-1` | Region of Athena service |
+| `DATACONTRACT_S3_ACCESS_KEY_ID` | `AKIAXV5Q5QABCDEFGH` | AWS Access Key ID |
+| `DATACONTRACT_S3_SECRET_ACCESS_KEY` | `93S7LRrJcqLaaaa/XXXXXXXXXXXXX` | AWS Secret Access Key |
+| `DATACONTRACT_S3_SESSION_TOKEN` | `AQoDYXdzEJr...` | AWS temporary session token (optional) |
+
 
 #### Google Cloud Storage (GCS)
 
@@ -890,6 +936,38 @@ models:
 | `DATACONTRACT_TRINO_PASSWORD` | `mysecretpassword` | Password |
 
 
+#### API
+
+Data Contract CLI can test APIs that return data in JSON format.
+Currently, only GET requests are supported.
+
+##### Example
+
+datacontract.yaml
+```yaml
+servers:
+  api:
+    type: "api"
+    location: "https://api.example.com/path"
+    delimiter: none # new_line, array, or none (default)
+
+models:
+  my_object: # corresponds to the root element of the JSON response
+    type: object
+    fields:
+      field1:
+        type: string
+      fields2:
+        type: number
+```
+
+##### Environment Variables
+
+| Environment Variable | Example | Description |
+|-----------------------------------------|------------------|---------------------------------------------------|
+| `DATACONTRACT_API_HEADER_AUTHORIZATION` | `Bearer <token>` | The value for the `authorization` header. Optional. |
+
+
 #### Local
 
 Data Contract CLI can test local files in parquet, json, csv, or delta format.
@@ -933,7 +1011,7 @@ models:
 │ terraform|avro-idl|sql|sql-query|mer │
 │ maid|html|go|bigquery|dbml|spark|sql │
 │ alchemy|data-caterer|dcs|markdown|ic │
-│ eberg|custom]
+│ eberg|custom|excel] │
 │ --output PATH Specify the file path where the │
 │ exported data will be saved. If no │
 │ path is provided, the output will be │
@@ -953,8 +1031,10 @@ models:
 │ --engine TEXT [engine] The engine used for great │
 │ expection run. │
 │ [default: None] │
-│ --template PATH
-│
+│ --template PATH The file path or URL of a template. │
+│ For Excel format: path/URL to custom │
+│ Excel template. For custom format: │
+│ path to Jinja template. │
 │ [default: None] │
 │ --help Show this message and exit. │
 ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
@@ -1003,6 +1083,7 @@ Available export options:
 | `dcs` | Export to Data Contract Specification in YAML format | ✅ |
 | `markdown` | Export to Markdown | ✅ |
 | `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
+| `excel` | Export to ODCS Excel Template | ✅ |
 | `custom` | Export to Custom format with Jinja | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |
 
@@ -1274,6 +1355,22 @@ FROM
 {{ ref('orders') }}
 ```
 
+#### ODCS Excel Templace
+
+The `export` function converts a data contract into an ODCS (Open Data Contract Standard) Excel template. This creates a user-friendly Excel spreadsheet that can be used for authoring, sharing, and managing data contracts using the familiar Excel interface.
+
+```shell
+datacontract export --format excel --output datacontract.xlsx datacontract.yaml
+```
+
+The Excel format enables:
+- **User-friendly authoring**: Create and edit data contracts in Excel's familiar interface
+- **Easy sharing**: Distribute data contracts as standard Excel files
+- **Collaboration**: Enable non-technical stakeholders to contribute to data contract definitions
+- **Round-trip conversion**: Import Excel templates back to YAML data contracts
+
+For more information about the Excel template structure, visit the [ODCS Excel Template repository](https://github.com/datacontract/open-data-contract-standard-excel-template).
+
 ### import
 ```
 
@@ -1392,6 +1489,7 @@ Available import options:
 | `spark` | Import from Spark StructTypes, Variant | ✅ |
 | `sql` | Import from SQL DDL | ✅ |
 | `unity` | Import from Databricks Unity Catalog | partial |
+| `excel` | Import from ODCS Excel Template | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |
 
 
@@ -2078,6 +2176,7 @@ We are happy to receive your contributions. Propose your change in an issue or d
 
 ## Companies using this tool
 
+- [Entropy Data](https://www.entropy-data.com)
 - [INNOQ](https://innoq.com)
 - [Data Catering](https://data.catering/)
 - [Oliver Wyman](https://www.oliverwyman.com/)
@@ -2096,7 +2195,7 @@ We are happy to receive your contributions. Propose your change in an issue or d
 
 ## Credits
 
-Created by [Stefan Negele](https://www.linkedin.com/in/stefan-negele-573153112/)
+Created by [Stefan Negele](https://www.linkedin.com/in/stefan-negele-573153112/), [Jochen Christ](https://www.linkedin.com/in/jochenchrist/), and [Simon Harrer]().
 