datacontract-cli 0.10.19__py3-none-any.whl → 0.10.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/api.py +253 -0
- datacontract/cli.py +28 -7
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/exporter.py +1 -0
- datacontract/export/exporter_factory.py +4 -0
- datacontract/lint/urls.py +13 -9
- datacontract/model/data_contract_specification.py +130 -129
- datacontract/model/run.py +18 -18
- datacontract/templates/datacontract.html +16 -2
- datacontract/templates/partials/definition.html +3 -95
- datacontract/templates/partials/model_field.html +13 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/style/output.css +151 -152
- {datacontract_cli-0.10.19.dist-info → datacontract_cli-0.10.21.dist-info}/METADATA +103 -21
- {datacontract_cli-0.10.19.dist-info → datacontract_cli-0.10.21.dist-info}/RECORD +19 -17
- datacontract/web.py +0 -67
- {datacontract_cli-0.10.19.dist-info → datacontract_cli-0.10.21.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.19.dist-info → datacontract_cli-0.10.21.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.19.dist-info → datacontract_cli-0.10.21.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.19.dist-info → datacontract_cli-0.10.21.dist-info}/top_level.txt +0 -0
datacontract/api.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Annotated, Optional
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from fastapi import Body, Depends, FastAPI, HTTPException, Query, status
|
|
7
|
+
from fastapi.responses import PlainTextResponse
|
|
8
|
+
from fastapi.security.api_key import APIKeyHeader
|
|
9
|
+
|
|
10
|
+
from datacontract.data_contract import DataContract, ExportFormat
|
|
11
|
+
from datacontract.model.run import Run
|
|
12
|
+
|
|
13
|
+
DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.1.0
|
|
14
|
+
id: urn:datacontract:checkout:orders-latest
|
|
15
|
+
info:
|
|
16
|
+
title: Orders Latest
|
|
17
|
+
version: 2.0.0
|
|
18
|
+
owner: Sales Team
|
|
19
|
+
servers:
|
|
20
|
+
production:
|
|
21
|
+
type: s3
|
|
22
|
+
location: s3://datacontract-example-orders-latest/v2/{model}/*.json
|
|
23
|
+
format: json
|
|
24
|
+
delimiter: new_line
|
|
25
|
+
models:
|
|
26
|
+
orders:
|
|
27
|
+
description: One record per order. Includes cancelled and deleted orders.
|
|
28
|
+
type: table
|
|
29
|
+
fields:
|
|
30
|
+
order_id:
|
|
31
|
+
type: string
|
|
32
|
+
primaryKey: true
|
|
33
|
+
order_timestamp:
|
|
34
|
+
description: The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful.
|
|
35
|
+
type: timestamp
|
|
36
|
+
required: true
|
|
37
|
+
examples:
|
|
38
|
+
- "2024-09-09T08:30:00Z"
|
|
39
|
+
order_total:
|
|
40
|
+
description: Total amount in the smallest monetary unit (e.g., cents).
|
|
41
|
+
type: long
|
|
42
|
+
required: true
|
|
43
|
+
examples:
|
|
44
|
+
- 9999
|
|
45
|
+
quality:
|
|
46
|
+
- type: sql
|
|
47
|
+
description: 95% of all order total values are expected to be between 10 and 499 EUR.
|
|
48
|
+
query: |
|
|
49
|
+
SELECT quantile_cont(order_total, 0.95) AS percentile_95
|
|
50
|
+
FROM orders
|
|
51
|
+
mustBeBetween: [1000, 99900]
|
|
52
|
+
customer_id:
|
|
53
|
+
description: Unique identifier for the customer.
|
|
54
|
+
type: text
|
|
55
|
+
minLength: 10
|
|
56
|
+
maxLength: 20
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
app = FastAPI(
|
|
60
|
+
docs_url="/",
|
|
61
|
+
title="Data Contract CLI API",
|
|
62
|
+
summary="You can use the API to test, export, and lint your data contracts.",
|
|
63
|
+
license_info={
|
|
64
|
+
"name": "MIT License",
|
|
65
|
+
"identifier": "MIT",
|
|
66
|
+
},
|
|
67
|
+
contact={"name": "Data Contract CLI", "url": "https://cli.datacontract.com/"},
|
|
68
|
+
openapi_tags=[
|
|
69
|
+
{
|
|
70
|
+
"name": "test",
|
|
71
|
+
"externalDocs": {
|
|
72
|
+
"description": "Documentation",
|
|
73
|
+
"url": "https://cli.datacontract.com/#test",
|
|
74
|
+
},
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"name": "lint",
|
|
78
|
+
"externalDocs": {
|
|
79
|
+
"description": "Documentation",
|
|
80
|
+
"url": "https://cli.datacontract.com/#lint",
|
|
81
|
+
},
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"name": "export",
|
|
85
|
+
"externalDocs": {
|
|
86
|
+
"description": "Documentation",
|
|
87
|
+
"url": "https://cli.datacontract.com/#export",
|
|
88
|
+
},
|
|
89
|
+
},
|
|
90
|
+
],
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
api_key_header = APIKeyHeader(
|
|
94
|
+
name="x-api-key",
|
|
95
|
+
auto_error=False, # this makes authentication optional
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def check_api_key(api_key_header: str | None):
|
|
100
|
+
correct_api_key = os.getenv("DATACONTRACT_CLI_API_KEY")
|
|
101
|
+
if correct_api_key is None or correct_api_key == "":
|
|
102
|
+
logging.info("Environment variable DATACONTRACT_CLI_API_KEY is not set. Skip API key check.")
|
|
103
|
+
return
|
|
104
|
+
if api_key_header is None or api_key_header == "":
|
|
105
|
+
logging.info("The API key is missing.")
|
|
106
|
+
raise HTTPException(
|
|
107
|
+
status_code=status.HTTP_401_UNAUTHORIZED,
|
|
108
|
+
detail="Missing API key. Use Header 'x-api-key' to provide the API key.",
|
|
109
|
+
)
|
|
110
|
+
if api_key_header != correct_api_key:
|
|
111
|
+
logging.info("The provided API key is not correct.")
|
|
112
|
+
raise HTTPException(
|
|
113
|
+
status_code=status.HTTP_403_FORBIDDEN,
|
|
114
|
+
detail="The provided API key is not correct.",
|
|
115
|
+
)
|
|
116
|
+
logging.info("Request authenticated with API key.")
|
|
117
|
+
pass
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@app.post(
|
|
121
|
+
"/test",
|
|
122
|
+
tags=["test"],
|
|
123
|
+
summary="Run data contract tests",
|
|
124
|
+
description="""
|
|
125
|
+
Run schema and quality tests. Data Contract CLI connects to the data sources configured in the server section.
|
|
126
|
+
This usually requires credentials to access the data sources.
|
|
127
|
+
Credentials must be provided via environment variables when running the web server.
|
|
128
|
+
POST the data contract YAML as payload.
|
|
129
|
+
""",
|
|
130
|
+
responses={
|
|
131
|
+
401: {
|
|
132
|
+
"description": "Unauthorized (when an environment variable DATACONTRACT_CLI_API_KEY is configured).",
|
|
133
|
+
"content": {
|
|
134
|
+
"application/json": {
|
|
135
|
+
"examples": {
|
|
136
|
+
"api_key_missing": {
|
|
137
|
+
"summary": "API key Missing",
|
|
138
|
+
"value": {"detail": "Missing API key. Use Header 'x-api-key' to provide the API key."},
|
|
139
|
+
},
|
|
140
|
+
"api_key_wrong": {
|
|
141
|
+
"summary": "API key Wrong",
|
|
142
|
+
"value": {"detail": "The provided API key is not correct."},
|
|
143
|
+
},
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
},
|
|
147
|
+
},
|
|
148
|
+
},
|
|
149
|
+
response_model_exclude_none=True,
|
|
150
|
+
response_model_exclude_unset=True,
|
|
151
|
+
)
|
|
152
|
+
async def test(
|
|
153
|
+
body: Annotated[
|
|
154
|
+
str,
|
|
155
|
+
Body(
|
|
156
|
+
title="Data Contract YAML",
|
|
157
|
+
media_type="application/yaml",
|
|
158
|
+
examples=[DATA_CONTRACT_EXAMPLE_PAYLOAD],
|
|
159
|
+
),
|
|
160
|
+
],
|
|
161
|
+
api_key: Annotated[str | None, Depends(api_key_header)] = None,
|
|
162
|
+
server: Annotated[
|
|
163
|
+
str | None,
|
|
164
|
+
Query(
|
|
165
|
+
example="production",
|
|
166
|
+
description="The server name to test. Optional, if there is only one server.",
|
|
167
|
+
),
|
|
168
|
+
] = None,
|
|
169
|
+
) -> Run:
|
|
170
|
+
check_api_key(api_key)
|
|
171
|
+
logging.info("Testing data contract...")
|
|
172
|
+
logging.info(body)
|
|
173
|
+
return DataContract(data_contract_str=body, server=server).test()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@app.post(
    "/lint",
    tags=["lint"],
    summary="Validate that the datacontract.yaml is correctly formatted.",
    description="""Validate that the datacontract.yaml is correctly formatted.""",
)
async def lint(
    body: Annotated[
        str,
        Body(
            title="Data Contract YAML",
            media_type="application/yaml",
            examples=[DATA_CONTRACT_EXAMPLE_PAYLOAD],
        ),
    ],
    schema: Annotated[
        str | None,
        Query(
            example="https://datacontract.com/datacontract.schema.json",
            description="The schema to use for validation. This must be a URL.",
        ),
    ] = None,
):
    """Lint the posted data contract YAML and report the outcome.

    Args:
        body: The data contract as a YAML string (request payload).
        schema: Optional schema location handed to ``DataContract`` as
            ``schema_location``.

    Returns:
        A dict with the lint run's ``result`` and its list of ``checks``.
    """
    outcome = DataContract(data_contract_str=body, schema_location=schema).lint()
    return {"result": outcome.result, "checks": outcome.checks}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@app.post(
    "/export",
    tags=["export"],
    summary="Convert data contract to a specific format.",
    response_class=PlainTextResponse,
)
def export(
    body: Annotated[
        str,
        Body(
            title="Data Contract YAML",
            media_type="application/yaml",
            examples=[DATA_CONTRACT_EXAMPLE_PAYLOAD],
        ),
    ],
    # `format` and `rdf_base` use Query(...) like the other parameters so their
    # help text appears in the OpenAPI documentation.
    format: Annotated[ExportFormat, Query(description="The export format.")],
    server: Annotated[
        str | None,
        Query(
            example="production",
            description="The server name to export. Optional, if there is only one server.",
        ),
    ] = None,
    model: Annotated[
        str | None,
        Query(
            description="Use the key of the model in the data contract yaml file "
            "to refer to a model, e.g., `orders`, or `all` for all "
            "models (default).",
        ),
    ] = "all",
    rdf_base: Annotated[
        Optional[str],
        Query(description="[rdf] The base URI used to generate the RDF graph."),
    ] = None,
    sql_server_type: Annotated[
        Optional[str],
        Query(
            description="[sql] The server type to determine the sql dialect. By default, it uses 'auto' to automatically detect the sql dialect via the specified servers in the data contract.",
        ),
    ] = None,
):
    # Converts the posted data contract YAML into the requested export format
    # and returns the converted text as a plain-text response.
    #
    # Documented with comments on purpose: this route passes no `description=`,
    # and a docstring here could end up as the operation description in the
    # generated OpenAPI schema.
    #
    # NOTE(review): the description of `sql_server_type` promises an 'auto'
    # default, but the parameter defaults to None and None is passed through —
    # confirm that DataContract.export treats None like 'auto'.
    return DataContract(data_contract_str=body, server=server).export(
        export_format=format,
        model=model,
        rdf_base=rdf_base,
        sql_server_type=sql_server_type,
    )
|
datacontract/cli.py
CHANGED
|
@@ -4,7 +4,6 @@ from pathlib import Path
|
|
|
4
4
|
from typing import Iterable, List, Optional
|
|
5
5
|
|
|
6
6
|
import typer
|
|
7
|
-
import uvicorn
|
|
8
7
|
from click import Context
|
|
9
8
|
from rich import box
|
|
10
9
|
from rich.console import Console
|
|
@@ -12,7 +11,6 @@ from rich.table import Table
|
|
|
12
11
|
from typer.core import TyperGroup
|
|
13
12
|
from typing_extensions import Annotated
|
|
14
13
|
|
|
15
|
-
from datacontract import web
|
|
16
14
|
from datacontract.catalog.catalog import create_data_contract_html, create_index_html
|
|
17
15
|
from datacontract.data_contract import DataContract, ExportFormat
|
|
18
16
|
from datacontract.imports.importer import ImportFormat
|
|
@@ -198,6 +196,11 @@ def export(
|
|
|
198
196
|
Optional[str],
|
|
199
197
|
typer.Option(help="[engine] The engine used for the Great Expectations run."),
|
|
200
198
|
] = None,
|
|
199
|
+
# TODO: this should be a subcommand
|
|
200
|
+
template: Annotated[
|
|
201
|
+
Optional[Path],
|
|
202
|
+
typer.Option(help="[custom] The file path of Jinja template."),
|
|
203
|
+
] = None,
|
|
201
204
|
):
|
|
202
205
|
"""
|
|
203
206
|
Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
|
|
@@ -210,6 +213,7 @@ def export(
|
|
|
210
213
|
rdf_base=rdf_base,
|
|
211
214
|
sql_server_type=sql_server_type,
|
|
212
215
|
engine=engine,
|
|
216
|
+
template=template,
|
|
213
217
|
)
|
|
214
218
|
# Don't interpret console markup in output.
|
|
215
219
|
if output is None:
|
|
@@ -346,7 +350,7 @@ def catalog(
|
|
|
346
350
|
] = None,
|
|
347
351
|
):
|
|
348
352
|
"""
|
|
349
|
-
Create
|
|
353
|
+
Create an HTML catalog of data contracts.
|
|
350
354
|
"""
|
|
351
355
|
path = Path(output)
|
|
352
356
|
path.mkdir(parents=True, exist_ok=True)
|
|
@@ -435,15 +439,32 @@ def diff(
|
|
|
435
439
|
|
|
436
440
|
|
|
437
441
|
@app.command()
|
|
438
|
-
def
|
|
442
|
+
def api(
|
|
439
443
|
port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
|
|
440
|
-
host: Annotated[
|
|
444
|
+
host: Annotated[
|
|
445
|
+
str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
|
|
446
|
+
] = "127.0.0.1",
|
|
441
447
|
):
|
|
442
448
|
"""
|
|
443
|
-
Start the datacontract
|
|
449
|
+
Start the datacontract CLI as server application with REST API.
|
|
450
|
+
|
|
451
|
+
The OpenAPI documentation as Swagger UI is available on http://localhost:4242.
|
|
452
|
+
You can execute the commands directly from the Swagger UI.
|
|
453
|
+
|
|
454
|
+
To protect the API, you can set the environment variable DATACONTRACT_CLI_API_KEY to a secret API key.
|
|
455
|
+
To authenticate, requests must include the header 'x-api-key' with the correct API key.
|
|
456
|
+
This is highly recommended, as data contract tests may be subject to SQL injections or leak sensitive information.
|
|
457
|
+
|
|
458
|
+
To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in
|
|
459
|
+
https://cli.datacontract.com/#test
|
|
444
460
|
"""
|
|
461
|
+
import uvicorn
|
|
462
|
+
from uvicorn.config import LOGGING_CONFIG
|
|
463
|
+
|
|
464
|
+
log_config = LOGGING_CONFIG
|
|
465
|
+
log_config["root"] = {"level": "INFO"}
|
|
445
466
|
|
|
446
|
-
uvicorn.run(
|
|
467
|
+
uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)
|
|
447
468
|
|
|
448
469
|
|
|
449
470
|
def _handle_result(run):
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from jinja2 import Environment, FileSystemLoader
|
|
4
|
+
|
|
5
|
+
from datacontract.export.exporter import Exporter
|
|
6
|
+
from datacontract.model.data_contract_specification import (
|
|
7
|
+
DataContractSpecification,
|
|
8
|
+
Model,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CustomExporter(Exporter):
    """Exporter that renders a data contract through a user-supplied Jinja template."""

    def export(
        self,
        data_contract: DataContractSpecification,
        model: Model,
        server: str,
        sql_server_type: str,
        export_args: dict,
    ) -> str:
        """Render ``data_contract`` with the template named in ``export_args``.

        Args:
            data_contract: The data contract passed to the template.
            model: Not used by this exporter.
            server: Not used by this exporter.
            sql_server_type: Not used by this exporter.
            export_args: Extra export options; must contain a ``"template"``
                entry holding the path of the Jinja template file.

        Returns:
            The rendered template output.

        Raises:
            RuntimeError: If ``export_args`` has no ``"template"`` entry or the
                entry is ``None``.
        """
        template = export_args.get("template")
        if template is None:
            raise RuntimeError("Export to custom requires template argument.")

        return to_custom(data_contract, template)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def to_custom(data_contract: DataContractSpecification, template_path: Path) -> str:
    """Render the Jinja template at ``template_path`` for ``data_contract``.

    The contract is exposed to the template under the name ``data_contract``.
    """
    return get_template(template_path).render(data_contract=data_contract)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_template(path: Path):
    """Load the Jinja template stored at ``path``.

    The loader searches the template's parent directory for the file.

    Args:
        path: Location of the template file; relative paths are resolved to an
            absolute path first. A plain string is accepted as well.

    Returns:
        The template object returned by ``Environment.get_template``.
    """
    absolute_path = Path(path).resolve()
    env = Environment(loader=FileSystemLoader(str(absolute_path.parent)))
    # Take the file name from the resolved path as well, so it always matches
    # the directory given to the loader (e.g. when `path` is a symlink) and so
    # a `str` argument works.
    return env.get_template(absolute_path.name)
|
datacontract/export/exporter.py
CHANGED
|
@@ -206,3 +206,7 @@ exporter_factory.register_lazy_exporter(
|
|
|
206
206
|
exporter_factory.register_lazy_exporter(
|
|
207
207
|
name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
|
|
208
208
|
)
|
|
209
|
+
|
|
210
|
+
exporter_factory.register_lazy_exporter(
|
|
211
|
+
name=ExportFormat.custom, module_path="datacontract.export.custom_converter", class_name="CustomExporter"
|
|
212
|
+
)
|
datacontract/lint/urls.py
CHANGED
|
@@ -27,30 +27,34 @@ def fetch_resource(url: str):
|
|
|
27
27
|
|
|
28
28
|
def _set_api_key(headers, url):
|
|
29
29
|
hostname = urlparse(url).hostname
|
|
30
|
+
|
|
31
|
+
datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
32
|
+
datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
|
|
33
|
+
|
|
30
34
|
if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
|
|
31
|
-
datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
32
35
|
if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
|
|
33
|
-
print("Error: Data Mesh Manager API
|
|
36
|
+
print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
|
|
34
37
|
raise DataContractException(
|
|
35
38
|
type="lint",
|
|
36
39
|
name=f"Reading data contract from {url}",
|
|
37
|
-
reason="Error: Data Mesh Manager API
|
|
40
|
+
reason="Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
|
|
38
41
|
engine="datacontract",
|
|
39
42
|
result="error",
|
|
40
43
|
)
|
|
41
44
|
headers["x-api-key"] = datamesh_manager_api_key
|
|
42
45
|
elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
|
|
43
|
-
datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
|
|
44
46
|
if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
|
|
45
|
-
print("Error: Data Contract Manager API
|
|
47
|
+
print("Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
|
|
46
48
|
raise DataContractException(
|
|
47
49
|
type="lint",
|
|
48
50
|
name=f"Reading data contract from {url}",
|
|
49
|
-
reason="Error: Data Contract Manager API
|
|
51
|
+
reason="Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
|
|
50
52
|
engine="datacontract",
|
|
51
53
|
result="error",
|
|
52
54
|
)
|
|
53
55
|
headers["x-api-key"] = datacontract_manager_api_key
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
56
|
+
|
|
57
|
+
if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
|
|
58
|
+
headers["x-api-key"] = datamesh_manager_api_key
|
|
59
|
+
if datacontract_manager_api_key is not None and datacontract_manager_api_key != "":
|
|
60
|
+
headers["x-api-key"] = datacontract_manager_api_key
|