datacontract-cli 0.10.19__py3-none-any.whl → 0.10.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

datacontract/api.py ADDED
@@ -0,0 +1,253 @@
1
+ import logging
2
+ import os
3
+ from typing import Annotated, Optional
4
+
5
+ import typer
6
+ from fastapi import Body, Depends, FastAPI, HTTPException, Query, status
7
+ from fastapi.responses import PlainTextResponse
8
+ from fastapi.security.api_key import APIKeyHeader
9
+
10
+ from datacontract.data_contract import DataContract, ExportFormat
11
+ from datacontract.model.run import Run
12
+
13
+ DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.1.0
14
+ id: urn:datacontract:checkout:orders-latest
15
+ info:
16
+ title: Orders Latest
17
+ version: 2.0.0
18
+ owner: Sales Team
19
+ servers:
20
+ production:
21
+ type: s3
22
+ location: s3://datacontract-example-orders-latest/v2/{model}/*.json
23
+ format: json
24
+ delimiter: new_line
25
+ models:
26
+ orders:
27
+ description: One record per order. Includes cancelled and deleted orders.
28
+ type: table
29
+ fields:
30
+ order_id:
31
+ type: string
32
+ primaryKey: true
33
+ order_timestamp:
34
+ description: The business timestamp in UTC when the order was successfully registered in the source system and the payment was successful.
35
+ type: timestamp
36
+ required: true
37
+ examples:
38
+ - "2024-09-09T08:30:00Z"
39
+ order_total:
40
+ description: Total amount the smallest monetary unit (e.g., cents).
41
+ type: long
42
+ required: true
43
+ examples:
44
+ - 9999
45
+ quality:
46
+ - type: sql
47
+ description: 95% of all order total values are expected to be between 10 and 499 EUR.
48
+ query: |
49
+ SELECT quantile_cont(order_total, 0.95) AS percentile_95
50
+ FROM orders
51
+ mustBeBetween: [1000, 99900]
52
+ customer_id:
53
+ description: Unique identifier for the customer.
54
+ type: text
55
+ minLength: 10
56
+ maxLength: 20
57
+ """
58
+
59
# FastAPI application behind the REST API. The interactive OpenAPI docs are
# mounted at the root path ("/"), and every endpoint tag links to the matching
# section of the CLI documentation.
app = FastAPI(
    docs_url="/",
    title="Data Contract CLI API",
    summary="You can use the API to test, export, and lint your data contracts.",
    license_info={"name": "MIT License", "identifier": "MIT"},
    contact={"name": "Data Contract CLI", "url": "https://cli.datacontract.com/"},
    openapi_tags=[
        {
            "name": tag_name,
            "externalDocs": {
                "description": "Documentation",
                "url": f"https://cli.datacontract.com/#{tag_name}",
            },
        }
        for tag_name in ("test", "lint", "export")
    ],
)
92
+
93
# Security scheme that reads the "x-api-key" request header.
# auto_error=False keeps authentication optional at this level: the decision
# whether a missing key is acceptable is left to the code that consumes the
# extracted value.
api_key_header = APIKeyHeader(name="x-api-key", auto_error=False)
97
+
98
+
99
+ def check_api_key(api_key_header: str | None):
100
+ correct_api_key = os.getenv("DATACONTRACT_CLI_API_KEY")
101
+ if correct_api_key is None or correct_api_key == "":
102
+ logging.info("Environment variable DATACONTRACT_CLI_API_KEY is not set. Skip API key check.")
103
+ return
104
+ if api_key_header is None or api_key_header == "":
105
+ logging.info("The API key is missing.")
106
+ raise HTTPException(
107
+ status_code=status.HTTP_401_UNAUTHORIZED,
108
+ detail="Missing API key. Use Header 'x-api-key' to provide the API key.",
109
+ )
110
+ if api_key_header != correct_api_key:
111
+ logging.info("The provided API key is not correct.")
112
+ raise HTTPException(
113
+ status_code=status.HTTP_403_FORBIDDEN,
114
+ detail="The provided API key is not correct.",
115
+ )
116
+ logging.info("Request authenticated with API key.")
117
+ pass
118
+
119
+
120
@app.post(
    "/test",
    tags=["test"],
    summary="Run data contract tests",
    description="""
Run schema and quality tests. Data Contract CLI connects to the data sources configured in the server section.
This usually requires credentials to access the data sources.
Credentials must be provided via environment variables when running the web server.
POST the data contract YAML as payload.
""",
    responses={
        # check_api_key answers 401 for a missing key and 403 for a wrong one,
        # so each example is documented under the status code actually sent.
        401: {
            "description": "Unauthorized (when an environment variable DATACONTRACT_CLI_API_KEY is configured).",
            "content": {
                "application/json": {
                    "examples": {
                        "api_key_missing": {
                            "summary": "API key Missing",
                            "value": {"detail": "Missing API key. Use Header 'x-api-key' to provide the API key."},
                        },
                    }
                }
            },
        },
        403: {
            "description": "Forbidden (when an environment variable DATACONTRACT_CLI_API_KEY is configured).",
            "content": {
                "application/json": {
                    "examples": {
                        "api_key_wrong": {
                            "summary": "API key Wrong",
                            "value": {"detail": "The provided API key is not correct."},
                        },
                    }
                }
            },
        },
    },
    response_model_exclude_none=True,
    response_model_exclude_unset=True,
)
async def test(
    body: Annotated[
        str,
        Body(
            title="Data Contract YAML",
            media_type="application/yaml",
            examples=[DATA_CONTRACT_EXAMPLE_PAYLOAD],
        ),
    ],
    api_key: Annotated[str | None, Depends(api_key_header)] = None,
    server: Annotated[
        str | None,
        Query(
            example="production",
            description="The server name to test. Optional, if there is only one server.",
        ),
    ] = None,
) -> Run:
    """Run the tests of the posted data contract and return the resulting run.

    Args:
        body: The data contract as a YAML string (request body).
        api_key: Value of the ``x-api-key`` header, if any; validated by
            ``check_api_key`` before anything else happens.
        server: Name of the server entry to test, passed through to
            ``DataContract``.

    Returns:
        The ``Run`` produced by ``DataContract.test()``.

    Raises:
        HTTPException: Propagated from ``check_api_key`` (401 / 403).
    """
    check_api_key(api_key)
    logging.info("Testing data contract...")
    logging.info(body)
    return DataContract(data_contract_str=body, server=server).test()
174
+
175
+
176
@app.post(
    "/lint",
    tags=["lint"],
    summary="Validate that the datacontract.yaml is correctly formatted.",
    description="""Validate that the datacontract.yaml is correctly formatted.""",
)
async def lint(
    body: Annotated[
        str,
        Body(
            title="Data Contract YAML",
            media_type="application/yaml",
            examples=[DATA_CONTRACT_EXAMPLE_PAYLOAD],
        ),
    ],
    schema: Annotated[
        str | None,
        Query(
            example="https://datacontract.com/datacontract.schema.json",
            description="The schema to use for validation. This must be a URL.",
        ),
    ] = None,
    api_key: Annotated[str | None, Depends(api_key_header)] = None,
):
    """Lint the posted data contract and return the result with its checks.

    Args:
        body: The data contract as a YAML string (request body).
        schema: Optional schema location handed to ``DataContract`` as
            ``schema_location``.
        api_key: Value of the ``x-api-key`` header, if any.

    Returns:
        A dict with the keys ``result`` and ``checks`` taken from the lint run.

    Raises:
        HTTPException: Propagated from ``check_api_key`` (401 / 403).
    """
    # Same guard as the /test endpoint. It does nothing when no
    # DATACONTRACT_CLI_API_KEY is configured; when one is configured, this
    # endpoint (which accepts a caller-chosen schema URL) is protected too.
    check_api_key(api_key)
    data_contract = DataContract(data_contract_str=body, schema_location=schema)
    lint_result = data_contract.lint()
    return {"result": lint_result.result, "checks": lint_result.checks}
202
+
203
+
204
@app.post(
    "/export",
    tags=["export"],
    summary="Convert data contract to a specific format.",
    response_class=PlainTextResponse,
)
def export(
    body: Annotated[
        str,
        Body(
            title="Data Contract YAML",
            media_type="application/yaml",
            examples=[DATA_CONTRACT_EXAMPLE_PAYLOAD],
        ),
    ],
    # Query(...) instead of typer.Option(...): typer metadata belongs to the
    # CLI and carries no meaning for FastAPI, so the help text never reached
    # the OpenAPI documentation. Both parameters remain query parameters.
    format: Annotated[ExportFormat, Query(description="The export format.")],
    server: Annotated[
        str | None,
        Query(
            example="production",
            description="The server name to export. Optional, if there is only one server.",
        ),
    ] = None,
    model: Annotated[
        str | None,
        Query(
            description="Use the key of the model in the data contract yaml file "
            "to refer to a model, e.g., `orders`, or `all` for all "
            "models (default).",
        ),
    ] = "all",
    rdf_base: Annotated[
        Optional[str],
        Query(description="[rdf] The base URI used to generate the RDF graph."),
    ] = None,
    sql_server_type: Annotated[
        Optional[str],
        Query(
            description="[sql] The server type to determine the sql dialect. By default, it uses 'auto' to automatically detect the sql dialect via the specified servers in the data contract.",
        ),
    ] = None,
    api_key: Annotated[str | None, Depends(api_key_header)] = None,
):
    # Converts the posted data contract YAML into the requested export format
    # and returns the exporter's output as a plain-text response.
    # (Comments rather than a docstring: this route sets no explicit
    # `description`, so a docstring would be published as the OpenAPI
    # description.)

    # Same guard as the /test endpoint; does nothing when no
    # DATACONTRACT_CLI_API_KEY is configured.
    check_api_key(api_key)

    result = DataContract(data_contract_str=body, server=server).export(
        export_format=format,
        model=model,
        rdf_base=rdf_base,
        sql_server_type=sql_server_type,
    )

    return result
datacontract/cli.py CHANGED
@@ -4,7 +4,6 @@ from pathlib import Path
4
4
  from typing import Iterable, List, Optional
5
5
 
6
6
  import typer
7
- import uvicorn
8
7
  from click import Context
9
8
  from rich import box
10
9
  from rich.console import Console
@@ -12,7 +11,6 @@ from rich.table import Table
12
11
  from typer.core import TyperGroup
13
12
  from typing_extensions import Annotated
14
13
 
15
- from datacontract import web
16
14
  from datacontract.catalog.catalog import create_data_contract_html, create_index_html
17
15
  from datacontract.data_contract import DataContract, ExportFormat
18
16
  from datacontract.imports.importer import ImportFormat
@@ -198,6 +196,11 @@ def export(
198
196
  Optional[str],
199
197
  typer.Option(help="[engine] The engine used for great expection run."),
200
198
  ] = None,
199
+ # TODO: this should be a subcommand
200
+ template: Annotated[
201
+ Optional[Path],
202
+ typer.Option(help="[custom] The file path of Jinja template."),
203
+ ] = None,
201
204
  ):
202
205
  """
203
206
  Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -210,6 +213,7 @@ def export(
210
213
  rdf_base=rdf_base,
211
214
  sql_server_type=sql_server_type,
212
215
  engine=engine,
216
+ template=template,
213
217
  )
214
218
  # Don't interpret console markup in output.
215
219
  if output is None:
@@ -346,7 +350,7 @@ def catalog(
346
350
  ] = None,
347
351
  ):
348
352
  """
349
- Create an html catalog of data contracts.
353
+ Create a html catalog of data contracts.
350
354
  """
351
355
  path = Path(output)
352
356
  path.mkdir(parents=True, exist_ok=True)
@@ -435,15 +439,32 @@ def diff(
435
439
 
436
440
 
437
441
  @app.command()
438
- def serve(
442
+ def api(
439
443
  port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
440
- host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
444
+ host: Annotated[
445
+ str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
446
+ ] = "127.0.0.1",
441
447
  ):
442
448
  """
443
- Start the datacontract web server.
449
+ Start the datacontract CLI as server application with REST API.
450
+
451
+ The OpenAPI documentation as Swagger UI is available on http://localhost:4242.
452
+ You can execute the commands directly from the Swagger UI.
453
+
454
+ To protect the API, you can set the environment variable DATACONTRACT_CLI_API_KEY to a secret API key.
455
+ To authenticate, requests must include the header 'x-api-key' with the correct API key.
456
+ This is highly recommended, as data contract tests may be subject to SQL injections or leak sensitive information.
457
+
458
+ To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in
459
+ https://cli.datacontract.com/#test
444
460
  """
461
+ import uvicorn
462
+ from uvicorn.config import LOGGING_CONFIG
463
+
464
+ log_config = LOGGING_CONFIG
465
+ log_config["root"] = {"level": "INFO"}
445
466
 
446
- uvicorn.run(web.app, port=port, host=host)
467
+ uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)
447
468
 
448
469
 
449
470
  def _handle_result(run):
datacontract/export/custom_converter.py ADDED
@@ -0,0 +1,40 @@
1
+ from pathlib import Path
2
+
3
+ from jinja2 import Environment, FileSystemLoader
4
+
5
+ from datacontract.export.exporter import Exporter
6
+ from datacontract.model.data_contract_specification import (
7
+ DataContractSpecification,
8
+ Model,
9
+ )
10
+
11
+
12
class CustomExporter(Exporter):
    """Exporter that renders a data contract through a user-supplied Jinja template."""

    def export(
        self,
        data_contract: DataContractSpecification,
        model: Model,
        server: str,
        sql_server_type: str,
        export_args: dict,
    ) -> str:
        """Render ``data_contract`` with the template named in ``export_args``.

        Only ``data_contract`` and ``export_args["template"]`` are used here;
        the remaining parameters are accepted but not read.

        Raises:
            RuntimeError: If ``export_args`` has no ``template`` entry (or it is ``None``).
        """
        template_path = export_args.get("template")
        if template_path is not None:
            return to_custom(data_contract, template_path)

        raise RuntimeError("Export to custom requires template argument.")
29
+
30
+
31
def to_custom(data_contract: DataContractSpecification, template_path: Path) -> str:
    """Render the Jinja template at ``template_path``.

    The data contract is exposed to the template under the name ``data_contract``.
    """
    return get_template(template_path).render(data_contract=data_contract)
35
+
36
+
37
def get_template(path: Path):
    """Load the Jinja template stored in the file at ``path``.

    Args:
        path: Location of the template file. It is wrapped in ``Path`` first,
            so a plain string works as well.

    Returns:
        The template loaded by a Jinja ``Environment`` whose loader is rooted
        at the template file's directory.
    """
    absolute_path = Path(path).resolve()
    env = Environment(loader=FileSystemLoader(str(absolute_path.parent)))
    # Take the file name from the resolved path as well: the raw argument may
    # be a str (no ``.name``), and for a symlink its name need not exist in the
    # resolved parent directory that the loader searches.
    return env.get_template(absolute_path.name)
@@ -45,6 +45,7 @@ class ExportFormat(str, Enum):
45
45
  dcs = "dcs"
46
46
  markdown = "markdown"
47
47
  iceberg = "iceberg"
48
+ custom = "custom"
48
49
 
49
50
  @classmethod
50
51
  def get_supported_formats(cls):
@@ -206,3 +206,7 @@ exporter_factory.register_lazy_exporter(
206
206
  exporter_factory.register_lazy_exporter(
207
207
  name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
208
208
  )
209
+
210
+ exporter_factory.register_lazy_exporter(
211
+ name=ExportFormat.custom, module_path="datacontract.export.custom_converter", class_name="CustomExporter"
212
+ )
datacontract/lint/urls.py CHANGED
@@ -27,30 +27,34 @@ def fetch_resource(url: str):
27
27
 
28
28
  def _set_api_key(headers, url):
29
29
  hostname = urlparse(url).hostname
30
+
31
+ datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
32
+ datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
33
+
30
34
  if hostname == "datamesh-manager.com" or hostname.endswith(".datamesh-manager.com"):
31
- datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
32
35
  if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
33
- print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
36
+ print("Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
34
37
  raise DataContractException(
35
38
  type="lint",
36
39
  name=f"Reading data contract from {url}",
37
- reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
40
+ reason="Error: Data Mesh Manager API key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
38
41
  engine="datacontract",
39
42
  result="error",
40
43
  )
41
44
  headers["x-api-key"] = datamesh_manager_api_key
42
45
  elif hostname == "datacontract-manager.com" or hostname.endswith(".datacontract-manager.com"):
43
- datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
44
46
  if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
45
- print("Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
47
+ print("Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
46
48
  raise DataContractException(
47
49
  type="lint",
48
50
  name=f"Reading data contract from {url}",
49
- reason="Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
51
+ reason="Error: Data Contract Manager API key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
50
52
  engine="datacontract",
51
53
  result="error",
52
54
  )
53
55
  headers["x-api-key"] = datacontract_manager_api_key
54
- else:
55
- # do nothing
56
- pass
56
+
57
+ if datamesh_manager_api_key is not None and datamesh_manager_api_key != "":
58
+ headers["x-api-key"] = datamesh_manager_api_key
59
+ if datacontract_manager_api_key is not None and datacontract_manager_api_key != "":
60
+ headers["x-api-key"] = datacontract_manager_api_key