datacontract-cli 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (38)
  1. datacontract/{web.py → api.py} +55 -3
  2. datacontract/breaking/breaking.py +1 -1
  3. datacontract/breaking/breaking_rules.py +1 -1
  4. datacontract/cli.py +32 -10
  5. datacontract/data_contract.py +14 -100
  6. datacontract/engines/data_contract_checks.py +735 -0
  7. datacontract/engines/data_contract_test.py +51 -0
  8. datacontract/engines/soda/check_soda_execute.py +36 -30
  9. datacontract/engines/soda/connections/kafka.py +8 -3
  10. datacontract/export/avro_converter.py +2 -0
  11. datacontract/export/custom_converter.py +40 -0
  12. datacontract/export/exporter.py +1 -2
  13. datacontract/export/exporter_factory.py +4 -12
  14. datacontract/export/sodacl_converter.py +22 -294
  15. datacontract/export/sql_type_converter.py +7 -2
  16. datacontract/imports/odcs_importer.py +6 -3
  17. datacontract/imports/odcs_v3_importer.py +2 -0
  18. datacontract/imports/sql_importer.py +229 -29
  19. datacontract/lint/urls.py +4 -4
  20. datacontract/model/data_contract_specification.py +130 -129
  21. datacontract/model/exceptions.py +4 -1
  22. datacontract/model/run.py +25 -18
  23. datacontract/templates/datacontract.html +16 -2
  24. datacontract/templates/partials/definition.html +3 -95
  25. datacontract/templates/partials/model_field.html +13 -0
  26. datacontract/templates/partials/quality.html +49 -0
  27. datacontract/templates/style/output.css +151 -152
  28. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/METADATA +238 -184
  29. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/RECORD +34 -34
  30. datacontract/engines/soda/connections/dask.py +0 -28
  31. datacontract/export/odcs_v2_exporter.py +0 -124
  32. datacontract/imports/odcs_v2_importer.py +0 -177
  33. datacontract/lint/linters/example_model_linter.py +0 -91
  34. datacontract/{model → breaking}/breaking_change.py +0 -0
  35. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/LICENSE +0 -0
  36. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/WHEEL +0 -0
  37. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/entry_points.txt +0 -0
  38. {datacontract_cli-0.10.20.dist-info → datacontract_cli-0.10.22.dist-info}/top_level.txt +0 -0
datacontract/{web.py → api.py} CHANGED
@@ -1,8 +1,11 @@
1
+ import logging
2
+ import os
1
3
  from typing import Annotated, Optional
2
4
 
3
5
  import typer
4
- from fastapi import Body, FastAPI, Query
6
+ from fastapi import Body, Depends, FastAPI, HTTPException, Query, status
5
7
  from fastapi.responses import PlainTextResponse
8
+ from fastapi.security.api_key import APIKeyHeader
6
9
 
7
10
  from datacontract.data_contract import DataContract, ExportFormat
8
11
  from datacontract.model.run import Run
@@ -55,8 +58,8 @@ models:
55
58
 
56
59
  app = FastAPI(
57
60
  docs_url="/",
58
- title="Data Contract API",
59
- summary="API to execute Data Contract CLI operations.",
61
+ title="Data Contract CLI API",
62
+ summary="You can use the API to test, export, and lint your data contracts.",
60
63
  license_info={
61
64
  "name": "MIT License",
62
65
  "identifier": "MIT",
@@ -87,6 +90,32 @@ app = FastAPI(
87
90
  ],
88
91
  )
89
92
 
93
+ api_key_header = APIKeyHeader(
94
+ name="x-api-key",
95
+ auto_error=False, # this makes authentication optional
96
+ )
97
+
98
+
99
+ def check_api_key(api_key_header: str | None):
100
+ correct_api_key = os.getenv("DATACONTRACT_CLI_API_KEY")
101
+ if correct_api_key is None or correct_api_key == "":
102
+ logging.info("Environment variable DATACONTRACT_CLI_API_KEY is not set. Skip API key check.")
103
+ return
104
+ if api_key_header is None or api_key_header == "":
105
+ logging.info("The API key is missing.")
106
+ raise HTTPException(
107
+ status_code=status.HTTP_401_UNAUTHORIZED,
108
+ detail="Missing API key. Use Header 'x-api-key' to provide the API key.",
109
+ )
110
+ if api_key_header != correct_api_key:
111
+ logging.info("The provided API key is not correct.")
112
+ raise HTTPException(
113
+ status_code=status.HTTP_403_FORBIDDEN,
114
+ detail="The provided API key is not correct.",
115
+ )
116
+ logging.info("Request authenticated with API key.")
117
+ pass
118
+
90
119
 
91
120
  @app.post(
92
121
  "/test",
@@ -98,6 +127,25 @@ app = FastAPI(
98
127
  Credentials must be provided via environment variables when running the web server.
99
128
  POST the data contract YAML as payload.
100
129
  """,
130
+ responses={
131
+ 401: {
132
+ "description": "Unauthorized (when an environment variable DATACONTRACT_CLI_API_KEY is configured).",
133
+ "content": {
134
+ "application/json": {
135
+ "examples": {
136
+ "api_key_missing": {
137
+ "summary": "API key Missing",
138
+ "value": {"detail": "Missing API key. Use Header 'x-api-key' to provide the API key."},
139
+ },
140
+ "api_key_wrong": {
141
+ "summary": "API key Wrong",
142
+ "value": {"detail": "The provided API key is not correct."},
143
+ },
144
+ }
145
+ }
146
+ },
147
+ },
148
+ },
101
149
  response_model_exclude_none=True,
102
150
  response_model_exclude_unset=True,
103
151
  )
@@ -110,6 +158,7 @@ async def test(
110
158
  examples=[DATA_CONTRACT_EXAMPLE_PAYLOAD],
111
159
  ),
112
160
  ],
161
+ api_key: Annotated[str | None, Depends(api_key_header)] = None,
113
162
  server: Annotated[
114
163
  str | None,
115
164
  Query(
@@ -118,6 +167,9 @@ async def test(
118
167
  ),
119
168
  ] = None,
120
169
  ) -> Run:
170
+ check_api_key(api_key)
171
+ logging.info("Testing data contract...")
172
+ logging.info(body)
121
173
  return DataContract(data_contract_str=body, server=server).test()
122
174
 
123
175
 
datacontract/breaking/breaking.py CHANGED
@@ -1,5 +1,5 @@
1
+ from datacontract.breaking.breaking_change import BreakingChange, Location, Severity
1
2
  from datacontract.breaking.breaking_rules import BreakingRules
2
- from datacontract.model.breaking_change import BreakingChange, Location, Severity
3
3
  from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms
4
4
 
5
5
 
datacontract/breaking/breaking_rules.py CHANGED
@@ -1,4 +1,4 @@
1
- from datacontract.model.breaking_change import Severity
1
+ from datacontract.breaking.breaking_change import Severity
2
2
 
3
3
 
4
4
  class BreakingRules:
datacontract/cli.py CHANGED
@@ -119,10 +119,6 @@ def test(
119
119
  "servers (default)."
120
120
  ),
121
121
  ] = "all",
122
- examples: Annotated[
123
- bool,
124
- typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
125
- ] = None,
126
122
  publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
127
123
  logs: Annotated[bool, typer.Option(help="Print logs")] = False,
128
124
  ssl_verification: Annotated[
@@ -141,7 +137,6 @@ def test(
141
137
  schema_location=schema,
142
138
  publish_url=publish,
143
139
  server=server,
144
- examples=examples,
145
140
  ssl_verification=ssl_verification,
146
141
  ).test()
147
142
  if logs:
@@ -196,6 +191,11 @@ def export(
196
191
  Optional[str],
197
192
  typer.Option(help="[engine] The engine used for great expection run."),
198
193
  ] = None,
194
+ # TODO: this should be a subcommand
195
+ template: Annotated[
196
+ Optional[Path],
197
+ typer.Option(help="[custom] The file path of Jinja template."),
198
+ ] = None,
199
199
  ):
200
200
  """
201
201
  Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -208,6 +208,7 @@ def export(
208
208
  rdf_base=rdf_base,
209
209
  sql_server_type=sql_server_type,
210
210
  engine=engine,
211
+ template=template,
211
212
  )
212
213
  # Don't interpret console markup in output.
213
214
  if output is None:
@@ -231,6 +232,10 @@ def import_(
231
232
  Optional[str],
232
233
  typer.Option(help="The path to the file or Glue Database that should be imported."),
233
234
  ] = None,
235
+ dialect: Annotated[
236
+ Optional[str],
237
+ typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
238
+ ] = None,
234
239
  glue_table: Annotated[
235
240
  Optional[List[str]],
236
241
  typer.Option(
@@ -287,6 +292,7 @@ def import_(
287
292
  source=source,
288
293
  template=template,
289
294
  schema=schema,
295
+ dialect=dialect,
290
296
  glue_table=glue_table,
291
297
  bigquery_table=bigquery_table,
292
298
  bigquery_project=bigquery_project,
@@ -344,7 +350,7 @@ def catalog(
344
350
  ] = None,
345
351
  ):
346
352
  """
347
- Create an html catalog of data contracts.
353
+ Create a html catalog of data contracts.
348
354
  """
349
355
  path = Path(output)
350
356
  path.mkdir(parents=True, exist_ok=True)
@@ -433,16 +439,32 @@ def diff(
433
439
 
434
440
 
435
441
  @app.command()
436
- def serve(
442
+ def api(
437
443
  port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
438
- host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
444
+ host: Annotated[
445
+ str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
446
+ ] = "127.0.0.1",
439
447
  ):
440
448
  """
441
- Start the datacontract web server.
449
+ Start the datacontract CLI as server application with REST API.
450
+
451
+ The OpenAPI documentation as Swagger UI is available on http://localhost:4242.
452
+ You can execute the commands directly from the Swagger UI.
453
+
454
+ To protect the API, you can set the environment variable DATACONTRACT_CLI_API_KEY to a secret API key.
455
+ To authenticate, requests must include the header 'x-api-key' with the correct API key.
456
+ This is highly recommended, as data contract tests may be subject to SQL injections or leak sensitive information.
457
+
458
+ To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in
459
+ https://cli.datacontract.com/#test
442
460
  """
443
461
  import uvicorn
462
+ from uvicorn.config import LOGGING_CONFIG
463
+
464
+ log_config = LOGGING_CONFIG
465
+ log_config["root"] = {"level": "INFO"}
444
466
 
445
- uvicorn.run("datacontract.web:app", port=port, host=host, reload=True)
467
+ uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)
446
468
 
447
469
 
448
470
  def _handle_result(run):
datacontract/data_contract.py CHANGED
@@ -1,10 +1,6 @@
1
- import json
2
1
  import logging
3
- import tempfile
4
2
  import typing
5
3
 
6
- import yaml
7
-
8
4
  if typing.TYPE_CHECKING:
9
5
  from pyspark.sql import SparkSession
10
6
 
@@ -14,11 +10,8 @@ from datacontract.breaking.breaking import (
14
10
  quality_breaking_changes,
15
11
  terms_breaking_changes,
16
12
  )
17
- from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
18
- check_that_datacontract_contains_valid_server_configuration,
19
- )
20
- from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
21
- from datacontract.engines.soda.check_soda_execute import check_soda_execute
13
+ from datacontract.breaking.breaking_change import BreakingChange, BreakingChanges, Severity
14
+ from datacontract.engines.data_contract_test import execute_data_contract_test
22
15
  from datacontract.export.exporter import ExportFormat
23
16
  from datacontract.export.exporter_factory import exporter_factory
24
17
  from datacontract.imports.importer_factory import importer_factory
@@ -26,16 +19,14 @@ from datacontract.init.init_template import get_init_template
26
19
  from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
27
20
  from datacontract.lint import resolve
28
21
  from datacontract.lint.linters.description_linter import DescriptionLinter
29
- from datacontract.lint.linters.example_model_linter import ExampleModelLinter
30
22
  from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
31
23
  from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
32
24
  from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
33
25
  from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
34
26
  from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
35
- from datacontract.model.breaking_change import BreakingChange, BreakingChanges, Severity
36
- from datacontract.model.data_contract_specification import DataContractSpecification, Server
27
+ from datacontract.model.data_contract_specification import DataContractSpecification
37
28
  from datacontract.model.exceptions import DataContractException
38
- from datacontract.model.run import Check, Run
29
+ from datacontract.model.run import Check, ResultEnum, Run
39
30
 
40
31
 
41
32
  class DataContract:
@@ -46,7 +37,6 @@ class DataContract:
46
37
  data_contract: DataContractSpecification = None,
47
38
  schema_location: str = None,
48
39
  server: str = None,
49
- examples: bool = False,
50
40
  publish_url: str = None,
51
41
  spark: "SparkSession" = None,
52
42
  inline_definitions: bool = True,
@@ -58,14 +48,12 @@ class DataContract:
58
48
  self._data_contract = data_contract
59
49
  self._schema_location = schema_location
60
50
  self._server = server
61
- self._examples = examples
62
51
  self._publish_url = publish_url
63
52
  self._spark = spark
64
53
  self._inline_definitions = inline_definitions
65
54
  self._inline_quality = inline_quality
66
55
  self._ssl_verification = ssl_verification
67
56
  self.all_linters = {
68
- ExampleModelLinter(),
69
57
  QualityUsesSchemaLinter(),
70
58
  FieldPatternLinter(),
71
59
  FieldReferenceLinter(),
@@ -96,7 +84,12 @@ class DataContract:
96
84
  inline_quality=self._inline_quality,
97
85
  )
98
86
  run.checks.append(
99
- Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")
87
+ Check(
88
+ type="lint",
89
+ result=ResultEnum.passed,
90
+ name="Data contract is syntactically valid",
91
+ engine="datacontract",
92
+ )
100
93
  )
101
94
  if enabled_linters == "none":
102
95
  linters_to_check = set()
@@ -113,7 +106,7 @@ class DataContract:
113
106
  run.checks.append(
114
107
  Check(
115
108
  type="general",
116
- result="error",
109
+ result=ResultEnum.error,
117
110
  name=f"Linter '{linter.name}'",
118
111
  reason=str(e),
119
112
  engine="datacontract",
@@ -130,7 +123,7 @@ class DataContract:
130
123
  run.checks.append(
131
124
  Check(
132
125
  type="general",
133
- result="error",
126
+ result=ResultEnum.error,
134
127
  name="Check Data Contract",
135
128
  reason=str(e),
136
129
  engine="datacontract",
@@ -153,54 +146,7 @@ class DataContract:
153
146
  inline_quality=self._inline_quality,
154
147
  )
155
148
 
156
- if data_contract.models is None or len(data_contract.models) == 0:
157
- raise DataContractException(
158
- type="lint",
159
- name="Check that data contract contains models",
160
- result="warning",
161
- reason="Models block is missing. Skip executing tests.",
162
- engine="datacontract",
163
- )
164
-
165
- if self._examples:
166
- if data_contract.examples is None or len(data_contract.examples) == 0:
167
- raise DataContractException(
168
- type="lint",
169
- name="Check that data contract contains valid examples",
170
- result="warning",
171
- reason="Examples block is missing. Skip executing tests.",
172
- engine="datacontract",
173
- )
174
- else:
175
- check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
176
-
177
- # TODO create directory only for examples
178
- with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
179
- if self._examples:
180
- server_name = "examples"
181
- server = self._get_examples_server(data_contract, run, tmp_dir)
182
- elif self._server:
183
- server_name = self._server
184
- server = data_contract.servers.get(server_name)
185
- else:
186
- server_name = list(data_contract.servers.keys())[0]
187
- server = data_contract.servers.get(server_name)
188
-
189
- run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
190
- run.dataContractId = data_contract.id
191
- run.dataContractVersion = data_contract.info.version
192
- run.dataProductId = server.dataProductId
193
- run.outputPortId = server.outputPortId
194
- run.server = server_name
195
-
196
- # TODO check server is supported type for nicer error messages
197
-
198
- # TODO check server credentials are complete for nicer error messages
199
-
200
- if server.format == "json" and server.type != "kafka":
201
- check_jsonschema(run, data_contract, server)
202
-
203
- check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
149
+ execute_data_contract_test(data_contract, run, self._server, self._spark)
204
150
 
205
151
  except DataContractException as e:
206
152
  run.checks.append(
@@ -219,7 +165,7 @@ class DataContract:
219
165
  run.checks.append(
220
166
  Check(
221
167
  type="general",
222
- result="error",
168
+ result=ResultEnum.error,
223
169
  name="Test Data Contract",
224
170
  reason=str(e),
225
171
  engine="datacontract",
@@ -235,38 +181,6 @@ class DataContract:
235
181
 
236
182
  return run
237
183
 
238
- def _get_examples_server(self, data_contract, run, tmp_dir):
239
- run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
240
- format = "json"
241
- for example in data_contract.examples:
242
- format = example.type
243
- p = f"{tmp_dir}/{example.model}.{format}"
244
- run.log_info(f"Creating example file {p}")
245
- with open(p, "w") as f:
246
- content = ""
247
- if format == "json" and isinstance(example.data, list):
248
- content = json.dumps(example.data)
249
- elif format == "json" and isinstance(example.data, str):
250
- content = example.data
251
- elif format == "yaml" and isinstance(example.data, list):
252
- content = yaml.dump(example.data, allow_unicode=True)
253
- elif format == "yaml" and isinstance(example.data, str):
254
- content = example.data
255
- elif format == "csv":
256
- content = example.data
257
- logging.debug(f"Content of example file {p}: {content}")
258
- f.write(content)
259
- path = f"{tmp_dir}" + "/{model}." + format
260
- delimiter = "array"
261
- server = Server(
262
- type="local",
263
- path=path,
264
- format=format,
265
- delimiter=delimiter,
266
- )
267
- run.log_info(f"Using {server} for testing the examples")
268
- return server
269
-
270
184
  def breaking(self, other: "DataContract") -> BreakingChanges:
271
185
  return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
272
186