datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +116 -10
  5. datacontract/data_contract.py +143 -65
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +22 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/export/avro_converter.py +20 -3
  15. datacontract/export/bigquery_converter.py +1 -1
  16. datacontract/export/dbt_converter.py +36 -7
  17. datacontract/export/dqx_converter.py +126 -0
  18. datacontract/export/duckdb_type_converter.py +57 -0
  19. datacontract/export/excel_exporter.py +923 -0
  20. datacontract/export/exporter.py +3 -0
  21. datacontract/export/exporter_factory.py +17 -1
  22. datacontract/export/great_expectations_converter.py +55 -5
  23. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  24. datacontract/export/markdown_converter.py +134 -5
  25. datacontract/export/mermaid_exporter.py +110 -0
  26. datacontract/export/odcs_v3_exporter.py +187 -145
  27. datacontract/export/protobuf_converter.py +163 -69
  28. datacontract/export/rdf_converter.py +2 -2
  29. datacontract/export/sodacl_converter.py +9 -1
  30. datacontract/export/spark_converter.py +31 -4
  31. datacontract/export/sql_converter.py +6 -2
  32. datacontract/export/sql_type_converter.py +20 -8
  33. datacontract/imports/avro_importer.py +63 -12
  34. datacontract/imports/csv_importer.py +111 -57
  35. datacontract/imports/excel_importer.py +1111 -0
  36. datacontract/imports/importer.py +16 -3
  37. datacontract/imports/importer_factory.py +17 -0
  38. datacontract/imports/json_importer.py +325 -0
  39. datacontract/imports/odcs_importer.py +2 -2
  40. datacontract/imports/odcs_v3_importer.py +351 -151
  41. datacontract/imports/protobuf_importer.py +264 -0
  42. datacontract/imports/spark_importer.py +117 -13
  43. datacontract/imports/sql_importer.py +32 -16
  44. datacontract/imports/unity_importer.py +84 -38
  45. datacontract/init/init_template.py +1 -1
  46. datacontract/integration/datamesh_manager.py +16 -2
  47. datacontract/lint/resolve.py +112 -23
  48. datacontract/lint/schema.py +24 -15
  49. datacontract/model/data_contract_specification/__init__.py +1 -0
  50. datacontract/model/odcs.py +13 -0
  51. datacontract/model/run.py +3 -0
  52. datacontract/output/junit_test_results.py +3 -3
  53. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  54. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  55. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  56. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  58. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  59. datacontract/templates/datacontract.html +54 -3
  60. datacontract/templates/datacontract_odcs.html +685 -0
  61. datacontract/templates/index.html +5 -2
  62. datacontract/templates/partials/server.html +2 -0
  63. datacontract/templates/style/output.css +319 -145
  64. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
  65. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  66. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  68. datacontract/export/csv_type_converter.py +0 -36
  69. datacontract/lint/lint.py +0 -142
  70. datacontract/lint/linters/description_linter.py +0 -35
  71. datacontract/lint/linters/field_pattern_linter.py +0 -34
  72. datacontract/lint/linters/field_reference_linter.py +0 -48
  73. datacontract/lint/linters/notice_period_linter.py +0 -55
  74. datacontract/lint/linters/quality_schema_linter.py +0 -52
  75. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  76. datacontract/model/data_contract_specification.py +0 -327
  77. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  78. datacontract/{lint/linters → output}/__init__.py +0 -0
  79. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  80. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/__init__.py CHANGED
@@ -0,0 +1,13 @@
1
+ # Configuration so that yaml.safe_dump dumps strings with line breaks with yaml literal |
2
+ import yaml
3
+
4
+ yaml.SafeDumper.org_represent_str = yaml.SafeDumper.represent_str
5
+
6
+
7
+ def repr_str(dumper, data):
8
+ if "\n" in data:
9
+ return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
10
+ return dumper.org_represent_str(data)
11
+
12
+
13
+ yaml.add_representer(str, repr_str, Dumper=yaml.SafeDumper)
datacontract/api.py CHANGED
@@ -10,7 +10,7 @@ from fastapi.security.api_key import APIKeyHeader
10
10
  from datacontract.data_contract import DataContract, ExportFormat
11
11
  from datacontract.model.run import Run
12
12
 
13
- DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.1.0
13
+ DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.1
14
14
  id: urn:datacontract:checkout:orders-latest
15
15
  info:
16
16
  title: Orders Latest
@@ -162,15 +162,22 @@ async def test(
162
162
  server: Annotated[
163
163
  str | None,
164
164
  Query(
165
- example="production",
166
165
  description="The server name to test. Optional, if there is only one server.",
166
+ examples=["production"],
167
+ ),
168
+ ] = None,
169
+ publish_url: Annotated[
170
+ str | None,
171
+ Query(
172
+ description="URL to publish test results. Optional, if you want to publish the test results to a Data Mesh Manager or Data Contract Manager. Example: https://api.datamesh-manager.com/api/test-results",
173
+ examples=["https://api.datamesh-manager.com/api/test-results"],
167
174
  ),
168
175
  ] = None,
169
176
  ) -> Run:
170
177
  check_api_key(api_key)
171
178
  logging.info("Testing data contract...")
172
179
  logging.info(body)
173
- return DataContract(data_contract_str=body, server=server).test()
180
+ return DataContract(data_contract_str=body, server=server, publish_url=publish_url).test()
174
181
 
175
182
 
176
183
  @app.post(
@@ -191,7 +198,7 @@ async def lint(
191
198
  schema: Annotated[
192
199
  str | None,
193
200
  Query(
194
- example="https://datacontract.com/datacontract.schema.json",
201
+ examples=["https://datacontract.com/datacontract.schema.json"],
195
202
  description="The schema to use for validation. This must be a URL.",
196
203
  ),
197
204
  ] = None,
@@ -220,7 +227,7 @@ def export(
220
227
  server: Annotated[
221
228
  str | None,
222
229
  Query(
223
- example="production",
230
+ examples=["production"],
224
231
  description="The server name to export. Optional, if there is only one server.",
225
232
  ),
226
233
  ] = None,
datacontract/catalog/catalog.py CHANGED
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from dataclasses import dataclass
2
3
  from datetime import datetime
3
4
  from pathlib import Path
@@ -6,11 +7,12 @@ import pytz
6
7
  from jinja2 import Environment, PackageLoader, select_autoescape
7
8
 
8
9
  from datacontract.data_contract import DataContract
9
- from datacontract.export.html_export import get_version
10
+ from datacontract.export.html_exporter import get_version
10
11
  from datacontract.model.data_contract_specification import DataContractSpecification
11
12
 
12
13
 
13
14
  def create_data_contract_html(contracts, file: Path, path: Path, schema: str):
15
+ logging.debug(f"Creating data contract html for file {file} and schema {schema}")
14
16
  data_contract = DataContract(
15
17
  data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True, schema_location=schema
16
18
  )
@@ -19,7 +21,7 @@ def create_data_contract_html(contracts, file: Path, path: Path, schema: str):
19
21
  file_without_suffix = file.with_suffix(".html")
20
22
  html_filepath = path / file_without_suffix
21
23
  html_filepath.parent.mkdir(parents=True, exist_ok=True)
22
- with open(html_filepath, "w") as f:
24
+ with open(html_filepath, "w", encoding="utf-8") as f:
23
25
  f.write(html)
24
26
  contracts.append(
25
27
  DataContractView(
@@ -42,7 +44,7 @@ class DataContractView:
42
44
 
43
45
  def create_index_html(contracts, path):
44
46
  index_filepath = path / "index.html"
45
- with open(index_filepath, "w") as f:
47
+ with open(index_filepath, "w", encoding="utf-8") as f:
46
48
  # Load templates from templates folder
47
49
  package_loader = PackageLoader("datacontract", "templates")
48
50
  env = Environment(
datacontract/cli.py CHANGED
@@ -1,4 +1,6 @@
1
+ import logging
1
2
  import os
3
+ import sys
2
4
  from importlib import metadata
3
5
  from pathlib import Path
4
6
  from typing import Iterable, List, Optional
@@ -11,17 +13,20 @@ from typing_extensions import Annotated
11
13
 
12
14
  from datacontract.catalog.catalog import create_data_contract_html, create_index_html
13
15
  from datacontract.data_contract import DataContract, ExportFormat
14
- from datacontract.imports.importer import ImportFormat
16
+ from datacontract.imports.importer import ImportFormat, Spec
15
17
  from datacontract.init.init_template import get_init_template
16
18
  from datacontract.integration.datamesh_manager import (
17
19
  publish_data_contract_to_datamesh_manager,
18
20
  )
19
21
  from datacontract.lint.resolve import resolve_data_contract_dict
22
+ from datacontract.model.exceptions import DataContractException
20
23
  from datacontract.output.output_format import OutputFormat
21
24
  from datacontract.output.test_results_writer import write_test_result
22
25
 
23
26
  console = Console()
24
27
 
28
+ debug_option = Annotated[bool, typer.Option(help="Enable debug logging")]
29
+
25
30
 
26
31
  class OrderedCommands(TyperGroup):
27
32
  def list_commands(self, ctx: Context) -> Iterable[str]:
@@ -69,10 +74,13 @@ def init(
69
74
  ] = "datacontract.yaml",
70
75
  template: Annotated[str, typer.Option(help="URL of a template or data contract")] = None,
71
76
  overwrite: Annotated[bool, typer.Option(help="Replace the existing datacontract.yaml")] = False,
77
+ debug: debug_option = None,
72
78
  ):
73
79
  """
74
80
  Create an empty data contract.
75
81
  """
82
+ enable_debug_logging(debug)
83
+
76
84
  if not overwrite and os.path.exists(location):
77
85
  console.print("File already exists, use --overwrite to overwrite")
78
86
  raise typer.Exit(code=1)
@@ -99,14 +107,24 @@ def lint(
99
107
  ),
100
108
  ] = None,
101
109
  output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
110
+ debug: debug_option = None,
102
111
  ):
103
112
  """
104
113
  Validate that the datacontract.yaml is correctly formatted.
105
114
  """
115
+ enable_debug_logging(debug)
116
+
106
117
  run = DataContract(data_contract_file=location, schema_location=schema).lint()
107
118
  write_test_result(run, console, output_format, output)
108
119
 
109
120
 
121
+ def enable_debug_logging(debug: bool):
122
+ if debug:
123
+ logging.basicConfig(
124
+ level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", stream=sys.stderr
125
+ )
126
+
127
+
110
128
  @app.command()
111
129
  def test(
112
130
  location: Annotated[
@@ -126,7 +144,10 @@ def test(
126
144
  "servers (default)."
127
145
  ),
128
146
  ] = "all",
129
- publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
147
+ publish_test_results: Annotated[
148
+ bool, typer.Option(help="Deprecated. Use publish parameter. Publish the results after the test")
149
+ ] = False,
150
+ publish: Annotated[str, typer.Option(help="The url to publish the results after the test.")] = None,
130
151
  output: Annotated[
131
152
  Path,
132
153
  typer.Option(
@@ -139,16 +160,20 @@ def test(
139
160
  bool,
140
161
  typer.Option(help="SSL verification when publishing the data contract."),
141
162
  ] = True,
163
+ debug: debug_option = None,
142
164
  ):
143
165
  """
144
166
  Run schema and quality tests on configured servers.
145
167
  """
168
+ enable_debug_logging(debug)
169
+
146
170
  console.print(f"Testing {location}")
147
171
  if server == "all":
148
172
  server = None
149
173
  run = DataContract(
150
174
  data_contract_file=location,
151
175
  schema_location=schema,
176
+ publish_test_results=publish_test_results,
152
177
  publish_url=publish,
153
178
  server=server,
154
179
  ssl_verification=ssl_verification,
@@ -208,12 +233,24 @@ def export(
208
233
  # TODO: this should be a subcommand
209
234
  template: Annotated[
210
235
  Optional[Path],
211
- typer.Option(help="[custom] The file path of Jinja template."),
236
+ typer.Option(
237
+ help="The file path or URL of a template. For Excel format: path/URL to custom Excel template. For custom format: path to Jinja template."
238
+ ),
212
239
  ] = None,
240
+ debug: debug_option = None,
213
241
  ):
214
242
  """
215
243
  Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
216
244
  """
245
+ enable_debug_logging(debug)
246
+
247
+ # Validate that Excel format requires an output file path
248
+ if format == ExportFormat.excel and output is None:
249
+ console.print("❌ Error: Excel export requires an output file path.")
250
+ console.print("💡 Hint: Use --output to specify where to save the Excel file, e.g.:")
251
+ console.print(" datacontract export --format excel --output datacontract.xlsx")
252
+ raise typer.Exit(code=1)
253
+
217
254
  # TODO exception handling
218
255
  result = DataContract(data_contract_file=location, schema_location=schema, server=server).export(
219
256
  export_format=format,
@@ -228,8 +265,13 @@ def export(
228
265
  if output is None:
229
266
  console.print(result, markup=False, soft_wrap=True)
230
267
  else:
231
- with output.open(mode="w", encoding="utf-8") as f:
232
- f.write(result)
268
+ if isinstance(result, bytes):
269
+ # If the result is bytes, we assume it's a binary file (e.g., Excel, PDF)
270
+ with output.open(mode="wb") as f:
271
+ f.write(result)
272
+ else:
273
+ with output.open(mode="w", encoding="utf-8") as f:
274
+ f.write(result)
233
275
  console.print(f"Written result to {output}")
234
276
 
235
277
 
@@ -244,8 +286,12 @@ def import_(
244
286
  ] = None,
245
287
  source: Annotated[
246
288
  Optional[str],
247
- typer.Option(help="The path to the file or Glue Database that should be imported."),
289
+ typer.Option(help="The path to the file that should be imported."),
248
290
  ] = None,
291
+ spec: Annotated[
292
+ Spec,
293
+ typer.Option(help="The format of the data contract to import. "),
294
+ ] = Spec.datacontract_specification,
249
295
  dialect: Annotated[
250
296
  Optional[str],
251
297
  typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
@@ -265,7 +311,7 @@ def import_(
265
311
  ),
266
312
  ] = None,
267
313
  unity_table_full_name: Annotated[
268
- Optional[str], typer.Option(help="Full name of a table in the unity catalog")
314
+ Optional[List[str]], typer.Option(help="Full name of a table in the unity catalog")
269
315
  ] = None,
270
316
  dbt_model: Annotated[
271
317
  Optional[List[str]],
@@ -297,13 +343,25 @@ def import_(
297
343
  str,
298
344
  typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
299
345
  ] = None,
346
+ owner: Annotated[
347
+ Optional[str],
348
+ typer.Option(help="The owner or team responsible for managing the data contract."),
349
+ ] = None,
350
+ id: Annotated[
351
+ Optional[str],
352
+ typer.Option(help="The identifier for the the data contract."),
353
+ ] = None,
354
+ debug: debug_option = None,
300
355
  ):
301
356
  """
302
357
  Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
303
358
  """
304
- result = DataContract().import_from_source(
359
+ enable_debug_logging(debug)
360
+
361
+ result = DataContract.import_from_source(
305
362
  format=format,
306
363
  source=source,
364
+ spec=spec,
307
365
  template=template,
308
366
  schema=schema,
309
367
  dialect=dialect,
@@ -316,6 +374,8 @@ def import_(
316
374
  dbml_schema=dbml_schema,
317
375
  dbml_table=dbml_table,
318
376
  iceberg_table=iceberg_table,
377
+ owner=owner,
378
+ id=id,
319
379
  )
320
380
  if output is None:
321
381
  console.print(result.to_yaml(), markup=False, soft_wrap=True)
@@ -339,10 +399,13 @@ def publish(
339
399
  bool,
340
400
  typer.Option(help="SSL verification when publishing the data contract."),
341
401
  ] = True,
402
+ debug: debug_option = None,
342
403
  ):
343
404
  """
344
405
  Publish the data contract to the Data Mesh Manager.
345
406
  """
407
+ enable_debug_logging(debug)
408
+
346
409
  publish_data_contract_to_datamesh_manager(
347
410
  data_contract_dict=resolve_data_contract_dict(location),
348
411
  ssl_verification=ssl_verification,
@@ -362,10 +425,13 @@ def catalog(
362
425
  str,
363
426
  typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
364
427
  ] = None,
428
+ debug: debug_option = None,
365
429
  ):
366
430
  """
367
431
  Create a html catalog of data contracts.
368
432
  """
433
+ enable_debug_logging(debug)
434
+
369
435
  path = Path(output)
370
436
  path.mkdir(parents=True, exist_ok=True)
371
437
  console.print(f"Created {output}")
@@ -374,6 +440,11 @@ def catalog(
374
440
  for file in Path().rglob(files):
375
441
  try:
376
442
  create_data_contract_html(contracts, file, path, schema)
443
+ except DataContractException as e:
444
+ if e.reason == "Cannot parse ODPS product":
445
+ console.print(f"Skipped {file} due to error: {e.reason}")
446
+ else:
447
+ console.print(f"Skipped {file} due to error: {e}")
377
448
  except Exception as e:
378
449
  console.print(f"Skipped {file} due to error: {e}")
379
450
 
@@ -390,10 +461,12 @@ def breaking(
390
461
  str,
391
462
  typer.Argument(help="The location (url or path) of the new data contract yaml."),
392
463
  ],
464
+ debug: debug_option = None,
393
465
  ):
394
466
  """
395
467
  Identifies breaking changes between data contracts. Prints to stdout.
396
468
  """
469
+ enable_debug_logging(debug)
397
470
 
398
471
  # TODO exception handling
399
472
  result = DataContract(data_contract_file=location_old, inline_definitions=True).breaking(
@@ -416,10 +489,12 @@ def changelog(
416
489
  str,
417
490
  typer.Argument(help="The location (url or path) of the new data contract yaml."),
418
491
  ],
492
+ debug: debug_option = None,
419
493
  ):
420
494
  """
421
495
  Generate a changelog between data contracts. Prints to stdout.
422
496
  """
497
+ enable_debug_logging(debug)
423
498
 
424
499
  # TODO exception handling
425
500
  result = DataContract(data_contract_file=location_old, inline_definitions=True).changelog(
@@ -439,10 +514,12 @@ def diff(
439
514
  str,
440
515
  typer.Argument(help="The location (url or path) of the new data contract yaml."),
441
516
  ],
517
+ debug: debug_option = None,
442
518
  ):
443
519
  """
444
520
  PLACEHOLDER. Currently works as 'changelog' does.
445
521
  """
522
+ enable_debug_logging(debug)
446
523
 
447
524
  # TODO change to diff output, not the changelog entries
448
525
  result = DataContract(data_contract_file=location_old, inline_definitions=True).changelog(
@@ -452,12 +529,32 @@ def diff(
452
529
  console.print(result.changelog_str())
453
530
 
454
531
 
455
- @app.command()
532
+ def _get_uvicorn_arguments(port: int, host: str, context: typer.Context) -> dict:
533
+ """
534
+ Take the default datacontract uvicorn arguments and merge them with the
535
+ extra arguments passed to the command to start the API.
536
+ """
537
+ default_args = {
538
+ "app": "datacontract.api:app",
539
+ "port": port,
540
+ "host": host,
541
+ "reload": True,
542
+ }
543
+
544
+ # Create a list of the extra arguments, remove the leading -- from the cli arguments
545
+ trimmed_keys = list(map(lambda x: str(x).replace("--", ""), context.args[::2]))
546
+ # Merge the two dicts and return them as one dict
547
+ return default_args | dict(zip(trimmed_keys, context.args[1::2]))
548
+
549
+
550
+ @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
456
551
  def api(
552
+ ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
457
553
  port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
458
554
  host: Annotated[
459
555
  str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
460
556
  ] = "127.0.0.1",
557
+ debug: debug_option = None,
461
558
  ):
462
559
  """
463
560
  Start the datacontract CLI as server application with REST API.
@@ -471,14 +568,23 @@ def api(
471
568
 
472
569
  To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in
473
570
  https://cli.datacontract.com/#test
571
+
572
+ It is possible to run the API with extra arguments for `uvicorn.run()` as keyword arguments, e.g.:
573
+ `datacontract api --port 1234 --root_path /datacontract`.
474
574
  """
575
+ enable_debug_logging(debug)
576
+
475
577
  import uvicorn
476
578
  from uvicorn.config import LOGGING_CONFIG
477
579
 
478
580
  log_config = LOGGING_CONFIG
479
581
  log_config["root"] = {"level": "INFO"}
480
582
 
481
- uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)
583
+ uvicorn_args = _get_uvicorn_arguments(port, host, ctx)
584
+ # Add the log config
585
+ uvicorn_args["log_config"] = log_config
586
+ # Run uvicorn
587
+ uvicorn.run(**uvicorn_args)
482
588
 
483
589
 
484
590
  def _print_logs(run):