datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff shows the content of publicly released package versions as published to their respective public registries and is provided for informational purposes only.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/cli.py CHANGED
@@ -1,21 +1,32 @@
-from enum import Enum
+import logging
+import os
+import sys
 from importlib import metadata
-from typing import Iterable, Optional
+from pathlib import Path
+from typing import Iterable, List, Optional
 
 import typer
 from click import Context
-from rich import box
 from rich.console import Console
-from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated
 
-from datacontract.data_contract import DataContract
-from datacontract.init.download_datacontract_file import \
-    download_datacontract_file, FileExistsException
+from datacontract.catalog.catalog import create_data_contract_html, create_index_html
+from datacontract.data_contract import DataContract, ExportFormat
+from datacontract.imports.importer import ImportFormat, Spec
+from datacontract.init.init_template import get_init_template
+from datacontract.integration.datamesh_manager import (
+    publish_data_contract_to_datamesh_manager,
+)
+from datacontract.lint.resolve import resolve_data_contract_dict
+from datacontract.model.exceptions import DataContractException
+from datacontract.output.output_format import OutputFormat
+from datacontract.output.test_results_writer import write_test_result
 
 console = Console()
 
+debug_option = Annotated[bool, typer.Option(help="Enable debug logging")]
+
 
 class OrderedCommands(TyperGroup):
     def list_commands(self, ctx: Context) -> Iterable[str]:
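The `debug_option` alias added in this hunk is reused as the type of the new `debug` parameter on every command further down, together with the `enable_debug_logging` helper introduced below. A minimal, self-contained sketch of how such a shared typer `Annotated` option behaves (the command name and body here are illustrative, not from the package):

import logging
import sys

import typer
from typing_extensions import Annotated

app = typer.Typer()
# One alias, reused by every command that wants a --debug flag.
debug_option = Annotated[bool, typer.Option(help="Enable debug logging")]


def enable_debug_logging(debug: bool):
    # Mirrors the helper introduced in this release: configure logging only when asked.
    if debug:
        logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)


@app.command()
def lint(debug: debug_option = False):
    enable_debug_logging(debug)
    typer.echo("linting ...")


if __name__ == "__main__":
    app()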
@@ -39,7 +50,11 @@ def version_callback(value: bool):
 def common(
     ctx: typer.Context,
     version: bool = typer.Option(
-        None, "--version", help="Prints the current version.", callback=version_callback, is_eager=True
+        None,
+        "--version",
+        help="Prints the current version.",
+        callback=version_callback,
+        is_eager=True,
     ),
 ):
     """
@@ -55,49 +70,71 @@ def common(
 @app.command()
 def init(
     location: Annotated[
-        str, typer.Argument(help="The location (url or path) of the data contract yaml to create.")
+        str, typer.Argument(help="The location of the data contract file to create.")
     ] = "datacontract.yaml",
-    template: Annotated[
-        str, typer.Option(help="URL of a template or data contract")
-    ] = "https://datacontract.com/datacontract.init.yaml",
+    template: Annotated[str, typer.Option(help="URL of a template or data contract")] = None,
     overwrite: Annotated[bool, typer.Option(help="Replace the existing datacontract.yaml")] = False,
+    debug: debug_option = None,
 ):
     """
-    Download a datacontract.yaml template and write it to file.
+    Create an empty data contract.
     """
-    try:
-        download_datacontract_file(location, template, overwrite)
-    except FileExistsException:
+    enable_debug_logging(debug)
+
+    if not overwrite and os.path.exists(location):
         console.print("File already exists, use --overwrite to overwrite")
         raise typer.Exit(code=1)
-    else:
-        console.print("📄 data contract written to " + location)
+    template_str = get_init_template(template)
+    with open(location, "w") as f:
+        f.write(template_str)
+    console.print("📄 data contract written to " + location)
 
 
 @app.command()
 def lint(
     location: Annotated[
-        str, typer.Argument(help="The location (url or path) of the data contract yaml.")
+        str,
+        typer.Argument(help="The location (url or path) of the data contract yaml."),
     ] = "datacontract.yaml",
     schema: Annotated[
-        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+        str,
+        typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+    ] = None,
+    output: Annotated[
+        Path,
+        typer.Option(
+            help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml'). If no path is provided, the output will be printed to stdout."
+        ),
+    ] = None,
+    output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
+    debug: debug_option = None,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
     """
+    enable_debug_logging(debug)
+
     run = DataContract(data_contract_file=location, schema_location=schema).lint()
-    _handle_result(run)
+    write_test_result(run, console, output_format, output)
+
+
+def enable_debug_logging(debug: bool):
+    if debug:
+        logging.basicConfig(
+            level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", stream=sys.stderr
+        )
 
 
 @app.command()
 def test(
     location: Annotated[
-        str, typer.Argument(help="The location (url or path) of the data contract yaml.")
+        str,
+        typer.Argument(help="The location (url or path) of the data contract yaml."),
     ] = "datacontract.yaml",
     schema: Annotated[
-        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+        str,
+        typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+    ] = None,
     server: Annotated[
         str,
         typer.Option(
@@ -107,59 +144,54 @@ def test(
             "servers (default)."
         ),
     ] = "all",
-    examples: Annotated[
-        bool, typer.Option(help="Run the schema and quality tests on the example data within the data contract.")
-    ] = None,
-    publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
-    publish_to_opentelemetry: Annotated[
-        bool,
+    publish_test_results: Annotated[
+        bool, typer.Option(help="Deprecated. Use publish parameter. Publish the results after the test")
+    ] = False,
+    publish: Annotated[str, typer.Option(help="The url to publish the results after the test.")] = None,
+    output: Annotated[
+        Path,
         typer.Option(
-            help="Publish the results to opentelemetry. Use environment variables to configure the OTLP endpoint, headers, etc."
+            help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml')."
         ),
-    ] = False,
+    ] = None,
+    output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
     logs: Annotated[bool, typer.Option(help="Print logs")] = False,
+    ssl_verification: Annotated[
+        bool,
+        typer.Option(help="SSL verification when publishing the data contract."),
+    ] = True,
+    debug: debug_option = None,
 ):
     """
     Run schema and quality tests on configured servers.
     """
+    enable_debug_logging(debug)
+
     console.print(f"Testing {location}")
     if server == "all":
         server = None
     run = DataContract(
         data_contract_file=location,
         schema_location=schema,
+        publish_test_results=publish_test_results,
         publish_url=publish,
-        publish_to_opentelemetry=publish_to_opentelemetry,
         server=server,
-        examples=examples,
+        ssl_verification=ssl_verification,
     ).test()
     if logs:
         _print_logs(run)
-    _handle_result(run)
-
-
-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
+    write_test_result(run, console, output_format, output)
 
 
 @app.command()
 def export(
     format: Annotated[ExportFormat, typer.Option(help="The export format.")],
+    output: Annotated[
+        Path,
+        typer.Option(
+            help="Specify the file path where the exported data will be saved. If no path is provided, the output will be printed to stdout."
+        ),
+    ] = None,
     server: Annotated[str, typer.Option(help="The server name to export.")] = None,
     model: Annotated[
         str,
@@ -169,10 +201,15 @@ def export(
             "models (default)."
         ),
     ] = "all",
+    # TODO: this should be a subcommand
     rdf_base: Annotated[
         Optional[str],
-        typer.Option(help="[rdf] The base URI used to generate the RDF graph.", rich_help_panel="RDF Options"),
+        typer.Option(
+            help="[rdf] The base URI used to generate the RDF graph.",
+            rich_help_panel="RDF Options",
+        ),
     ] = None,
+    # TODO: this should be a subcommand
     sql_server_type: Annotated[
         Optional[str],
         typer.Option(
@@ -181,48 +218,255 @@ def export(
         ),
     ] = "auto",
     location: Annotated[
-        str, typer.Argument(help="The location (url or path) of the data contract yaml.")
+        str,
+        typer.Argument(help="The location (url or path) of the data contract yaml."),
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str,
+        typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+    ] = None,
+    # TODO: this should be a subcommand
+    engine: Annotated[
+        Optional[str],
+        typer.Option(help="[engine] The engine used for great expection run."),
+    ] = None,
+    # TODO: this should be a subcommand
+    template: Annotated[
+        Optional[Path],
+        typer.Option(
+            help="The file path or URL of a template. For Excel format: path/URL to custom Excel template. For custom format: path to Jinja template."
+        ),
+    ] = None,
+    debug: debug_option = None,
 ):
     """
-    Convert data contract to a specific format. console.prints to stdout.
+    Convert data contract to a specific format. Saves to file specified by `output` option if present, otherwise prints to stdout.
     """
+    enable_debug_logging(debug)
+
+    # Validate that Excel format requires an output file path
+    if format == ExportFormat.excel and output is None:
+        console.print("❌ Error: Excel export requires an output file path.")
+        console.print("💡 Hint: Use --output to specify where to save the Excel file, e.g.:")
+        console.print(" datacontract export --format excel --output datacontract.xlsx")
+        raise typer.Exit(code=1)
+
     # TODO exception handling
-    result = DataContract(data_contract_file=location, server=server).export(
+    result = DataContract(data_contract_file=location, schema_location=schema, server=server).export(
         export_format=format,
         model=model,
+        server=server,
         rdf_base=rdf_base,
         sql_server_type=sql_server_type,
+        engine=engine,
+        template=template,
     )
     # Don't interpret console markup in output.
-    console.print(result, markup=False)
-
-
-class ImportFormat(str, Enum):
-    sql = "sql"
-    avro = "avro"
+    if output is None:
+        console.print(result, markup=False, soft_wrap=True)
+    else:
+        if isinstance(result, bytes):
+            # If the result is bytes, we assume it's a binary file (e.g., Excel, PDF)
+            with output.open(mode="wb") as f:
+                f.write(result)
+        else:
+            with output.open(mode="w", encoding="utf-8") as f:
+                f.write(result)
+        console.print(f"Written result to {output}")
 
 
 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
-    source: Annotated[str, typer.Option(help="The path to the file that should be imported.")],
+    output: Annotated[
+        Path,
+        typer.Option(
+            help="Specify the file path where the Data Contract will be saved. If no path is provided, the output will be printed to stdout."
+        ),
+    ] = None,
+    source: Annotated[
+        Optional[str],
+        typer.Option(help="The path to the file that should be imported."),
+    ] = None,
+    spec: Annotated[
+        Spec,
+        typer.Option(help="The format of the data contract to import. "),
+    ] = Spec.datacontract_specification,
+    dialect: Annotated[
+        Optional[str],
+        typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
+    ] = None,
+    glue_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table ids to import from the Glue Database (repeat for multiple table ids, leave empty for all tables in the dataset)."
+        ),
+    ] = None,
+    bigquery_project: Annotated[Optional[str], typer.Option(help="The bigquery project id.")] = None,
+    bigquery_dataset: Annotated[Optional[str], typer.Option(help="The bigquery dataset id.")] = None,
+    bigquery_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
+        ),
+    ] = None,
+    unity_table_full_name: Annotated[
+        Optional[List[str]], typer.Option(help="Full name of a table in the unity catalog")
+    ] = None,
+    dbt_model: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of models names to import from the dbt manifest file (repeat for multiple models names, leave empty for all models in the dataset)."
+        ),
+    ] = None,
+    dbml_schema: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of schema names to import from the DBML file (repeat for multiple schema names, leave empty for all tables in the file)."
+        ),
+    ] = None,
+    dbml_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table names to import from the DBML file (repeat for multiple table names, leave empty for all tables in the file)."
+        ),
+    ] = None,
+    iceberg_table: Annotated[
+        Optional[str],
+        typer.Option(help="Table name to assign to the model created from the Iceberg schema."),
+    ] = None,
+    template: Annotated[
+        Optional[str],
+        typer.Option(help="The location (url or path) of the Data Contract Specification Template"),
+    ] = None,
+    schema: Annotated[
+        str,
+        typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+    ] = None,
+    owner: Annotated[
+        Optional[str],
+        typer.Option(help="The owner or team responsible for managing the data contract."),
+    ] = None,
+    id: Annotated[
+        Optional[str],
+        typer.Option(help="The identifier for the the data contract."),
+    ] = None,
+    debug: debug_option = None,
+):
+    """
+    Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
+    """
+    enable_debug_logging(debug)
+
+    result = DataContract.import_from_source(
+        format=format,
+        source=source,
+        spec=spec,
+        template=template,
+        schema=schema,
+        dialect=dialect,
+        glue_table=glue_table,
+        bigquery_table=bigquery_table,
+        bigquery_project=bigquery_project,
+        bigquery_dataset=bigquery_dataset,
+        unity_table_full_name=unity_table_full_name,
+        dbt_model=dbt_model,
+        dbml_schema=dbml_schema,
+        dbml_table=dbml_table,
+        iceberg_table=iceberg_table,
+        owner=owner,
+        id=id,
+    )
+    if output is None:
+        console.print(result.to_yaml(), markup=False, soft_wrap=True)
+    else:
+        with output.open(mode="w", encoding="utf-8") as f:
+            f.write(result.to_yaml())
+        console.print(f"Written result to {output}")
+
+
+@app.command(name="publish")
+def publish(
+    location: Annotated[
+        str,
+        typer.Argument(help="The location (url or path) of the data contract yaml."),
+    ] = "datacontract.yaml",
+    schema: Annotated[
+        str,
+        typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+    ] = None,
+    ssl_verification: Annotated[
+        bool,
+        typer.Option(help="SSL verification when publishing the data contract."),
+    ] = True,
+    debug: debug_option = None,
+):
+    """
+    Publish the data contract to the Data Mesh Manager.
+    """
+    enable_debug_logging(debug)
+
+    publish_data_contract_to_datamesh_manager(
+        data_contract_dict=resolve_data_contract_dict(location),
+        ssl_verification=ssl_verification,
+    )
+
+
+@app.command(name="catalog")
+def catalog(
+    files: Annotated[
+        Optional[str],
+        typer.Option(
+            help="Glob pattern for the data contract files to include in the catalog. Applies recursively to any subfolders."
+        ),
+    ] = "*.yaml",
+    output: Annotated[Optional[str], typer.Option(help="Output directory for the catalog html files.")] = "catalog/",
+    schema: Annotated[
+        str,
+        typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+    ] = None,
+    debug: debug_option = None,
 ):
     """
-    Create a data contract from the given source file. Prints to stdout.
+    Create a html catalog of data contracts.
     """
-    result = DataContract().import_from_source(format, source)
-    console.print(result.to_yaml())
+    enable_debug_logging(debug)
+
+    path = Path(output)
+    path.mkdir(parents=True, exist_ok=True)
+    console.print(f"Created {output}")
+
+    contracts = []
+    for file in Path().rglob(files):
+        try:
+            create_data_contract_html(contracts, file, path, schema)
+        except DataContractException as e:
+            if e.reason == "Cannot parse ODPS product":
+                console.print(f"Skipped {file} due to error: {e.reason}")
+            else:
+                console.print(f"Skipped {file} due to error: {e}")
+        except Exception as e:
+            console.print(f"Skipped {file} due to error: {e}")
+
+    create_index_html(contracts, path)
 
 
 @app.command()
 def breaking(
-    location_old: Annotated[str, typer.Argument(help="The location (url or path) of the old data contract yaml.")],
-    location_new: Annotated[str, typer.Argument(help="The location (url or path) of the new data contract yaml.")],
+    location_old: Annotated[
+        str,
+        typer.Argument(help="The location (url or path) of the old data contract yaml."),
+    ],
+    location_new: Annotated[
+        str,
+        typer.Argument(help="The location (url or path) of the new data contract yaml."),
+    ],
+    debug: debug_option = None,
 ):
     """
     Identifies breaking changes between data contracts. Prints to stdout.
     """
+    enable_debug_logging(debug)
 
     # TODO exception handling
     result = DataContract(data_contract_file=location_old, inline_definitions=True).breaking(
@@ -237,12 +481,20 @@ def breaking(
 
 @app.command()
 def changelog(
-    location_old: Annotated[str, typer.Argument(help="The location (url or path) of the old data contract yaml.")],
-    location_new: Annotated[str, typer.Argument(help="The location (url or path) of the new data contract yaml.")],
+    location_old: Annotated[
+        str,
+        typer.Argument(help="The location (url or path) of the old data contract yaml."),
+    ],
+    location_new: Annotated[
+        str,
+        typer.Argument(help="The location (url or path) of the new data contract yaml."),
+    ],
+    debug: debug_option = None,
 ):
     """
     Generate a changelog between data contracts. Prints to stdout.
     """
+    enable_debug_logging(debug)
 
     # TODO exception handling
     result = DataContract(data_contract_file=location_old, inline_definitions=True).changelog(
@@ -254,12 +506,20 @@ def changelog(
 
 @app.command()
 def diff(
-    location_old: Annotated[str, typer.Argument(help="The location (url or path) of the old data contract yaml.")],
-    location_new: Annotated[str, typer.Argument(help="The location (url or path) of the new data contract yaml.")],
+    location_old: Annotated[
+        str,
+        typer.Argument(help="The location (url or path) of the old data contract yaml."),
+    ],
+    location_new: Annotated[
+        str,
+        typer.Argument(help="The location (url or path) of the new data contract yaml."),
+    ],
+    debug: debug_option = None,
 ):
     """
     PLACEHOLDER. Currently works as 'changelog' does.
     """
+    enable_debug_logging(debug)
 
     # TODO change to diff output, not the changelog entries
     result = DataContract(data_contract_file=location_old, inline_definitions=True).changelog(
@@ -269,40 +529,62 @@ def diff(
     console.print(result.changelog_str())
 
 
-def _handle_result(run):
-    _print_table(run)
-    if run.result == "passed":
-        console.print(
-            f"🟢 data contract is valid. Run {len(run.checks)} checks. Took {(run.timestampEnd - run.timestampStart).total_seconds()} seconds."
-        )
-    else:
-        console.print("🔴 data contract is invalid, found the following errors:")
-        i = 1
-        for check in run.checks:
-            if check.result != "passed":
-                console.print(str(++i) + ") " + check.reason)
-        raise typer.Exit(code=1)
+def _get_uvicorn_arguments(port: int, host: str, context: typer.Context) -> dict:
+    """
+    Take the default datacontract uvicorn arguments and merge them with the
+    extra arguments passed to the command to start the API.
+    """
+    default_args = {
+        "app": "datacontract.api:app",
+        "port": port,
+        "host": host,
+        "reload": True,
+    }
+
+    # Create a list of the extra arguments, remove the leading -- from the cli arguments
+    trimmed_keys = list(map(lambda x: str(x).replace("--", ""), context.args[::2]))
+    # Merge the two dicts and return them as one dict
+    return default_args | dict(zip(trimmed_keys, context.args[1::2]))
+
+
+@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
+def api(
+    ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
+    port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
+    host: Annotated[
+        str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
+    ] = "127.0.0.1",
+    debug: debug_option = None,
+):
+    """
+    Start the datacontract CLI as server application with REST API.
 
+    The OpenAPI documentation as Swagger UI is available on http://localhost:4242.
+    You can execute the commands directly from the Swagger UI.
 
-def _print_table(run):
-    table = Table(box=box.ROUNDED)
-    table.add_column("Result", no_wrap=True)
-    table.add_column("Check", max_width=100)
-    table.add_column("Field", max_width=32)
-    table.add_column("Details", max_width=50)
-    for check in run.checks:
-        table.add_row(with_markup(check.result), check.name, to_field(run, check), check.reason)
-    console.print(table)
+    To protect the API, you can set the environment variable DATACONTRACT_CLI_API_KEY to a secret API key.
+    To authenticate, requests must include the header 'x-api-key' with the correct API key.
+    This is highly recommended, as data contract tests may be subject to SQL injections or leak sensitive information.
 
+    To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in
+    https://cli.datacontract.com/#test
 
-def to_field(run, check):
-    models = [c.model for c in run.checks]
-    if len(set(models)) > 1:
-        if check.field is None:
-            return check.model
-        return check.model + "." + check.field
-    else:
-        return check.field
+    It is possible to run the API with extra arguments for `uvicorn.run()` as keyword arguments, e.g.:
+    `datacontract api --port 1234 --root_path /datacontract`.
+    """
+    enable_debug_logging(debug)
+
+    import uvicorn
+    from uvicorn.config import LOGGING_CONFIG
+
+    log_config = LOGGING_CONFIG
+    log_config["root"] = {"level": "INFO"}
+
+    uvicorn_args = _get_uvicorn_arguments(port, host, ctx)
+    # Add the log config
+    uvicorn_args["log_config"] = log_config
+    # Run uvicorn
+    uvicorn.run(**uvicorn_args)
 
 
 def _print_logs(run):
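The merge in `_get_uvicorn_arguments` above treats the extra tokens collected by typer as alternating `--key value` pairs, strips the leading dashes from the keys, and overlays the result on the defaults; the values remain strings. A standalone sketch of that pairing, with made-up extra arguments standing in for `ctx.args`:

# Illustrative only; extra_args is a hypothetical stand-in for ctx.args.
default_args = {"app": "datacontract.api:app", "port": 4242, "host": "127.0.0.1", "reload": True}
extra_args = ["--root_path", "/datacontract", "--workers", "2"]
trimmed_keys = [str(arg).replace("--", "") for arg in extra_args[::2]]
merged = default_args | dict(zip(trimmed_keys, extra_args[1::2]))
# merged == {"app": "datacontract.api:app", "port": 4242, "host": "127.0.0.1",
#            "reload": True, "root_path": "/datacontract", "workers": "2"}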
@@ -311,17 +593,5 @@ def _print_logs(run):
         console.print(log.timestamp.strftime("%y-%m-%d %H:%M:%S"), log.level.ljust(5), log.message)
 
 
-def with_markup(result):
-    if result == "passed":
-        return "[green]passed[/green]"
-    if result == "warning":
-        return "[yellow]warning[/yellow]"
-    if result == "failed":
-        return "[red]failed[/red]"
-    if result == "error":
-        return "[red]error[/red]"
-    return result
-
-
 if __name__ == "__main__":
     app()