datacontract-cli 0.10.7__py3-none-any.whl → 0.10.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (55)
  1. datacontract/catalog/catalog.py +4 -2
  2. datacontract/cli.py +44 -15
  3. datacontract/data_contract.py +52 -206
  4. datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
  5. datacontract/engines/soda/check_soda_execute.py +9 -2
  6. datacontract/engines/soda/connections/bigquery.py +8 -1
  7. datacontract/engines/soda/connections/duckdb.py +28 -12
  8. datacontract/engines/soda/connections/trino.py +26 -0
  9. datacontract/export/__init__.py +0 -0
  10. datacontract/export/avro_converter.py +15 -3
  11. datacontract/export/avro_idl_converter.py +29 -22
  12. datacontract/export/bigquery_converter.py +15 -0
  13. datacontract/export/dbml_converter.py +9 -0
  14. datacontract/export/dbt_converter.py +26 -1
  15. datacontract/export/exporter.py +88 -0
  16. datacontract/export/exporter_factory.py +145 -0
  17. datacontract/export/go_converter.py +6 -0
  18. datacontract/export/great_expectations_converter.py +10 -0
  19. datacontract/export/html_export.py +6 -0
  20. datacontract/export/jsonschema_converter.py +31 -23
  21. datacontract/export/odcs_converter.py +24 -1
  22. datacontract/export/protobuf_converter.py +6 -0
  23. datacontract/export/pydantic_converter.py +6 -0
  24. datacontract/export/rdf_converter.py +9 -0
  25. datacontract/export/sodacl_converter.py +23 -12
  26. datacontract/export/spark_converter.py +211 -0
  27. datacontract/export/sql_converter.py +32 -2
  28. datacontract/export/sql_type_converter.py +32 -5
  29. datacontract/export/terraform_converter.py +6 -0
  30. datacontract/imports/avro_importer.py +8 -0
  31. datacontract/imports/bigquery_importer.py +47 -4
  32. datacontract/imports/glue_importer.py +122 -30
  33. datacontract/imports/importer.py +29 -0
  34. datacontract/imports/importer_factory.py +72 -0
  35. datacontract/imports/jsonschema_importer.py +8 -0
  36. datacontract/imports/odcs_importer.py +200 -0
  37. datacontract/imports/sql_importer.py +8 -0
  38. datacontract/imports/unity_importer.py +152 -0
  39. datacontract/lint/resolve.py +22 -1
  40. datacontract/model/data_contract_specification.py +36 -4
  41. datacontract/templates/datacontract.html +17 -2
  42. datacontract/templates/partials/datacontract_information.html +20 -0
  43. datacontract/templates/partials/datacontract_terms.html +7 -0
  44. datacontract/templates/partials/definition.html +9 -1
  45. datacontract/templates/partials/model_field.html +23 -6
  46. datacontract/templates/partials/server.html +113 -48
  47. datacontract/templates/style/output.css +51 -0
  48. datacontract/web.py +17 -0
  49. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/METADATA +298 -59
  50. datacontract_cli-0.10.9.dist-info/RECORD +93 -0
  51. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/WHEEL +1 -1
  52. datacontract_cli-0.10.7.dist-info/RECORD +0 -84
  53. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/LICENSE +0 -0
  54. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/entry_points.txt +0 -0
  55. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/top_level.txt +0 -0
datacontract/catalog/catalog.py CHANGED
@@ -10,8 +10,10 @@ from datacontract.export.html_export import get_version
 from datacontract.model.data_contract_specification import DataContractSpecification


-def create_data_contract_html(contracts, file: Path, path: Path):
-    data_contract = DataContract(data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True)
+def create_data_contract_html(contracts, file: Path, path: Path, schema: str):
+    data_contract = DataContract(
+        data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True, schema_location=schema
+    )
     html = data_contract.export(export_format="html")
     spec = data_contract.get_data_contract_specification()
     file_without_suffix = file.with_suffix(".html")
datacontract/cli.py CHANGED
@@ -1,10 +1,10 @@
-from enum import Enum
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, Optional
 from typing import List

 import typer
+import uvicorn
 from click import Context
 from rich import box
 from rich.console import Console
@@ -12,11 +12,15 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated

+from datacontract import web
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract, ExportFormat
+from datacontract.imports.importer import ImportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
 from datacontract.publish.publish import publish_to_datamesh_manager

+DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+
 console = Console()


@@ -84,7 +88,7 @@ def lint(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
@@ -100,7 +104,7 @@ def test(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
     server: Annotated[
         str,
         typer.Option(
@@ -175,14 +179,18 @@ def export(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Convert data contract to a specific format. console.prints to stdout.
     """
     # TODO exception handling
-    result = DataContract(data_contract_file=location, server=server).export(
+    result = DataContract(data_contract_file=location, schema_location=schema, server=server).export(
         export_format=format,
         model=model,
+        server=server,
         rdf_base=rdf_base,
         sql_server_type=sql_server_type,
     )
@@ -195,14 +203,6 @@ def export(
         console.print(f"Written result to {output}")


-class ImportFormat(str, Enum):
-    sql = "sql"
-    avro = "avro"
-    glue = "glue"
-    bigquery = "bigquery"
-    jsonschema = "jsonschema"
-
-
 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
@@ -223,11 +223,22 @@ def import_(
             help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
         ),
     ] = None,
+    unity_table_full_name: Annotated[
+        Optional[str], typer.Option(help="Full name of a table in the unity catalog")
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset)
+    result = DataContract().import_from_source(
+        format=format,
+        source=source,
+        glue_table=glue_table,
+        bigquery_table=bigquery_table,
+        bigquery_project=bigquery_project,
+        bigquery_dataset=bigquery_dataset,
+        unity_table_full_name=unity_table_full_name,
+    )
     console.print(result.to_yaml())


@@ -236,12 +247,15 @@ def publish(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Publish the data contract to the Data Mesh Manager.
     """
     publish_to_datamesh_manager(
-        data_contract=DataContract(data_contract_file=location),
+        data_contract=DataContract(data_contract_file=location, schema_location=schema),
     )


@@ -251,6 +265,9 @@ def catalog(
         Optional[str], typer.Option(help="Glob pattern for the data contract files to include in the catalog.")
     ] = "*.yaml",
     output: Annotated[Optional[str], typer.Option(help="Output directory for the catalog html files.")] = "catalog/",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Create an html catalog of data contracts.
@@ -262,7 +279,7 @@ def catalog(
     contracts = []
     for file in Path().glob(files):
         try:
-            create_data_contract_html(contracts, file, path)
+            create_data_contract_html(contracts, file, path, schema)
         except Exception as e:
             console.print(f"Skipped {file} due to error: {e}")

@@ -323,6 +340,18 @@ def diff(
     console.print(result.changelog_str())


+@app.command()
+def serve(
+    port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
+    host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
+):
+    """
+    Start the datacontract web server.
+    """
+
+    uvicorn.run(web.app, port=port, host=host)
+
+
 def _handle_result(run):
     _print_table(run)
     if run.result == "passed":
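A rough sketch of what the new serve command wires up, based on the hunk above (assuming uvicorn and the new datacontract.web module ship with the package):

    import uvicorn

    from datacontract import web

    # Equivalent to running `datacontract serve` with its defaults:
    # bind the ASGI app exposed as web.app to 127.0.0.1:4242.
    uvicorn.run(web.app, port=4242, host="127.0.0.1")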
datacontract/data_contract.py CHANGED
@@ -2,7 +2,6 @@ import json
 import logging
 import tempfile
 import typing
-from enum import Enum

 import yaml
 from pyspark.sql import SparkSession
@@ -13,27 +12,10 @@ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_se
 )
 from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
-from datacontract.export.avro_converter import to_avro_schema_json
-from datacontract.export.avro_idl_converter import to_avro_idl
-from datacontract.export.bigquery_converter import to_bigquery_json
-from datacontract.export.dbml_converter import to_dbml_diagram
-from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
-from datacontract.export.go_converter import to_go_types
-from datacontract.export.great_expectations_converter import to_great_expectations
-from datacontract.export.html_export import to_html
-from datacontract.export.jsonschema_converter import to_jsonschema_json
-from datacontract.export.odcs_converter import to_odcs_yaml
-from datacontract.export.protobuf_converter import to_protobuf
-from datacontract.export.pydantic_converter import to_pydantic_model_str
-from datacontract.export.rdf_converter import to_rdf_n3
-from datacontract.export.sodacl_converter import to_sodacl_yaml
-from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
-from datacontract.export.terraform_converter import to_terraform
-from datacontract.imports.avro_importer import import_avro
-from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
-from datacontract.imports.glue_importer import import_glue
-from datacontract.imports.jsonschema_importer import import_jsonschema
-from datacontract.imports.sql_importer import import_sql
+from datacontract.export.exporter import ExportFormat
+from datacontract.export.exporter_factory import exporter_factory
+from datacontract.imports.importer_factory import importer_factory
+
 from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
@@ -50,28 +32,6 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check


-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
-    go = "go"
-    bigquery = "bigquery"
-    dbml = "dbml"
-
-
 class DataContract:
     def __init__(
         self,
@@ -207,6 +167,9 @@ class DataContract:
         if self._examples:
             server_name = "examples"
             server = self._get_examples_server(data_contract, run, tmp_dir)
+        elif self._server:
+            server_name = self._server
+            server = data_contract.servers.get(server_name)
         else:
             server_name = list(data_contract.servers.keys())[0]
             server = data_contract.servers.get(server_name)
@@ -260,6 +223,38 @@

         return run

+    def _get_examples_server(self, data_contract, run, tmp_dir):
+        run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
+        format = "json"
+        for example in data_contract.examples:
+            format = example.type
+            p = f"{tmp_dir}/{example.model}.{format}"
+            run.log_info(f"Creating example file {p}")
+            with open(p, "w") as f:
+                content = ""
+                if format == "json" and isinstance(example.data, list):
+                    content = json.dumps(example.data)
+                elif format == "json" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "yaml" and isinstance(example.data, list):
+                    content = yaml.dump(example.data, allow_unicode=True)
+                elif format == "yaml" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "csv":
+                    content = example.data
+                logging.debug(f"Content of example file {p}: {content}")
+                f.write(content)
+        path = f"{tmp_dir}" + "/{model}." + format
+        delimiter = "array"
+        server = Server(
+            type="local",
+            path=path,
+            format=format,
+            delimiter=delimiter,
+        )
+        run.log_info(f"Using {server} for testing the examples")
+        return server
+
     def breaking(self, other: "DataContract") -> BreakingChanges:
         return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])

@@ -301,178 +296,29 @@ class DataContract:
             inline_quality=self._inline_quality,
         )

-    def export(
-        self, export_format: ExportFormat, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto"
-    ) -> str:
+    def export(self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs) -> str:
         data_contract = resolve.resolve_data_contract(
             self._data_contract_file,
             self._data_contract_str,
             self._data_contract,
+            schema_location=self._schema_location,
             inline_definitions=True,
             inline_quality=True,
         )
-        if export_format == "jsonschema":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_jsonschema_json(model_name, model_value)
-        if export_format == "sodacl":
-            return to_sodacl_yaml(data_contract)
-        if export_format == "dbt":
-            return to_dbt_models_yaml(data_contract)
-        if export_format == "dbt-sources":
-            return to_dbt_sources_yaml(data_contract, self._server)
-        if export_format == "dbt-staging-sql":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_dbt_staging_sql(data_contract, model_name, model_value)
-        if export_format == "odcs":
-            return to_odcs_yaml(data_contract)
-        if export_format == "rdf":
-            return to_rdf_n3(data_contract, rdf_base)
-        if export_format == "protobuf":
-            return to_protobuf(data_contract)
-        if export_format == "avro":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_avro_schema_json(model_name, model_value)
-        if export_format == "avro-idl":
-            return to_avro_idl(data_contract)
-        if export_format == "terraform":
-            return to_terraform(data_contract)
-        if export_format == "sql":
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_ddl(data_contract, server_type=server_type)
-        if export_format == "sql-query":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_query(data_contract, model_name, model_value, server_type)
-        if export_format == "great-expectations":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_great_expectations(data_contract, model_name)
-        if export_format == "pydantic-model":
-            return to_pydantic_model_str(data_contract)
-        if export_format == "html":
-            return to_html(data_contract)
-        if export_format == "go":
-            return to_go_types(data_contract)
-        if export_format == "bigquery":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            found_server = data_contract.servers.get(self._server)
-            if found_server is None:
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            if found_server.type != "bigquery":
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            return to_bigquery_json(model_name, model_value, found_server)
-        if export_format == "dbml":
-            found_server = data_contract.servers.get(self._server)
-            return to_dbml_diagram(data_contract, found_server)
-        else:
-            print(f"Export format {export_format} not supported.")
-            return ""
-
-    def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
-        if sql_server_type == "auto":
-            if data_contract.servers is None or len(data_contract.servers) == 0:
-                raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
-
-            server_types = set([server.type for server in data_contract.servers.values()])
-            if "snowflake" in server_types:
-                return "snowflake"
-            elif "postgres" in server_types:
-                return "postgres"
-            elif "databricks" in server_types:
-                return "databricks"
-            else:
-                # default to snowflake dialect
-                return "snowflake"
-        else:
-            return sql_server_type

-    def _get_examples_server(self, data_contract, run, tmp_dir):
-        run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
-        format = "json"
-        for example in data_contract.examples:
-            format = example.type
-            p = f"{tmp_dir}/{example.model}.{format}"
-            run.log_info(f"Creating example file {p}")
-            with open(p, "w") as f:
-                content = ""
-                if format == "json" and isinstance(example.data, list):
-                    content = json.dumps(example.data)
-                elif format == "json" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "yaml" and isinstance(example.data, list):
-                    content = yaml.dump(example.data, allow_unicode=True)
-                elif format == "yaml" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "csv":
-                    content = example.data
-                logging.debug(f"Content of example file {p}: {content}")
-                f.write(content)
-        path = f"{tmp_dir}" + "/{model}." + format
-        delimiter = "array"
-        server = Server(
-            type="local",
-            path=path,
-            format=format,
-            delimiter=delimiter,
+        return exporter_factory.create(export_format).export(
+            data_contract=data_contract,
+            model=model,
+            server=self._server,
+            sql_server_type=sql_server_type,
+            export_args=kwargs,
         )
-        run.log_info(f"Using {server} for testing the examples")
-        return server
-
-    def _check_models_for_export(
-        self, data_contract: DataContractSpecification, model: str, export_format: str
-    ) -> typing.Tuple[str, str]:
-        if data_contract.models is None:
-            raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-        model_names = list(data_contract.models.keys())
-
-        if model == "all":
-            if len(data_contract.models.items()) != 1:
-                raise RuntimeError(
-                    f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                )
-
-            model_name, model_value = next(iter(data_contract.models.items()))
-        else:
-            model_name = model
-            model_value = data_contract.models.get(model_name)
-            if model_value is None:
-                raise RuntimeError(
-                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                )
-
-        return model_name, model_value

     def import_from_source(
-        self,
-        format: str,
-        source: typing.Optional[str] = None,
-        glue_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_project: typing.Optional[str] = None,
-        bigquery_dataset: typing.Optional[str] = None,
+        self, format: str, source: typing.Optional[str] = None, **kwargs
     ) -> DataContractSpecification:
-        data_contract_specification = DataContract.init()
-
-        if format == "sql":
-            data_contract_specification = import_sql(data_contract_specification, format, source)
-        elif format == "avro":
-            data_contract_specification = import_avro(data_contract_specification, source)
-        elif format == "glue":
-            data_contract_specification = import_glue(data_contract_specification, source, glue_tables)
-        elif format == "jsonschema":
-            data_contract_specification = import_jsonschema(data_contract_specification, source)
-        elif format == "bigquery":
-            if source is not None:
-                data_contract_specification = import_bigquery_from_json(data_contract_specification, source)
-            else:
-                data_contract_specification = import_bigquery_from_api(
-                    data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
-                )
-        else:
-            print(f"Import format {format} not supported.")
+        data_contract_specification_initial = DataContract.init()

-        return data_contract_specification
+        return importer_factory.create(format).import_source(
+            data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
+        )
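A hedged usage sketch of the refactored API above: format-specific arguments now travel through **kwargs into export_args/import_args, and ExportFormat is re-exported via datacontract.data_contract (member names assumed unchanged from the removed enum):

    from datacontract.data_contract import DataContract, ExportFormat

    dc = DataContract(data_contract_file="datacontract.yaml")

    # rdf_base is no longer a named parameter of export(); it reaches the
    # RDF exporter through export_args.
    rdf = dc.export(export_format=ExportFormat.rdf, rdf_base="https://example.com/")

    # Imports are dispatched the same way through importer_factory.
    spec = DataContract().import_from_source(format="sql", source="tables.sql")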
datacontract/engines/fastjsonschema/s3/s3_read_files.py CHANGED
@@ -1,7 +1,7 @@
 import logging
 import os

-import s3fs
+from datacontract.model.exceptions import DataContractException


 def yield_s3_files(s3_endpoint_url, s3_location):
@@ -14,6 +14,18 @@ def yield_s3_files(s3_endpoint_url, s3_location):


 def s3_fs(s3_endpoint_url):
+    try:
+        import s3fs
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="s3 extra missing",
+            reason="Install the extra datacontract-cli\[s3] to use s3",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
     return s3fs.S3FileSystem(
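A minimal sketch of what the lazy import guard above means for callers when the s3 extra is not installed (hypothetical local check, not part of the package):

    from datacontract.engines.fastjsonschema.s3.s3_read_files import s3_fs
    from datacontract.model.exceptions import DataContractException

    try:
        fs = s3_fs(s3_endpoint_url=None)
    except DataContractException as e:
        # Raised instead of a bare ImportError; the configured reason points to
        # the missing extra: "Install the extra datacontract-cli[s3] to use s3".
        print(e)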
datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -10,6 +10,7 @@ from datacontract.engines.soda.connections.kafka import create_spark_session, re
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
 from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration
+from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run, Check, Log
@@ -66,8 +67,10 @@ def check_soda_execute(
         scan.set_data_source_name(server.type)
     elif server.type == "dataframe":
         if spark is None:
-            run.log_warn("Server type dataframe only works with the Python library and requires a Spark session, "
-                         "please provide one with the DataContract class")
+            run.log_warn(
+                "Server type dataframe only works with the Python library and requires a Spark session, "
+                "please provide one with the DataContract class"
+            )
             return
         else:
             logging.info("Use Spark to connect to data source")
@@ -83,6 +86,10 @@ def check_soda_execute(
         soda_configuration_str = to_sqlserver_soda_configuration(server)
         scan.add_configuration_yaml_str(soda_configuration_str)
         scan.set_data_source_name(server.type)
+    elif server.type == "trino":
+        soda_configuration_str = to_trino_soda_configuration(server)
+        scan.add_configuration_yaml_str(soda_configuration_str)
+        scan.set_data_source_name(server.type)

    else:
        run.checks.append(
datacontract/engines/soda/connections/bigquery.py CHANGED
@@ -6,10 +6,17 @@ import yaml
 # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
 def to_bigquery_soda_configuration(server):
     # with service account key, using an external json file
+
+    # check for our own environment variable first
+    account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
+    if account_info is None:
+        # but as a fallback look for the default google one
+        account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "bigquery",
-            "account_info_json_path": os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH"),
+            "account_info_json_path": account_info,
             "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
             "project_id": server.project,
             "dataset": server.dataset,
datacontract/engines/soda/connections/duckdb.py CHANGED
@@ -80,6 +80,7 @@ def setup_s3_connection(con, server):
     s3_region = os.getenv("DATACONTRACT_S3_REGION")
     s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+    s3_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
     s3_endpoint = "s3.amazonaws.com"
     use_ssl = "true"
     url_style = "vhost"
@@ -90,18 +91,33 @@ def setup_s3_connection(con, server):
         url_style = "path"

     if s3_access_key_id is not None:
-        con.sql(f"""
-        CREATE OR REPLACE SECRET s3_secret (
-            TYPE S3,
-            PROVIDER CREDENTIAL_CHAIN,
-            REGION '{s3_region}',
-            KEY_ID '{s3_access_key_id}',
-            SECRET '{s3_secret_access_key}',
-            ENDPOINT '{s3_endpoint}',
-            USE_SSL '{use_ssl}',
-            URL_STYLE '{url_style}'
-        );
-        """)
+        if s3_session_token is not None:
+            con.sql(f"""
+            CREATE OR REPLACE SECRET s3_secret (
+                TYPE S3,
+                PROVIDER CREDENTIAL_CHAIN,
+                REGION '{s3_region}',
+                KEY_ID '{s3_access_key_id}',
+                SECRET '{s3_secret_access_key}',
+                SESSION_TOKEN '{s3_session_token}',
+                ENDPOINT '{s3_endpoint}',
+                USE_SSL '{use_ssl}',
+                URL_STYLE '{url_style}'
+            );
+            """)
+        else:
+            con.sql(f"""
+            CREATE OR REPLACE SECRET s3_secret (
+                TYPE S3,
+                PROVIDER CREDENTIAL_CHAIN,
+                REGION '{s3_region}',
+                KEY_ID '{s3_access_key_id}',
+                SECRET '{s3_secret_access_key}',
+                ENDPOINT '{s3_endpoint}',
+                USE_SSL '{use_ssl}',
+                URL_STYLE '{url_style}'
+            );
+            """)

     # con.sql(f"""
     #    SET s3_region = '{s3_region}';
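A short sketch of the new session-token path: temporary credentials (for example from an assumed role) can now be passed through to the DuckDB S3 secret by setting one additional environment variable (the values below are placeholders):

    import os

    # All three are read by setup_s3_connection; SESSION_TOKEN is only added
    # to the CREATE SECRET statement when the token variable is present.
    os.environ["DATACONTRACT_S3_ACCESS_KEY_ID"] = "<access-key-id>"
    os.environ["DATACONTRACT_S3_SECRET_ACCESS_KEY"] = "<secret-access-key>"
    os.environ["DATACONTRACT_S3_SESSION_TOKEN"] = "<session-token>"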
datacontract/engines/soda/connections/trino.py ADDED
@@ -0,0 +1,26 @@
+import os
+
+import yaml
+
+
+def to_trino_soda_configuration(server):
+    password = os.getenv("DATACONTRACT_TRINO_PASSWORD")
+    username = os.getenv("DATACONTRACT_TRINO_USERNAME")
+
+    data_source = {
+        "type": "trino",
+        "host": server.host,
+        "port": str(server.port),
+        "username": username,
+        "password": password,
+        "catalog": server.catalog,
+        "schema": server.schema_,
+    }
+
+    if password is None or password == "":
+        data_source["auth_type"] = "NoAuthentication"  # default is BasicAuthentication
+
+    soda_configuration = {f"data_source {server.type}": data_source}
+
+    soda_configuration_str = yaml.dump(soda_configuration)
+    return soda_configuration_str
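A hedged usage sketch for the new Trino connection module, assuming the Server model accepts the fields the module reads (host, port, catalog, and schema_ as the alias for "schema"):

    import os

    from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
    from datacontract.model.data_contract_specification import Server

    os.environ["DATACONTRACT_TRINO_USERNAME"] = "analyst"  # hypothetical credentials
    # With DATACONTRACT_TRINO_PASSWORD unset, auth_type falls back to NoAuthentication.

    server = Server(type="trino", host="localhost", port=8080, catalog="analytics", schema_="prod")
    print(to_trino_soda_configuration(server))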