datacontract-cli 0.10.7__py3-none-any.whl → 0.10.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (55)
  1. datacontract/catalog/catalog.py +4 -2
  2. datacontract/cli.py +44 -15
  3. datacontract/data_contract.py +52 -206
  4. datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
  5. datacontract/engines/soda/check_soda_execute.py +9 -2
  6. datacontract/engines/soda/connections/bigquery.py +8 -1
  7. datacontract/engines/soda/connections/duckdb.py +28 -12
  8. datacontract/engines/soda/connections/trino.py +26 -0
  9. datacontract/export/__init__.py +0 -0
  10. datacontract/export/avro_converter.py +15 -3
  11. datacontract/export/avro_idl_converter.py +29 -22
  12. datacontract/export/bigquery_converter.py +15 -0
  13. datacontract/export/dbml_converter.py +9 -0
  14. datacontract/export/dbt_converter.py +26 -1
  15. datacontract/export/exporter.py +88 -0
  16. datacontract/export/exporter_factory.py +145 -0
  17. datacontract/export/go_converter.py +6 -0
  18. datacontract/export/great_expectations_converter.py +10 -0
  19. datacontract/export/html_export.py +6 -0
  20. datacontract/export/jsonschema_converter.py +31 -23
  21. datacontract/export/odcs_converter.py +24 -1
  22. datacontract/export/protobuf_converter.py +6 -0
  23. datacontract/export/pydantic_converter.py +6 -0
  24. datacontract/export/rdf_converter.py +9 -0
  25. datacontract/export/sodacl_converter.py +23 -12
  26. datacontract/export/spark_converter.py +211 -0
  27. datacontract/export/sql_converter.py +32 -2
  28. datacontract/export/sql_type_converter.py +32 -5
  29. datacontract/export/terraform_converter.py +6 -0
  30. datacontract/imports/avro_importer.py +8 -0
  31. datacontract/imports/bigquery_importer.py +47 -4
  32. datacontract/imports/glue_importer.py +122 -30
  33. datacontract/imports/importer.py +29 -0
  34. datacontract/imports/importer_factory.py +72 -0
  35. datacontract/imports/jsonschema_importer.py +8 -0
  36. datacontract/imports/odcs_importer.py +200 -0
  37. datacontract/imports/sql_importer.py +8 -0
  38. datacontract/imports/unity_importer.py +152 -0
  39. datacontract/lint/resolve.py +22 -1
  40. datacontract/model/data_contract_specification.py +36 -4
  41. datacontract/templates/datacontract.html +17 -2
  42. datacontract/templates/partials/datacontract_information.html +20 -0
  43. datacontract/templates/partials/datacontract_terms.html +7 -0
  44. datacontract/templates/partials/definition.html +9 -1
  45. datacontract/templates/partials/model_field.html +23 -6
  46. datacontract/templates/partials/server.html +113 -48
  47. datacontract/templates/style/output.css +51 -0
  48. datacontract/web.py +17 -0
  49. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/METADATA +298 -59
  50. datacontract_cli-0.10.9.dist-info/RECORD +93 -0
  51. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/WHEEL +1 -1
  52. datacontract_cli-0.10.7.dist-info/RECORD +0 -84
  53. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/LICENSE +0 -0
  54. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/entry_points.txt +0 -0
  55. {datacontract_cli-0.10.7.dist-info → datacontract_cli-0.10.9.dist-info}/top_level.txt +0 -0
datacontract/catalog/catalog.py CHANGED
@@ -10,8 +10,10 @@ from datacontract.export.html_export import get_version
 from datacontract.model.data_contract_specification import DataContractSpecification


-def create_data_contract_html(contracts, file: Path, path: Path):
-    data_contract = DataContract(data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True)
+def create_data_contract_html(contracts, file: Path, path: Path, schema: str):
+    data_contract = DataContract(
+        data_contract_file=f"{file.absolute()}", inline_definitions=True, inline_quality=True, schema_location=schema
+    )
     html = data_contract.export(export_format="html")
     spec = data_contract.get_data_contract_specification()
     file_without_suffix = file.with_suffix(".html")
datacontract/cli.py CHANGED
@@ -1,10 +1,10 @@
-from enum import Enum
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, Optional
 from typing import List

 import typer
+import uvicorn
 from click import Context
 from rich import box
 from rich.console import Console
@@ -12,11 +12,15 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated

+from datacontract import web
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract, ExportFormat
+from datacontract.imports.importer import ImportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
 from datacontract.publish.publish import publish_to_datamesh_manager

+DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+
 console = Console()


@@ -84,7 +88,7 @@ def lint(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
@@ -100,7 +104,7 @@ def test(
     ] = "datacontract.yaml",
     schema: Annotated[
         str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
-    ] = "https://datacontract.com/datacontract.schema.json",
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
     server: Annotated[
         str,
         typer.Option(
@@ -175,14 +179,18 @@ def export(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Convert data contract to a specific format. console.prints to stdout.
     """
     # TODO exception handling
-    result = DataContract(data_contract_file=location, server=server).export(
+    result = DataContract(data_contract_file=location, schema_location=schema, server=server).export(
         export_format=format,
         model=model,
+        server=server,
         rdf_base=rdf_base,
         sql_server_type=sql_server_type,
     )
@@ -195,14 +203,6 @@ def export(
         console.print(f"Written result to {output}")


-class ImportFormat(str, Enum):
-    sql = "sql"
-    avro = "avro"
-    glue = "glue"
-    bigquery = "bigquery"
-    jsonschema = "jsonschema"
-
-
 @app.command(name="import")
 def import_(
     format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")],
@@ -223,11 +223,22 @@ def import_(
             help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
         ),
     ] = None,
+    unity_table_full_name: Annotated[
+        Optional[str], typer.Option(help="Full name of a table in the unity catalog")
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
     """
-    result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset)
+    result = DataContract().import_from_source(
+        format=format,
+        source=source,
+        glue_table=glue_table,
+        bigquery_table=bigquery_table,
+        bigquery_project=bigquery_project,
+        bigquery_dataset=bigquery_dataset,
+        unity_table_full_name=unity_table_full_name,
+    )
     console.print(result.to_yaml())


@@ -236,12 +247,15 @@ def publish(
     location: Annotated[
         str, typer.Argument(help="The location (url or path) of the data contract yaml.")
     ] = "datacontract.yaml",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Publish the data contract to the Data Mesh Manager.
     """
     publish_to_datamesh_manager(
-        data_contract=DataContract(data_contract_file=location),
+        data_contract=DataContract(data_contract_file=location, schema_location=schema),
     )


@@ -251,6 +265,9 @@ def catalog(
         Optional[str], typer.Option(help="Glob pattern for the data contract files to include in the catalog.")
     ] = "*.yaml",
     output: Annotated[Optional[str], typer.Option(help="Output directory for the catalog html files.")] = "catalog/",
+    schema: Annotated[
+        str, typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema")
+    ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
 ):
     """
     Create an html catalog of data contracts.
@@ -262,7 +279,7 @@ def catalog(
     contracts = []
     for file in Path().glob(files):
         try:
-            create_data_contract_html(contracts, file, path)
+            create_data_contract_html(contracts, file, path, schema)
         except Exception as e:
             console.print(f"Skipped {file} due to error: {e}")

@@ -323,6 +340,18 @@ def diff(
     console.print(result.changelog_str())


+@app.command()
+def serve(
+    port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
+    host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
+):
+    """
+    Start the datacontract web server.
+    """
+
+    uvicorn.run(web.app, port=port, host=host)
+
+
 def _handle_result(run):
     _print_table(run)
     if run.result == "passed":
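A rough sketch of what the new serve command wires up, based on the hunk above (assuming uvicorn and the new datacontract.web module ship with the package):

    import uvicorn

    from datacontract import web

    # Equivalent to running `datacontract serve` with its defaults:
    # bind the ASGI app exposed as web.app to 127.0.0.1:4242.
    uvicorn.run(web.app, port=4242, host="127.0.0.1")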
datacontract/data_contract.py CHANGED
@@ -2,7 +2,6 @@ import json
 import logging
 import tempfile
 import typing
-from enum import Enum

 import yaml
 from pyspark.sql import SparkSession
@@ -13,27 +12,10 @@ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_se
 )
 from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
-from datacontract.export.avro_converter import to_avro_schema_json
-from datacontract.export.avro_idl_converter import to_avro_idl
-from datacontract.export.bigquery_converter import to_bigquery_json
-from datacontract.export.dbml_converter import to_dbml_diagram
-from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
-from datacontract.export.go_converter import to_go_types
-from datacontract.export.great_expectations_converter import to_great_expectations
-from datacontract.export.html_export import to_html
-from datacontract.export.jsonschema_converter import to_jsonschema_json
-from datacontract.export.odcs_converter import to_odcs_yaml
-from datacontract.export.protobuf_converter import to_protobuf
-from datacontract.export.pydantic_converter import to_pydantic_model_str
-from datacontract.export.rdf_converter import to_rdf_n3
-from datacontract.export.sodacl_converter import to_sodacl_yaml
-from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
-from datacontract.export.terraform_converter import to_terraform
-from datacontract.imports.avro_importer import import_avro
-from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
-from datacontract.imports.glue_importer import import_glue
-from datacontract.imports.jsonschema_importer import import_jsonschema
-from datacontract.imports.sql_importer import import_sql
+from datacontract.export.exporter import ExportFormat
+from datacontract.export.exporter_factory import exporter_factory
+from datacontract.imports.importer_factory import importer_factory
+
 from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
@@ -50,28 +32,6 @@ from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check


-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
-    go = "go"
-    bigquery = "bigquery"
-    dbml = "dbml"
-
-
 class DataContract:
     def __init__(
         self,
@@ -207,6 +167,9 @@ class DataContract:
         if self._examples:
             server_name = "examples"
             server = self._get_examples_server(data_contract, run, tmp_dir)
+        elif self._server:
+            server_name = self._server
+            server = data_contract.servers.get(server_name)
         else:
             server_name = list(data_contract.servers.keys())[0]
             server = data_contract.servers.get(server_name)
@@ -260,6 +223,38 @@

         return run

+    def _get_examples_server(self, data_contract, run, tmp_dir):
+        run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
+        format = "json"
+        for example in data_contract.examples:
+            format = example.type
+            p = f"{tmp_dir}/{example.model}.{format}"
+            run.log_info(f"Creating example file {p}")
+            with open(p, "w") as f:
+                content = ""
+                if format == "json" and isinstance(example.data, list):
+                    content = json.dumps(example.data)
+                elif format == "json" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "yaml" and isinstance(example.data, list):
+                    content = yaml.dump(example.data, allow_unicode=True)
+                elif format == "yaml" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "csv":
+                    content = example.data
+                logging.debug(f"Content of example file {p}: {content}")
+                f.write(content)
+        path = f"{tmp_dir}" + "/{model}." + format
+        delimiter = "array"
+        server = Server(
+            type="local",
+            path=path,
+            format=format,
+            delimiter=delimiter,
+        )
+        run.log_info(f"Using {server} for testing the examples")
+        return server
+
     def breaking(self, other: "DataContract") -> BreakingChanges:
         return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])

@@ -301,178 +296,29 @@ class DataContract:
             inline_quality=self._inline_quality,
         )

-    def export(
-        self, export_format: ExportFormat, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto"
-    ) -> str:
+    def export(self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs) -> str:
         data_contract = resolve.resolve_data_contract(
             self._data_contract_file,
             self._data_contract_str,
             self._data_contract,
+            schema_location=self._schema_location,
             inline_definitions=True,
             inline_quality=True,
         )
-        if export_format == "jsonschema":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_jsonschema_json(model_name, model_value)
-        if export_format == "sodacl":
-            return to_sodacl_yaml(data_contract)
-        if export_format == "dbt":
-            return to_dbt_models_yaml(data_contract)
-        if export_format == "dbt-sources":
-            return to_dbt_sources_yaml(data_contract, self._server)
-        if export_format == "dbt-staging-sql":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_dbt_staging_sql(data_contract, model_name, model_value)
-        if export_format == "odcs":
-            return to_odcs_yaml(data_contract)
-        if export_format == "rdf":
-            return to_rdf_n3(data_contract, rdf_base)
-        if export_format == "protobuf":
-            return to_protobuf(data_contract)
-        if export_format == "avro":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_avro_schema_json(model_name, model_value)
-        if export_format == "avro-idl":
-            return to_avro_idl(data_contract)
-        if export_format == "terraform":
-            return to_terraform(data_contract)
-        if export_format == "sql":
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_ddl(data_contract, server_type=server_type)
-        if export_format == "sql-query":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_query(data_contract, model_name, model_value, server_type)
-        if export_format == "great-expectations":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_great_expectations(data_contract, model_name)
-        if export_format == "pydantic-model":
-            return to_pydantic_model_str(data_contract)
-        if export_format == "html":
-            return to_html(data_contract)
-        if export_format == "go":
-            return to_go_types(data_contract)
-        if export_format == "bigquery":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            found_server = data_contract.servers.get(self._server)
-            if found_server is None:
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            if found_server.type != "bigquery":
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            return to_bigquery_json(model_name, model_value, found_server)
-        if export_format == "dbml":
-            found_server = data_contract.servers.get(self._server)
-            return to_dbml_diagram(data_contract, found_server)
-        else:
-            print(f"Export format {export_format} not supported.")
-            return ""
-
-    def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
-        if sql_server_type == "auto":
-            if data_contract.servers is None or len(data_contract.servers) == 0:
-                raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
-
-            server_types = set([server.type for server in data_contract.servers.values()])
-            if "snowflake" in server_types:
-                return "snowflake"
-            elif "postgres" in server_types:
-                return "postgres"
-            elif "databricks" in server_types:
-                return "databricks"
-            else:
-                # default to snowflake dialect
-                return "snowflake"
-        else:
-            return sql_server_type

-    def _get_examples_server(self, data_contract, run, tmp_dir):
-        run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
-        format = "json"
-        for example in data_contract.examples:
-            format = example.type
-            p = f"{tmp_dir}/{example.model}.{format}"
-            run.log_info(f"Creating example file {p}")
-            with open(p, "w") as f:
-                content = ""
-                if format == "json" and isinstance(example.data, list):
-                    content = json.dumps(example.data)
-                elif format == "json" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "yaml" and isinstance(example.data, list):
-                    content = yaml.dump(example.data, allow_unicode=True)
-                elif format == "yaml" and isinstance(example.data, str):
-                    content = example.data
-                elif format == "csv":
-                    content = example.data
-                logging.debug(f"Content of example file {p}: {content}")
-                f.write(content)
-        path = f"{tmp_dir}" + "/{model}." + format
-        delimiter = "array"
-        server = Server(
-            type="local",
-            path=path,
-            format=format,
-            delimiter=delimiter,
+        return exporter_factory.create(export_format).export(
+            data_contract=data_contract,
+            model=model,
+            server=self._server,
+            sql_server_type=sql_server_type,
+            export_args=kwargs,
         )
-        run.log_info(f"Using {server} for testing the examples")
-        return server
-
-    def _check_models_for_export(
-        self, data_contract: DataContractSpecification, model: str, export_format: str
-    ) -> typing.Tuple[str, str]:
-        if data_contract.models is None:
-            raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-        model_names = list(data_contract.models.keys())
-
-        if model == "all":
-            if len(data_contract.models.items()) != 1:
-                raise RuntimeError(
-                    f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                )
-
-            model_name, model_value = next(iter(data_contract.models.items()))
-        else:
-            model_name = model
-            model_value = data_contract.models.get(model_name)
-            if model_value is None:
-                raise RuntimeError(
-                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                )
-
-        return model_name, model_value

     def import_from_source(
-        self,
-        format: str,
-        source: typing.Optional[str] = None,
-        glue_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_tables: typing.Optional[typing.List[str]] = None,
-        bigquery_project: typing.Optional[str] = None,
-        bigquery_dataset: typing.Optional[str] = None,
+        self, format: str, source: typing.Optional[str] = None, **kwargs
     ) -> DataContractSpecification:
-        data_contract_specification = DataContract.init()
-
-        if format == "sql":
-            data_contract_specification = import_sql(data_contract_specification, format, source)
-        elif format == "avro":
-            data_contract_specification = import_avro(data_contract_specification, source)
-        elif format == "glue":
-            data_contract_specification = import_glue(data_contract_specification, source, glue_tables)
-        elif format == "jsonschema":
-            data_contract_specification = import_jsonschema(data_contract_specification, source)
-        elif format == "bigquery":
-            if source is not None:
-                data_contract_specification = import_bigquery_from_json(data_contract_specification, source)
-            else:
-                data_contract_specification = import_bigquery_from_api(
-                    data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
-                )
-        else:
-            print(f"Import format {format} not supported.")
+        data_contract_specification_initial = DataContract.init()

-        return data_contract_specification
+        return importer_factory.create(format).import_source(
+            data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
+        )
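A hedged usage sketch of the refactored API above: format-specific arguments now travel through **kwargs into export_args/import_args, and ExportFormat is re-exported via datacontract.data_contract (member names assumed unchanged from the removed enum):

    from datacontract.data_contract import DataContract, ExportFormat

    dc = DataContract(data_contract_file="datacontract.yaml")

    # rdf_base is no longer a named parameter of export(); it reaches the
    # RDF exporter through export_args.
    rdf = dc.export(export_format=ExportFormat.rdf, rdf_base="https://example.com/")

    # Imports are dispatched the same way through importer_factory.
    spec = DataContract().import_from_source(format="sql", source="tables.sql")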
datacontract/engines/fastjsonschema/s3/s3_read_files.py CHANGED
@@ -1,7 +1,7 @@
 import logging
 import os

-import s3fs
+from datacontract.model.exceptions import DataContractException


 def yield_s3_files(s3_endpoint_url, s3_location):
@@ -14,6 +14,18 @@ def yield_s3_files(s3_endpoint_url, s3_location):


 def s3_fs(s3_endpoint_url):
+    try:
+        import s3fs
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="s3 extra missing",
+            reason="Install the extra datacontract-cli\[s3] to use s3",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
     return s3fs.S3FileSystem(
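A minimal sketch of what the lazy import guard above means for callers when the s3 extra is not installed (hypothetical local check, not part of the package):

    from datacontract.engines.fastjsonschema.s3.s3_read_files import s3_fs
    from datacontract.model.exceptions import DataContractException

    try:
        fs = s3_fs(s3_endpoint_url=None)
    except DataContractException as e:
        # Raised instead of a bare ImportError; the configured reason points to
        # the missing extra: "Install the extra datacontract-cli[s3] to use s3".
        print(e)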
datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -10,6 +10,7 @@ from datacontract.engines.soda.connections.kafka import create_spark_session, re
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
 from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_configuration
+from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run, Check, Log
@@ -66,8 +67,10 @@ def check_soda_execute(
         scan.set_data_source_name(server.type)
     elif server.type == "dataframe":
         if spark is None:
-            run.log_warn("Server type dataframe only works with the Python library and requires a Spark session, "
-                         "please provide one with the DataContract class")
+            run.log_warn(
+                "Server type dataframe only works with the Python library and requires a Spark session, "
+                "please provide one with the DataContract class"
+            )
             return
         else:
             logging.info("Use Spark to connect to data source")
@@ -83,6 +86,10 @@ def check_soda_execute(
         soda_configuration_str = to_sqlserver_soda_configuration(server)
         scan.add_configuration_yaml_str(soda_configuration_str)
         scan.set_data_source_name(server.type)
+    elif server.type == "trino":
+        soda_configuration_str = to_trino_soda_configuration(server)
+        scan.add_configuration_yaml_str(soda_configuration_str)
+        scan.set_data_source_name(server.type)

    else:
        run.checks.append(
datacontract/engines/soda/connections/bigquery.py CHANGED
@@ -6,10 +6,17 @@ import yaml
 # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
 def to_bigquery_soda_configuration(server):
     # with service account key, using an external json file
+
+    # check for our own environment variable first
+    account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
+    if account_info is None:
+        # but as a fallback look for the default google one
+        account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "bigquery",
-            "account_info_json_path": os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH"),
+            "account_info_json_path": account_info,
             "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
             "project_id": server.project,
             "dataset": server.dataset,
datacontract/engines/soda/connections/duckdb.py CHANGED
@@ -80,6 +80,7 @@ def setup_s3_connection(con, server):
     s3_region = os.getenv("DATACONTRACT_S3_REGION")
     s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     s3_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+    s3_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
     s3_endpoint = "s3.amazonaws.com"
     use_ssl = "true"
     url_style = "vhost"
@@ -90,18 +91,33 @@ def setup_s3_connection(con, server):
         url_style = "path"

     if s3_access_key_id is not None:
-        con.sql(f"""
-        CREATE OR REPLACE SECRET s3_secret (
-            TYPE S3,
-            PROVIDER CREDENTIAL_CHAIN,
-            REGION '{s3_region}',
-            KEY_ID '{s3_access_key_id}',
-            SECRET '{s3_secret_access_key}',
-            ENDPOINT '{s3_endpoint}',
-            USE_SSL '{use_ssl}',
-            URL_STYLE '{url_style}'
-        );
-        """)
+        if s3_session_token is not None:
+            con.sql(f"""
+            CREATE OR REPLACE SECRET s3_secret (
+                TYPE S3,
+                PROVIDER CREDENTIAL_CHAIN,
+                REGION '{s3_region}',
+                KEY_ID '{s3_access_key_id}',
+                SECRET '{s3_secret_access_key}',
+                SESSION_TOKEN '{s3_session_token}',
+                ENDPOINT '{s3_endpoint}',
+                USE_SSL '{use_ssl}',
+                URL_STYLE '{url_style}'
+            );
+            """)
+        else:
+            con.sql(f"""
+            CREATE OR REPLACE SECRET s3_secret (
+                TYPE S3,
+                PROVIDER CREDENTIAL_CHAIN,
+                REGION '{s3_region}',
+                KEY_ID '{s3_access_key_id}',
+                SECRET '{s3_secret_access_key}',
+                ENDPOINT '{s3_endpoint}',
+                USE_SSL '{use_ssl}',
+                URL_STYLE '{url_style}'
+            );
+            """)

     # con.sql(f"""
     #    SET s3_region = '{s3_region}';
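A short sketch of the new session-token path: temporary credentials (for example from an assumed role) can now be passed through to the DuckDB S3 secret by setting one additional environment variable (the values below are placeholders):

    import os

    # All three are read by setup_s3_connection; SESSION_TOKEN is only added
    # to the CREATE SECRET statement when the token variable is present.
    os.environ["DATACONTRACT_S3_ACCESS_KEY_ID"] = "<access-key-id>"
    os.environ["DATACONTRACT_S3_SECRET_ACCESS_KEY"] = "<secret-access-key>"
    os.environ["DATACONTRACT_S3_SESSION_TOKEN"] = "<session-token>"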
datacontract/engines/soda/connections/trino.py ADDED
@@ -0,0 +1,26 @@
+import os
+
+import yaml
+
+
+def to_trino_soda_configuration(server):
+    password = os.getenv("DATACONTRACT_TRINO_PASSWORD")
+    username = os.getenv("DATACONTRACT_TRINO_USERNAME")
+
+    data_source = {
+        "type": "trino",
+        "host": server.host,
+        "port": str(server.port),
+        "username": username,
+        "password": password,
+        "catalog": server.catalog,
+        "schema": server.schema_,
+    }
+
+    if password is None or password == "":
+        data_source["auth_type"] = "NoAuthentication"  # default is BasicAuthentication
+
+    soda_configuration = {f"data_source {server.type}": data_source}
+
+    soda_configuration_str = yaml.dump(soda_configuration)
+    return soda_configuration_str
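A hedged usage sketch for the new Trino connection module, assuming the Server model accepts the fields the module reads (host, port, catalog, and schema_ as the alias for "schema"):

    import os

    from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
    from datacontract.model.data_contract_specification import Server

    os.environ["DATACONTRACT_TRINO_USERNAME"] = "analyst"  # hypothetical credentials
    # With DATACONTRACT_TRINO_PASSWORD unset, auth_type falls back to NoAuthentication.

    server = Server(type="trino", host="localhost", port=8080, catalog="analytics", schema_="prod")
    print(to_trino_soda_configuration(server))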