datacontract-cli 0.10.16__py3-none-any.whl → 0.10.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/cli.py +49 -32
- datacontract/data_contract.py +14 -11
- datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
- datacontract/engines/soda/check_soda_execute.py +9 -4
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/engines/soda/connections/duckdb.py +22 -9
- datacontract/export/data_caterer_converter.py +20 -7
- datacontract/export/dbml_converter.py +2 -2
- datacontract/export/dbt_converter.py +41 -16
- datacontract/export/exporter.py +6 -2
- datacontract/export/exporter_factory.py +48 -14
- datacontract/export/iceberg_converter.py +3 -3
- datacontract/export/markdown_converter.py +208 -0
- datacontract/export/odcs_v3_exporter.py +6 -0
- datacontract/export/sodacl_converter.py +22 -5
- datacontract/export/sql_converter.py +1 -1
- datacontract/export/sql_type_converter.py +28 -2
- datacontract/export/sqlalchemy_converter.py +3 -1
- datacontract/imports/csv_importer.py +89 -0
- datacontract/imports/dbml_importer.py +1 -1
- datacontract/imports/dbt_importer.py +94 -12
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_v2_importer.py +1 -1
- datacontract/imports/odcs_v3_importer.py +1 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/init/init_template.py +20 -0
- datacontract/integration/datamesh_manager.py +15 -9
- datacontract/lint/linters/field_reference_linter.py +10 -1
- datacontract/lint/resolve.py +48 -14
- datacontract/lint/schema.py +10 -3
- datacontract/model/data_contract_specification.py +13 -4
- datacontract/model/run.py +1 -0
- datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
- datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
- datacontract/templates/datacontract.html +20 -1
- datacontract/templates/partials/definition.html +15 -5
- datacontract/templates/partials/model_field.html +10 -1
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +477 -343
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +46 -42
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +1 -1
- datacontract/init/download_datacontract_file.py +0 -17
- datacontract/integration/opentelemetry.py +0 -103
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking_rules.py
CHANGED

@@ -42,6 +42,10 @@ class BreakingRules:
     field_primary_removed = Severity.WARNING
     field_primary_updated = Severity.WARNING
 
+    field_primary_key_added = Severity.WARNING
+    field_primary_key_removed = Severity.WARNING
+    field_primary_key_updated = Severity.WARNING
+
     field_references_added = Severity.WARNING
     field_references_removed = Severity.WARNING
     field_references_updated = Severity.WARNING
datacontract/cli.py
CHANGED

@@ -1,3 +1,4 @@
+import os
 from importlib import metadata
 from pathlib import Path
 from typing import Iterable, List, Optional

@@ -15,15 +16,11 @@ from datacontract import web
 from datacontract.catalog.catalog import create_data_contract_html, create_index_html
 from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.imports.importer import ImportFormat
-from datacontract.init.
-    FileExistsException,
-    download_datacontract_file,
-)
+from datacontract.init.init_template import get_init_template
 from datacontract.integration.datamesh_manager import (
     publish_data_contract_to_datamesh_manager,
 )
-
-DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+from datacontract.lint.resolve import resolve_data_contract_dict
 
 console = Console()
 

@@ -70,24 +67,21 @@ def common(
 @app.command()
 def init(
     location: Annotated[
-        str,
-        typer.Argument(help="The location (url or path) of the data contract yaml to create."),
+        str, typer.Argument(help="The location of the data contract file to create.")
     ] = "datacontract.yaml",
-    template: Annotated[
-        str, typer.Option(help="URL of a template or data contract")
-    ] = "https://datacontract.com/datacontract.init.yaml",
+    template: Annotated[str, typer.Option(help="URL of a template or data contract")] = None,
     overwrite: Annotated[bool, typer.Option(help="Replace the existing datacontract.yaml")] = False,
 ):
     """
-
+    Create an empty data contract.
     """
-
-        download_datacontract_file(location, template, overwrite)
-    except FileExistsException:
+    if not overwrite and os.path.exists(location):
         console.print("File already exists, use --overwrite to overwrite")
         raise typer.Exit(code=1)
-
-
+    template_str = get_init_template(template)
+    with open(location, "w") as f:
+        f.write(template_str)
+    console.print("📄 data contract written to " + location)
 
 
 @app.command()

@@ -99,7 +93,7 @@ def lint(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] =
+    ] = None,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.

@@ -117,7 +111,7 @@ def test(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] =
+    ] = None,
     server: Annotated[
         str,
         typer.Option(

@@ -132,13 +126,11 @@ def test(
         typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
     ] = None,
     publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
-    publish_to_opentelemetry: Annotated[
-        bool,
-        typer.Option(
-            help="Publish the results to opentelemetry. Use environment variables to configure the OTLP endpoint, headers, etc."
-        ),
-    ] = False,
     logs: Annotated[bool, typer.Option(help="Print logs")] = False,
+    ssl_verification: Annotated[
+        bool,
+        typer.Option(help="SSL verification when publishing the data contract."),
+    ] = True,
 ):
     """
     Run schema and quality tests on configured servers.

@@ -150,9 +142,9 @@ def test(
         data_contract_file=location,
         schema_location=schema,
         publish_url=publish,
-        publish_to_opentelemetry=publish_to_opentelemetry,
         server=server,
         examples=examples,
+        ssl_verification=ssl_verification,
     ).test()
     if logs:
         _print_logs(run)

@@ -200,7 +192,7 @@ def export(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] =
+    ] = None,
     # TODO: this should be a subcommand
     engine: Annotated[
         Optional[str],

@@ -280,6 +272,14 @@ def import_(
         Optional[str],
         typer.Option(help="Table name to assign to the model created from the Iceberg schema."),
     ] = None,
+    template: Annotated[
+        Optional[str],
+        typer.Option(help="The location (url or path) of the Data Contract Specification Template"),
+    ] = None,
+    schema: Annotated[
+        str,
+        typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.

@@ -287,6 +287,8 @@ def import_(
     result = DataContract().import_from_source(
         format=format,
         source=source,
+        template=template,
+        schema=schema,
         glue_table=glue_table,
         bigquery_table=bigquery_table,
         bigquery_project=bigquery_project,

@@ -314,15 +316,18 @@ def publish(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] =
+    ] = None,
+    ssl_verification: Annotated[
+        bool,
+        typer.Option(help="SSL verification when publishing the data contract."),
+    ] = True,
 ):
     """
     Publish the data contract to the Data Mesh Manager.
     """
     publish_data_contract_to_datamesh_manager(
-
-
-        ).get_data_contract_specification(),
+        data_contract_dict=resolve_data_contract_dict(location),
+        ssl_verification=ssl_verification,
     )
 
 

@@ -338,7 +343,7 @@ def catalog(
     schema: Annotated[
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-    ] =
+    ] = None,
 ):
     """
     Create an html catalog of data contracts.

@@ -447,6 +452,18 @@ def _handle_result(run):
         console.print(
             f"🟢 data contract is valid. Run {len(run.checks)} checks. Took {(run.timestampEnd - run.timestampStart).total_seconds()} seconds."
         )
+    elif run.result == "warning":
+        console.print("🟠 data contract has warnings. Found the following warnings:")
+        i = 1
+        for check in run.checks:
+            if check.result != "passed":
+                field = to_field(run, check)
+                if field:
+                    field = field + " "
+                else:
+                    field = ""
+                console.print(f"{i}) {field}{check.name}: {check.reason}")
+                i += 1
     else:
         console.print("🔴 data contract is invalid, found the following errors:")
         i = 1
datacontract/data_contract.py
CHANGED

@@ -22,8 +22,8 @@ from datacontract.engines.soda.check_soda_execute import check_soda_execute
 from datacontract.export.exporter import ExportFormat
 from datacontract.export.exporter_factory import exporter_factory
 from datacontract.imports.importer_factory import importer_factory
+from datacontract.init.init_template import get_init_template
 from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
-from datacontract.integration.opentelemetry import publish_test_results_to_opentelemetry
 from datacontract.lint import resolve
 from datacontract.lint.linters.description_linter import DescriptionLinter
 from datacontract.lint.linters.example_model_linter import ExampleModelLinter

@@ -48,10 +48,10 @@ class DataContract:
         server: str = None,
         examples: bool = False,
         publish_url: str = None,
-        publish_to_opentelemetry: bool = False,
         spark: "SparkSession" = None,
         inline_definitions: bool = True,
         inline_quality: bool = True,
+        ssl_verification: bool = True,
     ):
         self._data_contract_file = data_contract_file
         self._data_contract_str = data_contract_str

@@ -60,10 +60,10 @@ class DataContract:
         self._server = server
         self._examples = examples
         self._publish_url = publish_url
-        self._publish_to_opentelemetry = publish_to_opentelemetry
         self._spark = spark
         self._inline_definitions = inline_definitions
         self._inline_quality = inline_quality
+        self._ssl_verification = ssl_verification
         self.all_linters = {
             ExampleModelLinter(),
             QualityUsesSchemaLinter(),

@@ -75,8 +75,9 @@ class DataContract:
         }
 
     @classmethod
-    def init(cls, template: str =
-
+    def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
+        template_str = get_init_template(template)
+        return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
 
     def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
         """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.

@@ -230,10 +231,7 @@ class DataContract:
         run.finish()
 
         if self._publish_url is not None:
-            publish_test_results_to_datamesh_manager(run, self._publish_url)
-
-        if self._publish_to_opentelemetry:
-            publish_test_results_to_opentelemetry(run)
+            publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
 
         return run
 

@@ -347,9 +345,14 @@ class DataContract:
         )
 
     def import_from_source(
-        self,
+        self,
+        format: str,
+        source: typing.Optional[str] = None,
+        template: typing.Optional[str] = None,
+        schema: typing.Optional[str] = None,
+        **kwargs,
     ) -> DataContractSpecification:
-        data_contract_specification_initial = DataContract.init()
+        data_contract_specification_initial = DataContract.init(template=template, schema=schema)
 
         return importer_factory.create(format).import_source(
             data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
datacontract/engines/fastjsonschema/check_jsonschema.py
CHANGED

@@ -11,7 +11,7 @@ from datacontract.engines.fastjsonschema.s3.s3_read_files import yield_s3_files
 from datacontract.export.jsonschema_converter import to_jsonschema
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
-from datacontract.model.run import Check, Run
+from datacontract.model.run import Check, ResultEnum, Run
 
 # Thread-safe cache for primaryKey fields.
 _primary_key_cache = {}

@@ -256,18 +256,29 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
                 type="schema",
                 name="Check that JSON has valid schema",
                 model=model_name,
-                result=
+                result=ResultEnum.info,
                 reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
                 engine="jsonschema",
             )
         )
+    elif server.type == "azure":
+        run.checks.append(
+            Check(
+                type="schema",
+                name="Check that JSON has valid schema",
+                model=model_name,
+                result=ResultEnum.info,
+                reason="JSON Schema check skipped for azure, as azure is currently not supported",
+                engine="jsonschema",
+            )
+        )
     else:
         run.checks.append(
             Check(
                 type="schema",
                 name="Check that JSON has valid schema",
                 model=model_name,
-                result=
+                result=ResultEnum.warning,
                 reason=f"Server type {server.type} not supported",
                 engine="jsonschema",
             )

@@ -279,7 +290,7 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
                 type="schema",
                 name="Check that JSON has valid schema",
                 model=model_name,
-                result=
+                result=ResultEnum.passed,
                 reason="All JSON entries are valid.",
                 engine="jsonschema",
             )
datacontract/engines/soda/check_soda_execute.py
CHANGED

@@ -1,7 +1,5 @@
 import logging
 
-from soda.scan import Scan
-
 from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
 from datacontract.engines.soda.connections.databricks import to_databricks_soda_configuration
 from datacontract.engines.soda.connections.duckdb import get_duckdb_connection

@@ -16,6 +14,11 @@ from datacontract.model.run import Check, Log, ResultEnum, Run
 
 
 def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
+    from soda.common.config_helper import ConfigHelper
+
+    ConfigHelper.get_instance().upsert_value("send_anonymous_usage_stats", False)
+    from soda.scan import Scan
+
     if data_contract is None:
         run.log_warn("Cannot run engine soda-core, as data contract is invalid")
         return

@@ -25,6 +28,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
 
     if server.type in ["s3", "gcs", "azure", "local"]:
         if server.format in ["json", "parquet", "csv", "delta"]:
+            run.log_info(f"Configuring engine soda-core to connect to {server.type} {server.format} with duckdb")
             con = get_duckdb_connection(data_contract, server, run)
             scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
             scan.set_data_source_name(server.type)

@@ -54,11 +58,12 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
         scan.set_data_source_name(server.type)
     elif server.type == "databricks":
         if spark is not None:
-
+            run.log_info("Connecting to databricks via spark")
             scan.add_spark_session(spark, data_source_name=server.type)
             scan.set_data_source_name(server.type)
             spark.sql(f"USE {server.catalog}.{server.schema_}")
         else:
+            run.log_info("Connecting to databricks directly")
             soda_configuration_str = to_databricks_soda_configuration(server)
             scan.add_configuration_yaml_str(soda_configuration_str)
             scan.set_data_source_name(server.type)

@@ -183,4 +188,4 @@ def update_reason(check, c):
                 # print(check.reason)
                 break  # Exit the loop once the desired block is found
     if "fail" in c["diagnostics"]:
-        check.reason = f"
+        check.reason = f"Value: {c['diagnostics']['value']} Fail: {c['diagnostics']['fail']}"
datacontract/engines/soda/connections/databricks.py
CHANGED

@@ -4,15 +4,24 @@ import yaml
 
 
 def to_databricks_soda_configuration(server):
+    token = os.getenv("DATACONTRACT_DATABRICKS_TOKEN")
+    if token is None:
+        raise ValueError("DATACONTRACT_DATABRICKS_TOKEN environment variable is not set")
+    http_path = os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH")
+    host = server.host
+    if host is None:
+        host = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME")
+    if host is None:
+        raise ValueError("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set")
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "spark",
             "method": "databricks",
-            "host":
+            "host": host,
             "catalog": server.catalog,
             "schema": server.schema_,
-            "http_path":
-            "token":
+            "http_path": http_path,
+            "token": token,
         }
     }
 
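Illustrative, not part of the diff: with the change above, the Databricks connection resolves its credentials from environment variables and fails fast when the token or hostname is missing. A minimal sketch of providing them before running tests; the values are placeholders, the variable names are the ones read above.

import os

# Placeholder credentials; set these before running tests against a
# databricks server block.
os.environ["DATACONTRACT_DATABRICKS_TOKEN"] = "dapi-placeholder"
os.environ["DATACONTRACT_DATABRICKS_HTTP_PATH"] = "/sql/1.0/warehouses/placeholder"
# Only consulted when the server block does not define a host.
os.environ["DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"] = "adb-0000000000000000.0.azuredatabricks.net"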
datacontract/engines/soda/connections/duckdb.py
CHANGED

@@ -146,6 +146,7 @@ def setup_azure_connection(con, server):
     tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
     client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
     client_secret = os.getenv("DATACONTRACT_AZURE_CLIENT_SECRET")
+    storage_account = server.storageAccount
 
     if tenant_id is None:
         raise ValueError("Error: Environment variable DATACONTRACT_AZURE_TENANT_ID is not set")

@@ -157,12 +158,24 @@ def setup_azure_connection(con, server):
     con.install_extension("azure")
     con.load_extension("azure")
 
-
-
-
-
-
-
-
-
-
+    if storage_account is not None:
+        con.sql(f"""
+        CREATE SECRET azure_spn (
+            TYPE AZURE,
+            PROVIDER SERVICE_PRINCIPAL,
+            TENANT_ID '{tenant_id}',
+            CLIENT_ID '{client_id}',
+            CLIENT_SECRET '{client_secret}',
+            ACCOUNT_NAME '{storage_account}'
+        );
+        """)
+    else:
+        con.sql(f"""
+        CREATE SECRET azure_spn (
+            TYPE AZURE,
+            PROVIDER SERVICE_PRINCIPAL,
+            TENANT_ID '{tenant_id}',
+            CLIENT_ID '{client_id}',
+            CLIENT_SECRET '{client_secret}'
+        );
+        """)
datacontract/export/data_caterer_converter.py
CHANGED

@@ -42,11 +42,11 @@ def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server
         "name": model_key,
         "type": _to_step_type(server),
         "options": _to_data_source_options(model_key, server),
-        "
+        "fields": [],
     }
     fields = _to_fields(model_value.fields)
     if fields:
-        step["
+        step["fields"] = fields
     return step
 
 

@@ -97,16 +97,29 @@ def _to_field(field_name: str, field: Field) -> dict:
     if new_type == "object" or new_type == "record" or new_type == "struct":
         # need to get nested field definitions
         nested_fields = _to_fields(field.fields)
-        dc_field["
+        dc_field["fields"] = nested_fields
+    elif new_type == "array":
+        if field.items is not None and field.items.type is not None:
+            dc_generator_opts["arrayType"] = _to_data_type(field.items.type)
+        else:
+            dc_generator_opts["arrayType"] = "string"
 
     if field.enum is not None and len(field.enum) > 0:
         dc_generator_opts["oneOf"] = field.enum
     if field.unique is not None and field.unique:
         dc_generator_opts["isUnique"] = field.unique
+    if field.primaryKey is not None and field.primaryKey:
+        dc_generator_opts["isPrimaryKey"] = field.primaryKey
     if field.minLength is not None:
-
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMinLen"] = field.minLength
+        else:
+            dc_generator_opts["minLen"] = field.minLength
     if field.maxLength is not None:
-
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMaxLen"] = field.maxLength
+        else:
+            dc_generator_opts["maxLen"] = field.maxLength
     if field.pattern is not None:
         dc_generator_opts["regex"] = field.pattern
     if field.minimum is not None:

@@ -115,7 +128,7 @@ def _to_field(field_name: str, field: Field) -> dict:
         dc_generator_opts["max"] = field.maximum
 
     if len(dc_generator_opts.keys()) > 0:
-        dc_field["
+        dc_field["options"] = dc_generator_opts
     return dc_field
 
 

@@ -124,7 +137,7 @@ def _to_data_type(data_type):
         return "double"
     elif data_type == "decimal" or data_type == "bigint":
         return "decimal"
-    elif data_type == "int":
+    elif data_type == "int" or data_type == "integer":
         return "integer"
     elif data_type == "long":
         return "long"
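Illustrative, not part of the diff: per the new array handling above, an array-of-string field with minLength and maxLength set would collect generator options roughly like the sketch below (the concrete values are made up).

# Rough shape of the generator options built for an array<string> field
# with minLength=1 and maxLength=5, following the branches above.
dc_generator_opts = {
    "arrayType": "string",
    "arrayMinLen": 1,
    "arrayMaxLen": 5,
}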
datacontract/export/dbml_converter.py
CHANGED

@@ -90,7 +90,7 @@ Note: {1}
 
 
 def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]:
-    if field.primary:
+    if field.primaryKey or field.primary:
         if field.required is not None:
             if not field.required:
                 raise DataContractException(

@@ -115,7 +115,7 @@ def generate_field(field_name: str, field: spec.Field, model_name: str, server:
         field.unique = True
 
     field_attrs = []
-    if field.primary:
+    if field.primaryKey or field.primary:
         field_attrs.append("pk")
 
     if field.unique:
datacontract/export/dbt_converter.py
CHANGED

@@ -39,13 +39,6 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
 
 
 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
-    if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-        print(
-            "Export to dbt-staging-sql currently only works with exactly one model in the data contract."
-            "Please specify the model name."
-        )
-        return ""
-
     id = data_contract_spec.id
     columns = []
     for field_name, field in model_value.fields.items():

@@ -81,19 +74,21 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s
 
     source["tables"] = []
     for model_key, model_value in data_contract_spec.models.items():
-        dbt_model = _to_dbt_source_table(model_key, model_value, adapter_type)
+        dbt_model = _to_dbt_source_table(data_contract_spec, model_key, model_value, adapter_type)
         source["tables"].append(dbt_model)
     return yaml.dump(dbt, indent=2, sort_keys=False, allow_unicode=True)
 
 
-def _to_dbt_source_table(
+def _to_dbt_source_table(
+    data_contract_spec: DataContractSpecification, model_key, model_value: Model, adapter_type: Optional[str]
+) -> dict:
     dbt_model = {
         "name": model_key,
     }
 
     if model_value.description is not None:
         dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, False, adapter_type)
+    columns = _to_columns(data_contract_spec, model_value.fields, False, adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model

@@ -114,7 +109,7 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
     dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
         dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, _supports_constraints(model_type), None)
+    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), None)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model

@@ -137,15 +132,33 @@ def _supports_constraints(model_type):
     return model_type == "table" or model_type == "incremental"
 
 
-def _to_columns(
+def _to_columns(
+    data_contract_spec: DataContractSpecification,
+    fields: Dict[str, Field],
+    supports_constraints: bool,
+    adapter_type: Optional[str],
+) -> list:
     columns = []
     for field_name, field in fields.items():
-        column = _to_column(field_name, field, supports_constraints, adapter_type)
+        column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type)
         columns.append(column)
     return columns
 
 
-def
+def get_table_name_and_column_name(references: str) -> tuple[Optional[str], str]:
+    parts = references.split(".")
+    if len(parts) < 2:
+        return None, parts[0]
+    return parts[-2], parts[-1]
+
+
+def _to_column(
+    data_contract_spec: DataContractSpecification,
+    field_name: str,
+    field: Field,
+    supports_constraints: bool,
+    adapter_type: Optional[str],
+) -> dict:
     column = {"name": field_name}
     adapter_type = adapter_type or "snowflake"
     dbt_type = convert_to_sql_type(field, adapter_type)

@@ -239,9 +252,21 @@ def _to_column(field_name: str, field: Field, supports_constraints: bool, adapte
                 }
             }
         )
+    if field.references is not None:
+        ref_source_name = data_contract_spec.id
+        table_name, column_name = get_table_name_and_column_name(field.references)
+        if table_name is not None and column_name is not None:
+            column["data_tests"].append(
+                {
+                    "relationships": {
+                        "to": f"""source("{ref_source_name}", "{table_name}")""",
+                        "field": f"{column_name}",
+                    }
+                }
+            )
 
-
-
+    if not column["data_tests"]:
+        column.pop("data_tests")
 
     # TODO: all constraints
     return column
datacontract/export/exporter.py
CHANGED

@@ -2,7 +2,10 @@ import typing
 from abc import ABC, abstractmethod
 from enum import Enum
 
-from datacontract.model.data_contract_specification import
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Model,
+)
 
 
 class Exporter(ABC):

@@ -40,6 +43,7 @@ class ExportFormat(str, Enum):
     sqlalchemy = "sqlalchemy"
     data_caterer = "data-caterer"
     dcs = "dcs"
+    markdown = "markdown"
     iceberg = "iceberg"
 
     @classmethod

@@ -49,7 +53,7 @@ class ExportFormat(str, Enum):
 
 def _check_models_for_export(
     data_contract: DataContractSpecification, model: str, export_format: str
-) -> typing.Tuple[str,
+) -> typing.Tuple[str, Model]:
     if data_contract.models is None:
         raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
 
|