datacontract-cli 0.10.22__py3-none-any.whl → 0.10.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/__init__.py +13 -0
- datacontract/catalog/catalog.py +2 -2
- datacontract/cli.py +20 -72
- datacontract/data_contract.py +5 -3
- datacontract/engines/data_contract_test.py +32 -7
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +17 -4
- datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py} +66 -9
- datacontract/engines/soda/connections/kafka.py +3 -2
- datacontract/export/avro_converter.py +10 -3
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/dbt_converter.py +13 -10
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/odcs_v3_exporter.py +27 -7
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/imports/avro_importer.py +31 -6
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_v3_importer.py +49 -7
- datacontract/imports/protobuf_importer.py +266 -0
- datacontract/lint/resolve.py +40 -12
- datacontract/model/data_contract_specification.py +2 -2
- datacontract/model/run.py +3 -0
- datacontract/output/__init__.py +0 -0
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- datacontract/templates/datacontract.html +2 -1
- datacontract/templates/index.html +2 -1
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/METADATA +279 -193
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/RECORD +37 -33
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/WHEEL +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info/licenses}/LICENSE +0 -0
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/top_level.txt +0 -0
datacontract/__init__.py
CHANGED
@@ -0,0 +1,13 @@
+# Configuration so that yaml.safe_dump dumps strings with line breaks with yaml literal |
+import yaml
+
+yaml.SafeDumper.org_represent_str = yaml.SafeDumper.represent_str
+
+
+def repr_str(dumper, data):
+    if "\n" in data:
+        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+    return dumper.org_represent_str(data)
+
+
+yaml.add_representer(str, repr_str, Dumper=yaml.SafeDumper)
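The effect of this new module-level hook: once datacontract is imported, yaml.safe_dump renders strings that contain line breaks as YAML literal blocks instead of quoted scalars. A small illustration (the sample data is made up):

    import yaml

    import datacontract  # importing the package registers the SafeDumper representer

    doc = {"description": "First line\nSecond line"}
    # Without the representer, the newline is folded into a quoted scalar.
    # With it, the value is emitted in literal block style:
    #   description: |-
    #     First line
    #     Second line
    print(yaml.safe_dump(doc))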
datacontract/catalog/catalog.py
CHANGED
@@ -19,7 +19,7 @@ def create_data_contract_html(contracts, file: Path, path: Path, schema: str):
     file_without_suffix = file.with_suffix(".html")
     html_filepath = path / file_without_suffix
     html_filepath.parent.mkdir(parents=True, exist_ok=True)
-    with open(html_filepath, "w") as f:
+    with open(html_filepath, "w", encoding="utf-8") as f:
         f.write(html)
     contracts.append(
         DataContractView(
@@ -42,7 +42,7 @@ class DataContractView:

 def create_index_html(contracts, path):
     index_filepath = path / "index.html"
-    with open(index_filepath, "w") as f:
+    with open(index_filepath, "w", encoding="utf-8") as f:
         # Load templates from templates folder
         package_loader = PackageLoader("datacontract", "templates")
         env = Environment(
datacontract/cli.py
CHANGED
@@ -5,9 +5,7 @@ from typing import Iterable, List, Optional

 import typer
 from click import Context
-from rich import box
 from rich.console import Console
-from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated

@@ -19,6 +17,8 @@ from datacontract.integration.datamesh_manager import (
     publish_data_contract_to_datamesh_manager,
 )
 from datacontract.lint.resolve import resolve_data_contract_dict
+from datacontract.output.output_format import OutputFormat
+from datacontract.output.test_results_writer import write_test_result

 console = Console()

@@ -92,12 +92,19 @@ def lint(
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
     ] = None,
+    output: Annotated[
+        Path,
+        typer.Option(
+            help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml'). If no path is provided, the output will be printed to stdout."
+        ),
+    ] = None,
+    output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
 ):
     """
     Validate that the datacontract.yaml is correctly formatted.
     """
     run = DataContract(data_contract_file=location, schema_location=schema).lint()
-
+    write_test_result(run, console, output_format, output)


 @app.command()
@@ -120,6 +127,13 @@ def test(
         ),
     ] = "all",
     publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
+    output: Annotated[
+        Path,
+        typer.Option(
+            help="Specify the file path where the test results should be written to (e.g., './test-results/TEST-datacontract.xml')."
+        ),
+    ] = None,
+    output_format: Annotated[OutputFormat, typer.Option(help="The target format for the test results.")] = None,
     logs: Annotated[bool, typer.Option(help="Print logs")] = False,
     ssl_verification: Annotated[
         bool,
@@ -141,7 +155,7 @@ def test(
     ).test()
     if logs:
         _print_logs(run)
-
+    write_test_result(run, console, output_format, output)


 @app.command()
@@ -214,7 +228,7 @@ def export(
     if output is None:
         console.print(result, markup=False, soft_wrap=True)
     else:
-        with output.open("w") as f:
+        with output.open(mode="w", encoding="utf-8") as f:
             f.write(result)
         console.print(f"Written result to {output}")

@@ -306,7 +320,7 @@ def import_(
     if output is None:
         console.print(result.to_yaml(), markup=False, soft_wrap=True)
     else:
-        with output.open("w") as f:
+        with output.open(mode="w", encoding="utf-8") as f:
             f.write(result.to_yaml())
         console.print(f"Written result to {output}")

@@ -467,77 +481,11 @@ def api(
     uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)


-def _handle_result(run):
-    _print_table(run)
-    if run.result == "passed":
-        console.print(
-            f"🟢 data contract is valid. Run {len(run.checks)} checks. Took {(run.timestampEnd - run.timestampStart).total_seconds()} seconds."
-        )
-    elif run.result == "warning":
-        console.print("🟠 data contract has warnings. Found the following warnings:")
-        i = 1
-        for check in run.checks:
-            if check.result != "passed":
-                field = to_field(run, check)
-                if field:
-                    field = field + " "
-                else:
-                    field = ""
-                console.print(f"{i}) {field}{check.name}: {check.reason}")
-                i += 1
-    else:
-        console.print("🔴 data contract is invalid, found the following errors:")
-        i = 1
-        for check in run.checks:
-            if check.result != "passed":
-                field = to_field(run, check)
-                if field:
-                    field = field + " "
-                else:
-                    field = ""
-                console.print(f"{i}) {field}{check.name}: {check.reason}")
-                i += 1
-        raise typer.Exit(code=1)
-
-
-def _print_table(run):
-    table = Table(box=box.ROUNDED)
-    table.add_column("Result", no_wrap=True)
-    table.add_column("Check", max_width=100)
-    table.add_column("Field", max_width=32)
-    table.add_column("Details", max_width=50)
-    for check in sorted(run.checks, key=lambda c: (c.result or "", c.model or "", c.field or "")):
-        table.add_row(with_markup(check.result), check.name, to_field(run, check), check.reason)
-    console.print(table)
-
-
-def to_field(run, check):
-    models = [c.model for c in run.checks]
-    if len(set(models)) > 1:
-        if check.field is None:
-            return check.model
-        return check.model + "." + check.field
-    else:
-        return check.field
-
-
 def _print_logs(run):
     console.print("\nLogs:")
     for log in run.logs:
         console.print(log.timestamp.strftime("%y-%m-%d %H:%M:%S"), log.level.ljust(5), log.message)


-def with_markup(result):
-    if result == "passed":
-        return "[green]passed[/green]"
-    if result == "warning":
-        return "[yellow]warning[/yellow]"
-    if result == "failed":
-        return "[red]failed[/red]"
-    if result == "error":
-        return "[red]error[/red]"
-    return result
-
-
 if __name__ == "__main__":
     app()
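Taken together, the cli.py changes drop the inline result-rendering helpers (_handle_result, _print_table, to_field, with_markup) in favor of the new datacontract.output package, and both lint and test gain --output / --output-format options for writing test results to a file. A typical invocation might look like the following; the junit format name is an assumption based on the new junit_test_results.py module, so check the CLI help for the exact accepted values:

    datacontract test datacontract.yaml --output ./test-results/TEST-datacontract.xml --output-format junit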
datacontract/data_contract.py
CHANGED
@@ -4,6 +4,8 @@ import typing
 if typing.TYPE_CHECKING:
     from pyspark.sql import SparkSession

+from duckdb.duckdb import DuckDBPyConnection
+
 from datacontract.breaking.breaking import (
     info_breaking_changes,
     models_breaking_changes,
@@ -22,7 +24,6 @@ from datacontract.lint.linters.description_linter import DescriptionLinter
 from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
 from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
 from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
-from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
 from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
 from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.model.exceptions import DataContractException
@@ -39,6 +40,7 @@ class DataContract:
         server: str = None,
         publish_url: str = None,
         spark: "SparkSession" = None,
+        duckdb_connection: DuckDBPyConnection = None,
         inline_definitions: bool = True,
         inline_quality: bool = True,
         ssl_verification: bool = True,
@@ -50,11 +52,11 @@ class DataContract:
         self._server = server
         self._publish_url = publish_url
         self._spark = spark
+        self._duckdb_connection = duckdb_connection
         self._inline_definitions = inline_definitions
         self._inline_quality = inline_quality
         self._ssl_verification = ssl_verification
         self.all_linters = {
-            QualityUsesSchemaLinter(),
             FieldPatternLinter(),
             FieldReferenceLinter(),
             NoticePeriodLinter(),
@@ -146,7 +148,7 @@ class DataContract:
                 inline_quality=self._inline_quality,
             )

-            execute_data_contract_test(data_contract, run, self._server, self._spark)
+            execute_data_contract_test(data_contract, run, self._server, self._spark, self._duckdb_connection)

         except DataContractException as e:
             run.checks.append(
datacontract/engines/data_contract_test.py
CHANGED
@@ -1,5 +1,7 @@
 import typing

+from duckdb.duckdb import DuckDBPyConnection
+
 from datacontract.engines.data_contract_checks import create_checks

 if typing.TYPE_CHECKING:
@@ -10,7 +12,7 @@ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_se
 )
 from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
-from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import ResultEnum, Run

@@ -20,6 +22,7 @@ def execute_data_contract_test(
     run: Run,
     server_name: str = None,
     spark: "SparkSession" = None,
+    duckdb_connection: DuckDBPyConnection = None,
 ):
     if data_contract_specification.models is None or len(data_contract_specification.models) == 0:
         raise DataContractException(
@@ -29,12 +32,13 @@ def execute_data_contract_test(
             reason="Models block is missing. Skip executing tests.",
             engine="datacontract",
         )
-
-
-
-
+    if (
+        server_name is None
+        and data_contract_specification.servers is not None
+        and len(data_contract_specification.servers) > 0
+    ):
         server_name = list(data_contract_specification.servers.keys())[0]
-
+    server = get_server(data_contract_specification, server_name)
     run.log_info(f"Running tests for data contract {data_contract_specification.id} with server {server_name}")
     run.dataContractId = data_contract_specification.id
     run.dataContractVersion = data_contract_specification.info.version
@@ -48,4 +52,25 @@ def execute_data_contract_test(
     # TODO check server credentials are complete for nicer error messages
     if server.format == "json" and server.type != "kafka":
         check_jsonschema(run, data_contract_specification, server)
-    check_soda_execute(run, data_contract_specification, server, spark)
+    check_soda_execute(run, data_contract_specification, server, spark, duckdb_connection)
+
+
+def get_server(data_contract_specification: DataContractSpecification, server_name: str = None) -> Server | None:
+    """Get the server configuration from the data contract specification.
+
+    Args:
+        data_contract_specification: The data contract specification
+        server_name: Optional name of the server to use. If not provided, uses the first server.
+
+    Returns:
+        The selected server configuration
+    """
+
+    check_that_datacontract_contains_valid_server_configuration(data_contract_specification, server_name)
+
+    if server_name is not None:
+        server = data_contract_specification.servers.get(server_name)
+    else:
+        server_name = list(data_contract_specification.servers.keys())[0]
+        server = data_contract_specification.servers.get(server_name)
+    return server
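The thread running through data_contract.py, data_contract_test.py, and check_soda_execute.py is the new optional duckdb_connection parameter: callers can hand in a pre-configured DuckDB connection instead of letting the engine open its own in-memory one. A minimal sketch of how this might be used from Python (the contract path is a placeholder):

    import duckdb

    from datacontract.data_contract import DataContract

    # Reuse an existing DuckDB connection; anything already attached to it
    # (extensions, secrets, registered views) stays available to the tests.
    con = duckdb.connect(database=":memory:")

    run = DataContract(
        data_contract_file="datacontract.yaml",  # placeholder path
        duckdb_connection=con,
    ).test()
    print(run.result)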
datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py
CHANGED
@@ -1,12 +1,11 @@
 from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.model.exceptions import DataContractException
-from datacontract.model.run import Run


 def check_that_datacontract_contains_valid_server_configuration(
-
+    data_contract: DataContractSpecification, server_name: str | None
 ):
-    if data_contract.servers is None:
+    if data_contract.servers is None or len(data_contract.servers) == 0:
         raise DataContractException(
             type="lint",
             name="Check that data contract contains valid server configuration",
datacontract/engines/fastjsonschema/s3/s3_read_files.py
CHANGED
@@ -2,6 +2,7 @@ import logging
 import os

 from datacontract.model.exceptions import DataContractException
+from datacontract.model.run import ResultEnum


 def yield_s3_files(s3_endpoint_url, s3_location):
@@ -19,9 +20,9 @@ def s3_fs(s3_endpoint_url):
     except ImportError as e:
         raise DataContractException(
             type="schema",
-            result=
+            result=ResultEnum.failed,
             name="s3 extra missing",
-            reason="Install the extra
+            reason="Install the extra s3 to use s3",
             engine="datacontract",
             original_exception=e,
         )
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -1,9 +1,15 @@
 import logging
+import typing
 import uuid

+if typing.TYPE_CHECKING:
+    from pyspark.sql import SparkSession
+
+from duckdb.duckdb import DuckDBPyConnection
+
 from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
 from datacontract.engines.soda.connections.databricks import to_databricks_soda_configuration
-from datacontract.engines.soda.connections.
+from datacontract.engines.soda.connections.duckdb_connection import get_duckdb_connection
 from datacontract.engines.soda.connections.kafka import create_spark_session, read_kafka_topic
 from datacontract.engines.soda.connections.postgres import to_postgres_soda_configuration
 from datacontract.engines.soda.connections.snowflake import to_snowflake_soda_configuration
@@ -14,7 +20,13 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 from datacontract.model.run import Check, Log, ResultEnum, Run


-def check_soda_execute(
+def check_soda_execute(
+    run: Run,
+    data_contract: DataContractSpecification,
+    server: Server,
+    spark: "SparkSession" = None,
+    duckdb_connection: DuckDBPyConnection = None,
+):
     from soda.common.config_helper import ConfigHelper

     ConfigHelper.get_instance().upsert_value("send_anonymous_usage_stats", False)
@@ -30,7 +42,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
     if server.type in ["s3", "gcs", "azure", "local"]:
         if server.format in ["json", "parquet", "csv", "delta"]:
             run.log_info(f"Configuring engine soda-core to connect to {server.type} {server.format} with duckdb")
-            con = get_duckdb_connection(data_contract, server, run)
+            con = get_duckdb_connection(data_contract, server, run, duckdb_connection)
             scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
             scan.set_data_source_name(server.type)
         else:
@@ -62,7 +74,8 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
             run.log_info("Connecting to databricks via spark")
             scan.add_spark_session(spark, data_source_name=server.type)
             scan.set_data_source_name(server.type)
-
+            database_name = ".".join(filter(None, [server.catalog, server.schema_]))
+            spark.sql(f"USE {database_name}")
         else:
             run.log_info("Connecting to databricks directly")
             soda_configuration_str = to_databricks_soda_configuration(server)
datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py}
CHANGED
@@ -1,13 +1,24 @@
 import os
+from typing import Any, Dict

 import duckdb

-from datacontract.export.
+from datacontract.export.duckdb_type_converter import convert_to_duckdb_csv_type, convert_to_duckdb_json_type
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
 from datacontract.model.run import Run


-def get_duckdb_connection(
-
+def get_duckdb_connection(
+    data_contract: DataContractSpecification,
+    server: Server,
+    run: Run,
+    duckdb_connection: duckdb.DuckDBPyConnection | None = None,
+) -> duckdb.DuckDBPyConnection:
+    if duckdb_connection is None:
+        con = duckdb.connect(database=":memory:")
+    else:
+        con = duckdb_connection
+
     path: str = ""
     if server.type == "local":
         path = server.path
@@ -27,14 +38,21 @@ def get_duckdb_connection(data_contract, server, run: Run):
         run.log_info(f"Creating table {model_name} for {model_path}")

         if server.format == "json":
-
+            json_format = "auto"
             if server.delimiter == "new_line":
-
+                json_format = "newline_delimited"
             elif server.delimiter == "array":
-
-
-
+                json_format = "array"
+            columns = to_json_types(model)
+            if columns is None:
+                con.sql(f"""
+                    CREATE VIEW "{model_name}" AS SELECT * FROM read_json_auto('{model_path}', format='{json_format}', hive_partitioning=1);
                 """)
+            else:
+                con.sql(
+                    f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_json_auto('{model_path}', format='{json_format}', columns={columns}, hive_partitioning=1);"""
+                )
+            add_nested_views(con, model_name, model.fields)
         elif server.format == "parquet":
             con.sql(f"""
                 CREATE VIEW "{model_name}" AS SELECT * FROM read_parquet('{model_path}', hive_partitioning=1);
@@ -56,7 +74,7 @@ def get_duckdb_connection(data_contract, server, run: Run):
     return con


-def to_csv_types(model) -> dict:
+def to_csv_types(model) -> dict[Any, str | None] | None:
     if model is None:
         return None
     columns = {}
@@ -66,6 +84,45 @@ def to_csv_types(model) -> dict:
     return columns


+def to_json_types(model: Model) -> dict[Any, str | None] | None:
+    if model is None:
+        return None
+    columns = {}
+    for field_name, field in model.fields.items():
+        columns[field_name] = convert_to_duckdb_json_type(field)
+    return columns
+
+
+def add_nested_views(con: duckdb.DuckDBPyConnection, model_name: str, fields: Dict[str, Field] | None):
+    model_name = model_name.strip('"')
+    if fields is None:
+        return
+    for field_name, field in fields.items():
+        if field.type is None or field.type.lower() not in ["array", "object"]:
+            continue
+        field_type = field.type.lower()
+        if field_type == "array" and field.items is None:
+            continue
+        elif field_type == "object" and field.fields is None:
+            continue
+
+        nested_model_name = f"{model_name}__{field_name}"
+        max_depth = 2 if field_type == "array" else 1
+
+        ## if parent field is not required, the nested objects may respolve
+        ## to a row of NULLs -- but if the objects themselves have required
+        ## fields, this will fail the check.
+        where = "" if field.required else f" WHERE {field_name} IS NOT NULL"
+        con.sql(f"""
+            CREATE VIEW IF NOT EXISTS "{nested_model_name}" AS
+            SELECT unnest({field_name}, max_depth := {max_depth}) as {field_name} FROM "{model_name}" {where}
+        """)
+        if field_type == "array":
+            add_nested_views(con, nested_model_name, field.items.fields)
+        elif field_type == "object":
+            add_nested_views(con, nested_model_name, field.fields)
+
+
 def setup_s3_connection(con, server):
     s3_region = os.getenv("DATACONTRACT_S3_REGION")
     s3_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
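The most substantial addition here is add_nested_views: for every array or object field, the loader now creates a helper view named after the model and field (orders__items style) that unnests the nested structure so checks can target nested fields. A standalone sketch of the idea against a toy table (all names are made up), mirroring the statements the function generates, in plain DuckDB:

    import duckdb

    con = duckdb.connect(database=":memory:")
    # A toy "orders" model with a nested array field "items".
    con.sql("CREATE TABLE orders AS SELECT 1 AS id, [{'sku': 'a', 'qty': 2}] AS items")

    # Roughly what add_nested_views(con, "orders", ...) would emit for "items":
    # one view per nested field, unnesting the list and the struct (max_depth := 2).
    con.sql("""
        CREATE VIEW IF NOT EXISTS "orders__items" AS
        SELECT unnest(items, max_depth := 2) AS items FROM "orders"
    """)
    print(con.sql('SELECT * FROM "orders__items"').fetchall())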
datacontract/engines/soda/connections/kafka.py
CHANGED
@@ -6,6 +6,7 @@ import tempfile
 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.model.data_contract_specification import DataContractSpecification, Field, Server
 from datacontract.model.exceptions import DataContractException
+from datacontract.model.run import ResultEnum


 def create_spark_session():
@@ -16,7 +17,7 @@ def create_spark_session():
     except ImportError as e:
         raise DataContractException(
             type="schema",
-            result=
+            result=ResultEnum.failed,
             name="pyspark is missing",
             reason="Install the extra datacontract-cli[kafka] to use kafka",
             engine="datacontract",
@@ -33,7 +34,7 @@ def create_spark_session():
         .config("spark.ui.enabled", "false")
         .config(
             "spark.jars.packages",
-            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.
+            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.5,org.apache.spark:spark-avro_2.12:3.5.5",
         )
         .getOrCreate()
     )
datacontract/export/avro_converter.py
CHANGED
@@ -91,7 +91,9 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
         if field.precision is not None:
             typeVal["precision"] = field.precision
         return typeVal
-    elif field.type in ["float"
+    elif field.type in ["float"]:
+        return "float"
+    elif field.type in ["double"]:
         return "double"
     elif field.type in ["integer", "int"]:
         return "int"
@@ -107,9 +109,14 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
         return {"type": "int", "logicalType": "date"}
     elif field.type in ["time"]:
         return "long"
+    elif field.type in ["map"]:
+        if field.config is not None and "values" in field.config:
+            return {"type": "map", "values": field.config["values"]}
+        else:
+            return "bytes"
     elif field.type in ["object", "record", "struct"]:
-        if field.config is not None and
-        return to_avro_record(field_name
+        if field.config is not None and "namespace" in field.config:
+            return to_avro_record(field_name, field.fields, field.description, field.config["namespace"])
         return to_avro_record(field_name, field.fields, field.description, None)
     elif field.type in ["binary"]:
         return "bytes"
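With the new branches, float fields get their own Avro "float" mapping, and a map-typed field exports as an Avro map when config["values"] names the value type, falling back to "bytes" otherwise. A minimal sketch, assuming the Field model accepts these keyword arguments:

    from datacontract.export.avro_converter import to_avro_type
    from datacontract.model.data_contract_specification import Field

    # Hypothetical map-typed field; config["values"] names the Avro value type.
    print(to_avro_type(Field(type="map", config={"values": "string"}), "labels"))
    # expected: {'type': 'map', 'values': 'string'}

    print(to_avro_type(Field(type="map"), "labels"))
    # expected: 'bytes' (no config["values"] given)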
datacontract/export/bigquery_converter.py
CHANGED
@@ -103,7 +103,7 @@ def map_type_to_bigquery(field: Field) -> str:
     elif field_type.lower() == "date":
         return "DATE"
     elif field_type.lower() == "timestamp_ntz":
-        return "
+        return "DATETIME"
     elif field_type.lower() in ["number", "decimal", "numeric"]:
         return "NUMERIC"
     elif field_type.lower() == "double":
datacontract/export/dbt_converter.py
CHANGED
@@ -9,7 +9,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica

 class DbtExporter(Exporter):
     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
-        return to_dbt_models_yaml(data_contract)
+        return to_dbt_models_yaml(data_contract, server)


 class DbtSourceExporter(Exporter):
@@ -27,15 +27,16 @@ class DbtStageExporter(Exporter):
         )


-def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
+def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None):
     dbt = {
         "version": 2,
         "models": [],
     }
+
     for model_key, model_value in data_contract_spec.models.items():
-        dbt_model = _to_dbt_model(model_key, model_value, data_contract_spec)
+        dbt_model = _to_dbt_model(model_key, model_value, data_contract_spec, adapter_type=server)
         dbt["models"].append(dbt_model)
-    return yaml.
+    return yaml.safe_dump(dbt, indent=2, sort_keys=False, allow_unicode=True)


 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
@@ -60,7 +61,7 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s
     if data_contract_spec.info.owner is not None:
         source["meta"] = {"owner": data_contract_spec.info.owner}
     if data_contract_spec.info.description is not None:
-        source["description"] = data_contract_spec.info.description
+        source["description"] = data_contract_spec.info.description.strip().replace("\n", " ")
     found_server = data_contract_spec.servers.get(server)
     adapter_type = None
     if found_server is not None:
@@ -87,14 +88,16 @@ def _to_dbt_source_table(
     }

     if model_value.description is not None:
-        dbt_model["description"] = model_value.description
+        dbt_model["description"] = model_value.description.strip().replace("\n", " ")
     columns = _to_columns(data_contract_spec, model_value.fields, False, adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model


-def _to_dbt_model(
+def _to_dbt_model(
+    model_key, model_value: Model, data_contract_spec: DataContractSpecification, adapter_type: Optional[str]
+) -> dict:
     dbt_model = {
         "name": model_key,
     }
@@ -108,8 +111,8 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
     if _supports_constraints(model_type):
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
-        dbt_model["description"] = model_value.description
-    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type),
+        dbt_model["description"] = model_value.description.strip().replace("\n", " ")
+    columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model
@@ -171,7 +174,7 @@ def _to_column(
             {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
         )
     if field.description is not None:
-        column["description"] = field.description
+        column["description"] = field.description.strip().replace("\n", " ")
     if field.required:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "not_null"})