datacontract-cli 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This release has been flagged as potentially problematic.
- datacontract/cli.py +26 -24
- datacontract/data_contract.py +69 -152
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
- datacontract/engines/soda/check_soda_execute.py +11 -0
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/kafka.py +3 -0
- datacontract/export/__init__.py +0 -0
- datacontract/export/avro_converter.py +28 -21
- datacontract/export/avro_idl_converter.py +29 -22
- datacontract/export/bigquery_converter.py +15 -0
- datacontract/export/dbml_converter.py +9 -0
- datacontract/export/dbt_converter.py +26 -1
- datacontract/export/exporter.py +87 -0
- datacontract/export/exporter_factory.py +52 -0
- datacontract/export/go_converter.py +6 -0
- datacontract/export/great_expectations_converter.py +10 -0
- datacontract/export/html_export.py +6 -0
- datacontract/export/jsonschema_converter.py +24 -16
- datacontract/export/odcs_converter.py +24 -1
- datacontract/export/protobuf_converter.py +6 -0
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +9 -0
- datacontract/export/sodacl_converter.py +7 -1
- datacontract/export/sql_converter.py +32 -2
- datacontract/export/sql_type_converter.py +4 -5
- datacontract/export/terraform_converter.py +6 -0
- datacontract/imports/bigquery_importer.py +30 -4
- datacontract/imports/glue_importer.py +13 -3
- datacontract/imports/odcs_importer.py +192 -0
- datacontract/imports/unity_importer.py +138 -0
- datacontract/model/data_contract_specification.py +2 -0
- datacontract/templates/partials/server.html +64 -32
- datacontract/templates/style/output.css +9 -0
- datacontract/web.py +56 -2
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/METADATA +232 -96
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/RECORD +40 -35
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Iterable, Optional
 from typing import List
 
 import typer
+import uvicorn
 from click import Context
 from rich import box
 from rich.console import Console
@@ -12,8 +13,9 @@ from rich.table import Table
 from typer.core import TyperGroup
 from typing_extensions import Annotated
 
+from datacontract import web
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
-from datacontract.data_contract import DataContract
+from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
 from datacontract.publish.publish import publish_to_datamesh_manager
 
@@ -141,28 +143,6 @@ def test(
     _handle_result(run)
 
 
-class ExportFormat(str, Enum):
-    jsonschema = "jsonschema"
-    pydantic_model = "pydantic-model"
-    sodacl = "sodacl"
-    dbt = "dbt"
-    dbt_sources = "dbt-sources"
-    dbt_staging_sql = "dbt-staging-sql"
-    odcs = "odcs"
-    rdf = "rdf"
-    avro = "avro"
-    protobuf = "protobuf"
-    great_expectations = "great-expectations"
-    terraform = "terraform"
-    avro_idl = "avro-idl"
-    sql = "sql"
-    sql_query = "sql-query"
-    html = "html"
-    go = "go"
-    bigquery = "bigquery"
-    dbml = "dbml"
-
-
 @app.command()
 def export(
     format: Annotated[ExportFormat, typer.Option(help="The export format.")],
@@ -205,6 +185,7 @@ def export(
     result = DataContract(data_contract_file=location, server=server).export(
         export_format=format,
         model=model,
+        server=server,
        rdf_base=rdf_base,
         sql_server_type=sql_server_type,
     )
@@ -223,6 +204,8 @@ class ImportFormat(str, Enum):
    glue = "glue"
    bigquery = "bigquery"
    jsonschema = "jsonschema"
+    odcs="odcs"
+    unity = "unity"
 
 
 @app.command(name="import")
@@ -231,6 +214,12 @@ def import_(
    source: Annotated[
        Optional[str], typer.Option(help="The path to the file or Glue Database that should be imported.")
    ] = None,
+    glue_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table ids to import from the Glue Database (repeat for multiple table ids, leave empty for all tables in the dataset)."
+        ),
+    ] = None,
    bigquery_project: Annotated[Optional[str], typer.Option(help="The bigquery project id.")] = None,
    bigquery_dataset: Annotated[Optional[str], typer.Option(help="The bigquery dataset id.")] = None,
    bigquery_table: Annotated[
@@ -239,11 +228,12 @@ def import_(
            help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)."
        ),
    ] = None,
+    unity_table_full_name: Annotated[Optional[str], typer.Option(help="Full name of a table in the unity catalog")] = None,
 ):
    """
    Create a data contract from the given source location. Prints to stdout.
    """
-    result = DataContract().import_from_source(format, source, bigquery_table, bigquery_project, bigquery_dataset)
+    result = DataContract().import_from_source(format, source, glue_table, bigquery_table, bigquery_project, bigquery_dataset, unity_table_full_name)
    console.print(result.to_yaml())
 
 
@@ -339,6 +329,18 @@ def diff(
    console.print(result.changelog_str())
 
 
+@app.command()
+def serve(
+    port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
+    host: Annotated[str, typer.Option(help="Bind socket to this host.")] = "127.0.0.1",
+):
+    """
+    Start the datacontract web server.
+    """
+
+    uvicorn.run(web.app, port=port, host=host)
+
+
 def _handle_result(run):
    _print_table(run)
    if run.result == "passed":
datacontract/data_contract.py
CHANGED
@@ -12,27 +12,15 @@ from datacontract.engines.datacontract.check_that_datacontract_contains_valid_se
 )
 from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
-from datacontract.export.
-from datacontract.export.
-from datacontract.export.bigquery_converter import to_bigquery_json
-from datacontract.export.dbml_converter import to_dbml_diagram
-from datacontract.export.dbt_converter import to_dbt_models_yaml, to_dbt_sources_yaml, to_dbt_staging_sql
-from datacontract.export.go_converter import to_go_types
-from datacontract.export.great_expectations_converter import to_great_expectations
-from datacontract.export.html_export import to_html
-from datacontract.export.jsonschema_converter import to_jsonschema_json
-from datacontract.export.odcs_converter import to_odcs_yaml
-from datacontract.export.protobuf_converter import to_protobuf
-from datacontract.export.pydantic_converter import to_pydantic_model_str
-from datacontract.export.rdf_converter import to_rdf_n3
-from datacontract.export.sodacl_converter import to_sodacl_yaml
-from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
-from datacontract.export.terraform_converter import to_terraform
+from datacontract.export.exporter import ExportFormat
+from datacontract.export.exporter_factory import exporter_factory
 from datacontract.imports.avro_importer import import_avro
 from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
 from datacontract.imports.glue_importer import import_glue
 from datacontract.imports.jsonschema_importer import import_jsonschema
+from datacontract.imports.odcs_importer import import_odcs
 from datacontract.imports.sql_importer import import_sql
+from datacontract.imports.unity_importer import import_unity_from_json, import_unity_from_api
 from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
@@ -184,6 +172,9 @@ class DataContract:
            if self._examples:
                server_name = "examples"
                server = self._get_examples_server(data_contract, run, tmp_dir)
+            elif self._server:
+                server_name = self._server
+                server = data_contract.servers.get(server_name)
            else:
                server_name = list(data_contract.servers.keys())[0]
                server = data_contract.servers.get(server_name)
@@ -195,10 +186,13 @@ class DataContract:
            run.outputPortId = server.outputPortId
            run.server = server_name
 
-            #
-
+            # TODO check server is supported type for nicer error messages
+
+            # TODO check server credentials are complete for nicer error messages
+
            if server.format == "json" and server.type != "kafka":
                check_jsonschema(run, data_contract, server)
+
            check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
 
        except DataContractException as e:
@@ -234,6 +228,38 @@ class DataContract:
 
        return run
 
+    def _get_examples_server(self, data_contract, run, tmp_dir):
+        run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
+        format = "json"
+        for example in data_contract.examples:
+            format = example.type
+            p = f"{tmp_dir}/{example.model}.{format}"
+            run.log_info(f"Creating example file {p}")
+            with open(p, "w") as f:
+                content = ""
+                if format == "json" and isinstance(example.data, list):
+                    content = json.dumps(example.data)
+                elif format == "json" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "yaml" and isinstance(example.data, list):
+                    content = yaml.dump(example.data, allow_unicode=True)
+                elif format == "yaml" and isinstance(example.data, str):
+                    content = example.data
+                elif format == "csv":
+                    content = example.data
+                logging.debug(f"Content of example file {p}: {content}")
+                f.write(content)
+        path = f"{tmp_dir}" + "/{model}." + format
+        delimiter = "array"
+        server = Server(
+            type="local",
+            path=path,
+            format=format,
+            delimiter=delimiter,
+        )
+        run.log_info(f"Using {server} for testing the examples")
+        return server
+
    def breaking(self, other: "DataContract") -> BreakingChanges:
        return self.changelog(other, include_severities=[Severity.ERROR, Severity.WARNING])
@@ -275,7 +301,13 @@ class DataContract:
            inline_quality=self._inline_quality,
        )
 
-    def export(
+    def export(
+        self,
+        export_format: ExportFormat,
+        model: str = "all",
+        sql_server_type: str = "auto",
+        **kwargs,
+    ) -> str:
        data_contract = resolve.resolve_data_contract(
            self._data_contract_file,
            self._data_contract_str,
@@ -283,148 +315,24 @@ class DataContract:
            inline_definitions=True,
            inline_quality=True,
        )
-        if export_format == "jsonschema":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_jsonschema_json(model_name, model_value)
-        if export_format == "sodacl":
-            return to_sodacl_yaml(data_contract)
-        if export_format == "dbt":
-            return to_dbt_models_yaml(data_contract)
-        if export_format == "dbt-sources":
-            return to_dbt_sources_yaml(data_contract, self._server)
-        if export_format == "dbt-staging-sql":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_dbt_staging_sql(data_contract, model_name, model_value)
-        if export_format == "odcs":
-            return to_odcs_yaml(data_contract)
-        if export_format == "rdf":
-            return to_rdf_n3(data_contract, rdf_base)
-        if export_format == "protobuf":
-            return to_protobuf(data_contract)
-        if export_format == "avro":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_avro_schema_json(model_name, model_value)
-        if export_format == "avro-idl":
-            return to_avro_idl(data_contract)
-        if export_format == "terraform":
-            return to_terraform(data_contract)
-        if export_format == "sql":
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_ddl(data_contract, server_type=server_type)
-        if export_format == "sql-query":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            server_type = self._determine_sql_server_type(data_contract, sql_server_type)
-            return to_sql_query(data_contract, model_name, model_value, server_type)
-        if export_format == "great-expectations":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            return to_great_expectations(data_contract, model_name)
-        if export_format == "pydantic-model":
-            return to_pydantic_model_str(data_contract)
-        if export_format == "html":
-            return to_html(data_contract)
-        if export_format == "go":
-            return to_go_types(data_contract)
-        if export_format == "bigquery":
-            model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
-            found_server = data_contract.servers.get(self._server)
-            if found_server is None:
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            if found_server.type != "bigquery":
-                raise RuntimeError(
-                    f"Export to {export_format} requires selecting a bigquery server from the data contract."
-                )
-            return to_bigquery_json(model_name, model_value, found_server)
-        if export_format == "dbml":
-            found_server = data_contract.servers.get(self._server)
-            return to_dbml_diagram(data_contract, found_server)
-        else:
-            print(f"Export format {export_format} not supported.")
-            return ""
-
-    def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
-        if sql_server_type == "auto":
-            if data_contract.servers is None or len(data_contract.servers) == 0:
-                raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
-
-            server_types = set([server.type for server in data_contract.servers.values()])
-            if "snowflake" in server_types:
-                return "snowflake"
-            elif "postgres" in server_types:
-                return "postgres"
-            elif "databricks" in server_types:
-                return "databricks"
-            else:
-                # default to snowflake dialect
-                return "snowflake"
-        else:
-            return sql_server_type
 
-
-
-
-
-
-        run.log_info(f"Creating example file {p}")
-        with open(p, "w") as f:
-            content = ""
-            if format == "json" and isinstance(example.data, list):
-                content = json.dumps(example.data)
-            elif format == "json" and isinstance(example.data, str):
-                content = example.data
-            elif format == "yaml" and isinstance(example.data, list):
-                content = yaml.dump(example.data, allow_unicode=True)
-            elif format == "yaml" and isinstance(example.data, str):
-                content = example.data
-            elif format == "csv":
-                content = example.data
-            logging.debug(f"Content of example file {p}: {content}")
-            f.write(content)
-        path = f"{tmp_dir}" + "/{model}." + format
-        delimiter = "array"
-        server = Server(
-            type="local",
-            path=path,
-            format=format,
-            delimiter=delimiter,
+        return exporter_factory.create(export_format).export(
+            data_contract=data_contract,
+            model=model,
+            server=self._server,
+            sql_server_type=sql_server_type,
+            export_args=kwargs,
        )
-        run.log_info(f"Using {server} for testing the examples")
-        return server
-
-    def _check_models_for_export(
-        self, data_contract: DataContractSpecification, model: str, export_format: str
-    ) -> typing.Tuple[str, str]:
-        if data_contract.models is None:
-            raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
-
-        model_names = list(data_contract.models.keys())
-
-        if model == "all":
-            if len(data_contract.models.items()) != 1:
-                raise RuntimeError(
-                    f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
-                )
-
-            model_name, model_value = next(iter(data_contract.models.items()))
-        else:
-            model_name = model
-            model_value = data_contract.models.get(model_name)
-            if model_value is None:
-                raise RuntimeError(
-                    f"Model {model_name} not found in the data contract. Available models: {model_names}"
-                )
-
-        return model_name, model_value
 
    def import_from_source(
        self,
        format: str,
        source: typing.Optional[str] = None,
+        glue_tables: typing.Optional[typing.List[str]] = None,
        bigquery_tables: typing.Optional[typing.List[str]] = None,
        bigquery_project: typing.Optional[str] = None,
        bigquery_dataset: typing.Optional[str] = None,
+        unity_table_full_name: typing.Optional[str] = None
    ) -> DataContractSpecification:
        data_contract_specification = DataContract.init()
 
@@ -433,7 +341,7 @@ class DataContract:
        elif format == "avro":
            data_contract_specification = import_avro(data_contract_specification, source)
        elif format == "glue":
-            data_contract_specification = import_glue(data_contract_specification, source)
+            data_contract_specification = import_glue(data_contract_specification, source, glue_tables)
        elif format == "jsonschema":
            data_contract_specification = import_jsonschema(data_contract_specification, source)
        elif format == "bigquery":
@@ -443,6 +351,15 @@ class DataContract:
            data_contract_specification = import_bigquery_from_api(
                data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset
            )
+        elif format == "odcs":
+            data_contract_specification = import_odcs(data_contract_specification, source)
+        elif format == "unity":
+            if source is not None:
+                data_contract_specification = import_unity_from_json(data_contract_specification, source)
+            else:
+                data_contract_specification = import_unity_from_api(
+                    data_contract_specification, unity_table_full_name
+                )
        else:
            print(f"Import format {format} not supported.")
 
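To illustrate the refactored entry points, a hedged usage sketch based only on the signatures visible in this diff; the contract file name, model name, and unity table name are placeholders:

# Hedged sketch: calling the refactored export() and import_from_source().
# File name, model name, and unity table name below are placeholders.
from datacontract.data_contract import DataContract
from datacontract.export.exporter import ExportFormat

# export() now delegates to exporter_factory; extra keyword arguments such as
# rdf_base are passed through as export_args.
avro_schema = DataContract(data_contract_file="datacontract.yaml", server="production").export(
    export_format=ExportFormat.avro,
    model="orders",
)

# import_from_source() gained glue_tables and unity_table_full_name parameters.
spec = DataContract().import_from_source(
    format="unity",
    unity_table_full_name="my_catalog.my_schema.my_table",
)
print(spec.to_yaml())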
datacontract/engines/fastjsonschema/s3/s3_read_files.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 import os
 
-import
+from datacontract.model.exceptions import DataContractException
 
 
 def yield_s3_files(s3_endpoint_url, s3_location):
@@ -14,6 +14,18 @@ def yield_s3_files(s3_endpoint_url, s3_location):
 
 
 def s3_fs(s3_endpoint_url):
+    try:
+        import s3fs
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="s3 extra missing",
+            reason="Install the extra datacontract-cli\[s3] to use s3",
+            engine="datacontract",
+            original_exception=e,
+        )
+
    aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
    aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
    return s3fs.S3FileSystem(
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -64,6 +64,17 @@ def check_soda_execute(
        soda_configuration_str = to_databricks_soda_configuration(server)
        scan.add_configuration_yaml_str(soda_configuration_str)
        scan.set_data_source_name(server.type)
+    elif server.type == "dataframe":
+        if spark is None:
+            run.log_warn(
+                "Server type dataframe only works with the Python library and requires a Spark session, "
+                "please provide one with the DataContract class"
+            )
+            return
+        else:
+            logging.info("Use Spark to connect to data source")
+            scan.add_spark_session(spark, data_source_name="datacontract-cli")
+            scan.set_data_source_name("datacontract-cli")
    elif server.type == "kafka":
        if spark is None:
            spark = create_spark_session(tmp_dir)
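The new dataframe server type only works when a Spark session is supplied to the DataContract class. A hedged sketch; the constructor keyword and the model/view name are assumptions, not shown in this diff:

# Hedged sketch: testing a data contract against an in-memory Spark DataFrame.
# Assumes the DataContract constructor accepts the session via a `spark` argument
# and that the contract defines a model named "orders" served as a temp view.
from pyspark.sql import SparkSession

from datacontract.data_contract import DataContract

spark = SparkSession.builder.getOrCreate()
spark.createDataFrame([(1, "shipped")], ["order_id", "status"]).createOrReplaceTempView("orders")

run = DataContract(data_contract_file="datacontract.yaml", spark=spark).test()
print(run.result)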
datacontract/engines/soda/connections/bigquery.py
CHANGED
@@ -6,10 +6,17 @@ import yaml
 # https://docs.soda.io/soda/connect-bigquery.html#authentication-methods
 def to_bigquery_soda_configuration(server):
    # with service account key, using an external json file
+
+    # check for our own environment variable first
+    account_info = os.getenv("DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH")
+    if account_info is None:
+        # but as a fallback look for the default google one
+        account_info = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
    soda_configuration = {
        f"data_source {server.type}": {
            "type": "bigquery",
-            "account_info_json_path":
+            "account_info_json_path": account_info,
            "auth_scopes": ["https://www.googleapis.com/auth/bigquery"],
            "project_id": server.project,
            "dataset": server.dataset,
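Given the new credential lookup above, a hedged sketch of pointing the Soda BigQuery connection at a service account key before running a test; the key path and the server name are placeholders:

# Hedged sketch: the connection now reads DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH
# first and falls back to GOOGLE_APPLICATION_CREDENTIALS (see the hunk above).
import os

from datacontract.data_contract import DataContract

os.environ["DATACONTRACT_BIGQUERY_ACCOUNT_INFO_JSON_PATH"] = "/path/to/service-account.json"  # placeholder
run = DataContract(data_contract_file="datacontract.yaml", server="bigquery").test()  # server name assumed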
datacontract/engines/soda/connections/kafka.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 from pyspark.sql import SparkSession
 from pyspark.sql.functions import col, expr, from_json
@@ -44,6 +45,8 @@ def create_spark_session(tmp_dir: str) -> SparkSession:
 
 def read_kafka_topic(spark: SparkSession, data_contract: DataContractSpecification, server: Server, tmp_dir):
    """Read and process data from a Kafka topic based on the server configuration."""
+
+    logging.info("Reading data from Kafka server %s topic %s", server.host, server.topic)
    df = (
        spark.read.format("kafka")
        .options(**get_auth_options())
datacontract/export/__init__.py
File without changes
datacontract/export/avro_converter.py
CHANGED
@@ -1,8 +1,15 @@
 import json
 
+from datacontract.export.exporter import Exporter, _check_models_for_export
 from datacontract.model.data_contract_specification import Field
 
 
+class AvroExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_avro_schema_json(model_name, model_value)
+
+
 def to_avro_schema(model_name, model) -> dict:
    return to_avro_record(model_name, model.fields, model.description, model.namespace)
 
@@ -34,13 +41,8 @@ def to_avro_field(field, field_name):
    if field.description is not None:
        avro_field["doc"] = field.description
    avro_field["type"] = to_avro_type(field, field_name)
-    # add logical type definitions for any of the date type fields
-    if field.type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]:
-        avro_field["logicalType"] = to_avro_logical_type(field.type)
 
    if field.config:
-        if "avroLogicalType" in field.config:
-            avro_field["logicalType"] = field.config["avroLogicalType"]
        if "avroDefault" in field.config:
            avro_field["default"] = field.config["avroDefault"]
 
@@ -48,6 +50,23 @@ def to_avro_field(field, field_name):
 
 
 def to_avro_type(field: Field, field_name: str) -> str | dict:
+    if field.config:
+        if "avroLogicalType" in field.config and "avroType" in field.config:
+            return {"type": field.config["avroType"], "logicalType": field.config["avroLogicalType"]}
+        if "avroLogicalType" in field.config:
+            if field.config["avroLogicalType"] in [
+                "timestamp-millis",
+                "timestamp-micros",
+                "local-timestamp-millis",
+                "local-timestamp-micros",
+                "time-micros",
+            ]:
+                return {"type": "long", "logicalType": field.config["avroLogicalType"]}
+            if field.config["avroLogicalType"] in ["time-millis", "date"]:
+                return {"type": "int", "logicalType": field.config["avroLogicalType"]}
+        if "avroType" in field.config:
+            return field.config["avroLogicalType"]
+
    if field.type is None:
        return "null"
    if field.type in ["string", "varchar", "text"]:
@@ -64,11 +83,11 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
    elif field.type in ["boolean"]:
        return "boolean"
    elif field.type in ["timestamp", "timestamp_tz"]:
-        return "long"
+        return {"type": "long", "logicalType": "timestamp-millis"}
    elif field.type in ["timestamp_ntz"]:
-        return "long"
+        return {"type": "long", "logicalType": "local-timestamp-millis"}
    elif field.type in ["date"]:
-        return "int"
+        return {"type": "int", "logicalType": "date"}
    elif field.type in ["time"]:
        return "long"
    elif field.type in ["object", "record", "struct"]:
@@ -76,20 +95,8 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
    elif field.type in ["binary"]:
        return "bytes"
    elif field.type in ["array"]:
-
-        return "array"
+        return {"type": "array", "items": to_avro_type(field.items, field_name)}
    elif field.type in ["null"]:
        return "null"
    else:
        return "bytes"
-
-
-def to_avro_logical_type(type: str) -> str:
-    if type in ["timestamp", "timestamp_tz"]:
-        return "timestamp-millis"
-    elif type in ["timestamp_ntz"]:
-        return "local-timestamp-millis"
-    elif type in ["date"]:
-        return "date"
-    else:
-        return ""
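Under the new mapping, temporal fields now carry their Avro logicalType inline instead of a separate logicalType attribute. A hedged sketch of the resulting type objects; constructing a Field with only a type is assumed to be sufficient for illustration:

# Hedged sketch: the new to_avro_type() mapping for temporal fields.
from datacontract.export.avro_converter import to_avro_type
from datacontract.model.data_contract_specification import Field

print(to_avro_type(Field(type="timestamp"), "created_at"))
# -> {'type': 'long', 'logicalType': 'timestamp-millis'}
print(to_avro_type(Field(type="date"), "order_date"))
# -> {'type': 'int', 'logicalType': 'date'}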
datacontract/export/avro_idl_converter.py
CHANGED
@@ -7,28 +7,7 @@ from datacontract.lint.resolve import inline_definitions_into_data_contract
 from datacontract.model.data_contract_specification import DataContractSpecification, Field
 from datacontract.model.exceptions import DataContractException
 
-
-def to_avro_idl(contract: DataContractSpecification) -> str:
-    """Serialize the provided data contract specification into an Avro IDL string.
-
-    The data contract will be serialized as a protocol, with one record type
-    for each contained model. Model fields are mapped one-to-one to Avro IDL
-    record fields.
-    """
-    stream = StringIO()
-    to_avro_idl_stream(contract, stream)
-    return stream.getvalue()
-
-
-def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
-    """Serialize the provided data contract specification into Avro IDL."""
-    ir = _contract_to_avro_idl_ir(contract)
-    if ir.description:
-        stream.write(f"/** {contract.info.description} */\n")
-    stream.write(f"protocol {ir.name or 'Unnamed'} {{\n")
-    for model_type in ir.model_types:
-        _write_model_type(model_type, stream)
-    stream.write("}\n")
+from datacontract.export.exporter import Exporter
 
 
 class AvroPrimitiveType(Enum):
@@ -107,6 +86,34 @@ avro_primitive_types = set(
 )
 
 
+class AvroIdlExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_avro_idl(data_contract)
+
+
+def to_avro_idl(contract: DataContractSpecification) -> str:
+    """Serialize the provided data contract specification into an Avro IDL string.
+
+    The data contract will be serialized as a protocol, with one record type
+    for each contained model. Model fields are mapped one-to-one to Avro IDL
+    record fields.
+    """
+    stream = StringIO()
+    to_avro_idl_stream(contract, stream)
+    return stream.getvalue()
+
+
+def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
+    """Serialize the provided data contract specification into Avro IDL."""
+    ir = _contract_to_avro_idl_ir(contract)
+    if ir.description:
+        stream.write(f"/** {contract.info.description} */\n")
+    stream.write(f"protocol {ir.name or 'Unnamed'} {{\n")
+    for model_type in ir.model_types:
+        _write_model_type(model_type, stream)
+    stream.write("}\n")
+
+
 def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimitiveField:
    result = AvroPrimitiveField(field_name, field.required, field.description, AvroPrimitiveType.string)
    match field.type:
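The converters in this release expose Exporter subclasses with a uniform export() method. A hedged sketch of a custom exporter following that shape; the class name is hypothetical, and the registration mechanism in exporter_factory.py is not shown in this diff:

# Hedged sketch: a hypothetical exporter mirroring the interface used by
# AvroExporter and AvroIdlExporter above; factory registration is omitted
# because it is not visible in this diff.
from datacontract.export.exporter import Exporter


class ModelListExporter(Exporter):
    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
        # Return one line per model defined in the contract.
        return "\n".join(data_contract.models.keys())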
datacontract/export/bigquery_converter.py
CHANGED
@@ -5,6 +5,21 @@ from typing import Dict, List
 from datacontract.model.data_contract_specification import Model, Field, Server
 from datacontract.model.exceptions import DataContractException
 
+from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+class BigQueryExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        self.dict_args = export_args
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        found_server = data_contract.servers.get(server)
+        if found_server is None:
+            raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+        if found_server.type != "bigquery":
+            raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+
+        return to_bigquery_json(model_name, model_value, found_server)
+
 
 def to_bigquery_json(model_name: str, model_value: Model, server: Server) -> str:
    bigquery_table = to_bigquery_schema(model_name, model_value, server)