datacontract-cli 0.10.10__py3-none-any.whl → 0.10.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/cli.py +19 -3
- datacontract/data_contract.py +17 -17
- datacontract/engines/fastjsonschema/check_jsonschema.py +15 -1
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +2 -0
- datacontract/engines/soda/check_soda_execute.py +2 -8
- datacontract/engines/soda/connections/duckdb.py +23 -20
- datacontract/engines/soda/connections/kafka.py +81 -23
- datacontract/engines/soda/connections/snowflake.py +8 -5
- datacontract/export/avro_converter.py +12 -2
- datacontract/export/dbml_converter.py +42 -19
- datacontract/export/exporter.py +2 -1
- datacontract/export/exporter_factory.py +6 -0
- datacontract/export/jsonschema_converter.py +1 -4
- datacontract/export/spark_converter.py +4 -0
- datacontract/export/sql_type_converter.py +64 -29
- datacontract/export/sqlalchemy_converter.py +169 -0
- datacontract/imports/avro_importer.py +1 -0
- datacontract/imports/bigquery_importer.py +2 -2
- datacontract/imports/dbml_importer.py +112 -0
- datacontract/imports/dbt_importer.py +67 -91
- datacontract/imports/glue_importer.py +64 -54
- datacontract/imports/importer.py +3 -2
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/jsonschema_importer.py +106 -120
- datacontract/imports/odcs_importer.py +1 -1
- datacontract/imports/spark_importer.py +29 -10
- datacontract/imports/sql_importer.py +5 -1
- datacontract/imports/unity_importer.py +1 -1
- datacontract/integration/{publish_datamesh_manager.py → datamesh_manager.py} +33 -5
- datacontract/integration/{publish_opentelemetry.py → opentelemetry.py} +1 -1
- datacontract/model/data_contract_specification.py +6 -2
- datacontract/templates/partials/model_field.html +10 -2
- {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/METADATA +283 -113
- {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/RECORD +38 -37
- {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/WHEEL +1 -1
- datacontract/publish/publish.py +0 -32
- {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.10.dist-info → datacontract_cli-0.10.12.dist-info}/top_level.txt +0 -0
datacontract/cli.py
CHANGED
@@ -17,7 +17,7 @@ from datacontract.catalog.catalog import create_index_html, create_data_contract
 from datacontract.data_contract import DataContract, ExportFormat
 from datacontract.imports.importer import ImportFormat
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
-from datacontract.
+from datacontract.integration.datamesh_manager import publish_data_contract_to_datamesh_manager

 DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"

@@ -232,6 +232,18 @@ def import_(
             help="List of models names to import from the dbt manifest file (repeat for multiple models names, leave empty for all models in the dataset)."
         ),
     ] = None,
+    dbml_schema: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of schema names to import from the DBML file (repeat for multiple schema names, leave empty for all tables in the file)."
+        ),
+    ] = None,
+    dbml_table: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            help="List of table names to import from the DBML file (repeat for multiple table names, leave empty for all tables in the file)."
+        ),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Prints to stdout.
@@ -245,6 +257,8 @@ def import_(
         bigquery_dataset=bigquery_dataset,
         unity_table_full_name=unity_table_full_name,
         dbt_model=dbt_model,
+        dbml_schema=dbml_schema,
+        dbml_table=dbml_table,
     )
     console.print(result.to_yaml())

@@ -261,8 +275,10 @@ def publish(
     """
     Publish the data contract to the Data Mesh Manager.
     """
-
-
+    publish_data_contract_to_datamesh_manager(
+        data_contract_specification=DataContract(
+            data_contract_file=location, schema_location=schema
+        ).get_data_contract_specification(),
     )

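The rewritten publish command now builds the specification itself and hands it to the integration module. A minimal sketch of the same call chain from Python (the file path is a placeholder, and Data Mesh Manager credentials must be configured separately):

from datacontract.data_contract import DataContract
from datacontract.integration.datamesh_manager import publish_data_contract_to_datamesh_manager

# Resolve the contract, then push it to Data Mesh Manager, as the CLI command now does.
spec = DataContract(data_contract_file="datacontract.yaml").get_data_contract_specification()
publish_data_contract_to_datamesh_manager(data_contract_specification=spec)
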
datacontract/data_contract.py
CHANGED
@@ -18,8 +18,8 @@ from datacontract.export.exporter import ExportFormat
 from datacontract.export.exporter_factory import exporter_factory
 from datacontract.imports.importer_factory import importer_factory

-from datacontract.integration.
-from datacontract.integration.
+from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
+from datacontract.integration.opentelemetry import publish_test_results_to_opentelemetry
 from datacontract.lint import resolve
 from datacontract.lint.linters.description_linter import DescriptionLinter
 from datacontract.lint.linters.example_model_linter import ExampleModelLinter
@@ -46,8 +46,8 @@ class DataContract:
         publish_url: str = None,
         publish_to_opentelemetry: bool = False,
         spark: "SparkSession" = None,
-        inline_definitions: bool =
-        inline_quality: bool =
+        inline_definitions: bool = True,
+        inline_quality: bool = True,
     ):
         self._data_contract_file = data_contract_file
         self._data_contract_str = data_contract_str
@@ -87,8 +87,8 @@ class DataContract:
             self._data_contract_str,
             self._data_contract,
             self._schema_location,
-            inline_definitions=
-            inline_quality=
+            inline_definitions=self._inline_definitions,
+            inline_quality=self._inline_quality,
         )
         run.checks.append(
             Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")
@@ -140,7 +140,12 @@ class DataContract:
         try:
             run.log_info("Testing data contract")
             data_contract = resolve.resolve_data_contract(
-                self._data_contract_file,
+                self._data_contract_file,
+                self._data_contract_str,
+                self._data_contract,
+                self._schema_location,
+                inline_definitions=self._inline_definitions,
+                inline_quality=self._inline_quality,
             )

             if data_contract.models is None or len(data_contract.models) == 0:
@@ -213,15 +218,10 @@ class DataContract:
         run.finish()

         if self._publish_url is not None:
-
-
-            except Exception:
-                run.log_error("Failed to publish to datamesh manager")
+            publish_test_results_to_datamesh_manager(run, self._publish_url)
+
         if self._publish_to_opentelemetry:
-
-            publish_opentelemetry(run)
-            except Exception:
-                run.log_error("Failed to publish to opentelemetry")
+            publish_test_results_to_opentelemetry(run)

         return run

@@ -304,8 +304,8 @@ class DataContract:
             self._data_contract_str,
             self._data_contract,
             schema_location=self._schema_location,
-            inline_definitions=
-            inline_quality=
+            inline_definitions=self._inline_definitions,
+            inline_quality=self._inline_quality,
         )

         return exporter_factory.create(export_format).export(
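Note for callers of the Python API: inline_definitions and inline_quality now default to True and are forwarded consistently to lint, test, and export. A small sketch under that assumption (the file path is a placeholder; test() is the existing public entry point):

from datacontract.data_contract import DataContract

data_contract = DataContract(data_contract_file="datacontract.yaml")  # placeholder path
run = data_contract.test()  # definitions and quality blocks are now inlined by default
for check in run.checks:
    print(check.result, check.name)
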
datacontract/engines/fastjsonschema/check_jsonschema.py
CHANGED
@@ -148,13 +148,27 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
         schema = to_jsonschema(model_name, model)
         run.log_info(f"jsonschema: {schema}")

-        validate = fastjsonschema.compile(
+        validate = fastjsonschema.compile(
+            schema,
+            formats={"uuid": r"^[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}$"},
+        )

         # Process files based on server type
         if server.type == "local":
             process_local_file(run, server, model_name, validate)
         elif server.type == "s3":
             process_s3_file(server, model_name, validate)
+        elif server.type == "gcs":
+            run.checks.append(
+                Check(
+                    type="schema",
+                    name="Check that JSON has valid schema",
+                    model=model_name,
+                    result="info",
+                    reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
+                    engine="jsonschema",
+                )
+            )
         else:
             run.checks.append(
                 Check(
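The validator now registers a custom regex for the uuid format. A standalone sketch of how fastjsonschema's formats argument behaves with that pattern (the schema here is illustrative):

import fastjsonschema

validate = fastjsonschema.compile(
    {"type": "object", "properties": {"id": {"type": "string", "format": "uuid"}}},
    formats={"uuid": r"^[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}$"},
)
validate({"id": "123e4567-e89b-12d3-a456-426614174000"})  # passes
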
datacontract/engines/fastjsonschema/s3/s3_read_files.py
CHANGED
@@ -28,9 +28,11 @@ def s3_fs(s3_endpoint_url):

     aws_access_key_id = os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID")
     aws_secret_access_key = os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY")
+    aws_session_token = os.getenv("DATACONTRACT_S3_SESSION_TOKEN")
     return s3fs.S3FileSystem(
         key=aws_access_key_id,
         secret=aws_secret_access_key,
+        token=aws_session_token,
         anon=aws_access_key_id is None,
         client_kwargs={"endpoint_url": s3_endpoint_url},
     )
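With the session token forwarded, temporary STS credentials can now be used for the S3 JSON schema checks. A sketch of the environment a caller would set before running the test command (values are placeholders):

import os

os.environ["DATACONTRACT_S3_ACCESS_KEY_ID"] = "<access-key-id>"
os.environ["DATACONTRACT_S3_SECRET_ACCESS_KEY"] = "<secret-access-key>"
os.environ["DATACONTRACT_S3_SESSION_TOKEN"] = "<session-token>"  # new: forwarded to s3fs as token=
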
datacontract/engines/soda/check_soda_execute.py
CHANGED
@@ -1,8 +1,4 @@
 import logging
-import typing
-
-if typing.TYPE_CHECKING:
-    from pyspark.sql import SparkSession

 from soda.scan import Scan

@@ -19,9 +15,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 from datacontract.model.run import Run, Check, Log


-def check_soda_execute(
-    run: Run, data_contract: DataContractSpecification, server: Server, spark: "SparkSession", tmp_dir
-):
+def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
     if data_contract is None:
         run.log_warn("Cannot run engine soda-core, as data contract is invalid")
         return
@@ -29,7 +23,7 @@ def check_soda_execute(
     run.log_info("Running engine soda-core")
     scan = Scan()

-    if server.type in ["s3", "azure", "local"]:
+    if server.type in ["s3", "gcs", "azure", "local"]:
         if server.format in ["json", "parquet", "csv", "delta"]:
             con = get_duckdb_connection(data_contract, server, run)
             scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
datacontract/engines/soda/connections/duckdb.py
CHANGED
@@ -1,7 +1,5 @@
 import os

-from deltalake import DeltaTable
-
 import duckdb
 from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
 from datacontract.model.run import Run
@@ -15,6 +13,9 @@ def get_duckdb_connection(data_contract, server, run: Run):
     if server.type == "s3":
         path = server.location
         setup_s3_connection(con, server)
+    if server.type == "gcs":
+        path = server.location
+        setup_gcs_connection(con, server)
     if server.type == "azure":
         path = server.location
         setup_azure_connection(con, server)
@@ -49,24 +50,8 @@ def get_duckdb_connection(data_contract, server, run: Run):
                 f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1, columns={columns});"""
             )
         elif server.format == "delta":
-
-
-            # in https://github.com/datacontract/datacontract-cli/issues/258,
-            # azure storage should also work
-            # https://github.com/duckdb/duckdb_delta/issues/21
-            raise NotImplementedError("Support for Delta Tables on Azure Storage is not implemented yet")
-
-            storage_options = {
-                "AWS_ENDPOINT_URL": server.endpointUrl,
-                "AWS_ACCESS_KEY_ID": os.getenv("DATACONTRACT_S3_ACCESS_KEY_ID"),
-                "AWS_SECRET_ACCESS_KEY": os.getenv("DATACONTRACT_S3_SECRET_ACCESS_KEY"),
-                "AWS_REGION": os.getenv("DATACONTRACT_S3_REGION", "us-east-1"),
-                "AWS_ALLOW_HTTP": "True" if server.endpointUrl.startswith("http://") else "False",
-            }
-
-            delta_table_arrow = DeltaTable(model_path, storage_options=storage_options).to_pyarrow_dataset()
-
-            con.register(model_name, delta_table_arrow)
+            con.sql("update extensions;")  # Make sure we have the latest delta extension
+            con.sql(f"""CREATE VIEW "{model_name}" AS SELECT * FROM delta_scan('{model_path}');""")
     return con


@@ -138,6 +123,24 @@ def setup_s3_connection(con, server):
     # print(con.sql("SELECT * FROM duckdb_settings() WHERE name like 's3%'"))


+def setup_gcs_connection(con, server):
+    key_id = os.getenv("DATACONTRACT_GCS_KEY_ID")
+    secret = os.getenv("DATACONTRACT_GCS_SECRET")
+
+    if key_id is None:
+        raise ValueError("Error: Environment variable DATACONTRACT_GCS_KEY_ID is not set")
+    if secret is None:
+        raise ValueError("Error: Environment variable DATACONTRACT_GCS_SECRET is not set")
+
+    con.sql(f"""
+    CREATE SECRET gcs_secret (
+        TYPE GCS,
+        KEY_ID '{key_id}',
+        SECRET '{secret}'
+    );
+    """)
+
+
 def setup_azure_connection(con, server):
     tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
     client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
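Delta reads now go through DuckDB's delta extension, and GCS access is configured through DuckDB's secrets manager from the two new DATACONTRACT_GCS_* variables. A hedged standalone sketch of the underlying DuckDB calls (the HMAC values and table path are placeholders, and the httpfs/delta extensions are assumed to be auto-loadable):

import duckdb

con = duckdb.connect()
# GCS credentials become a DuckDB secret, as in setup_gcs_connection above.
con.sql("""
    CREATE SECRET gcs_secret (
        TYPE GCS,
        KEY_ID 'my-hmac-key-id',
        SECRET 'my-hmac-secret'
    );
""")
print(con.sql("SELECT name, type FROM duckdb_secrets();"))
# Delta tables are read via delta_scan instead of the deltalake package, e.g.:
# con.sql("CREATE VIEW orders AS SELECT * FROM delta_scan('s3://my-bucket/orders');")  # placeholder path
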
datacontract/engines/soda/connections/kafka.py
CHANGED
@@ -1,33 +1,26 @@
 import logging
 import os
-from pyspark.sql import SparkSession
-from pyspark.sql.functions import col, expr, from_json
-from pyspark.sql.avro.functions import from_avro
-from pyspark.sql.types import (
-    StructType,
-    StructField,
-    StringType,
-    DecimalType,
-    DoubleType,
-    IntegerType,
-    LongType,
-    BooleanType,
-    TimestampType,
-    TimestampNTZType,
-    DateType,
-    BinaryType,
-    ArrayType,
-    NullType,
-    DataType,
-)

 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.model.data_contract_specification import DataContractSpecification, Server, Field
 from datacontract.model.exceptions import DataContractException


-def create_spark_session(tmp_dir: str)
+def create_spark_session(tmp_dir: str):
     """Create and configure a Spark session."""
+
+    try:
+        from pyspark.sql import SparkSession
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="pyspark is missing",
+            reason="Install the extra datacontract-cli[kafka] to use kafka",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     spark = (
         SparkSession.builder.appName("datacontract")
         .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
@@ -43,7 +36,7 @@ def create_spark_session(tmp_dir: str) -> SparkSession:
     return spark


-def read_kafka_topic(spark
+def read_kafka_topic(spark, data_contract: DataContractSpecification, server: Server, tmp_dir):
     """Read and process data from a Kafka topic based on the server configuration."""

     logging.info("Reading data from Kafka server %s topic %s", server.host, server.topic)
@@ -74,6 +67,19 @@ def read_kafka_topic(spark: SparkSession, data_contract: DataContractSpecificati


 def process_avro_format(df, model_name, model):
+    try:
+        from pyspark.sql.functions import col, expr
+        from pyspark.sql.avro.functions import from_avro
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="pyspark is missing",
+            reason="Install the extra datacontract-cli[kafka] to use kafka",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     avro_schema = to_avro_schema_json(model_name, model)
     df2 = df.withColumn("fixedValue", expr("substring(value, 6, length(value)-5)"))
     options = {"mode": "PERMISSIVE"}
@@ -83,6 +89,18 @@ def process_avro_format(df, model_name, model):


 def process_json_format(df, model_name, model):
+    try:
+        from pyspark.sql.functions import col, from_json
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="pyspark is missing",
+            reason="Install the extra datacontract-cli[kafka] to use kafka",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     struct_type = to_struct_type(model.fields)
     df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)").select(
         from_json(col("value"), struct_type, {"mode": "PERMISSIVE"}).alias("json")
@@ -108,11 +126,51 @@ def get_auth_options():


 def to_struct_type(fields):
+    try:
+        from pyspark.sql.types import StructType
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="pyspark is missing",
+            reason="Install the extra datacontract-cli[kafka] to use kafka",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     """Convert field definitions to Spark StructType."""
     return StructType([to_struct_field(field_name, field) for field_name, field in fields.items()])


-def to_struct_field(field_name: str, field: Field)
+def to_struct_field(field_name: str, field: Field):
+    try:
+        from pyspark.sql.types import (
+            StructType,
+            StructField,
+            StringType,
+            DecimalType,
+            DoubleType,
+            IntegerType,
+            LongType,
+            BooleanType,
+            TimestampType,
+            TimestampNTZType,
+            DateType,
+            BinaryType,
+            ArrayType,
+            NullType,
+            DataType,
+        )
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="pyspark is missing",
+            reason="Install the extra datacontract-cli[kafka] to use kafka",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     """Map field definitions to Spark StructField using match-case."""
     match field.type:
         case "string" | "varchar" | "text":
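pyspark is now imported lazily inside each function, so importing this module no longer requires the [kafka] extra; a missing dependency surfaces as a DataContractException only when a Kafka server is actually tested. A small sketch of what a caller would see (the temp dir is a placeholder):

from datacontract.model.exceptions import DataContractException
from datacontract.engines.soda.connections.kafka import create_spark_session

try:
    spark = create_spark_session(tmp_dir="/tmp/datacontract")
except DataContractException as exc:
    print(exc)  # raised with reason "Install the extra datacontract-cli[kafka] to use kafka"
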
datacontract/engines/soda/connections/snowflake.py
CHANGED
@@ -4,17 +4,20 @@ import yaml


 def to_snowflake_soda_configuration(server):
+    prefix = "DATACONTRACT_SNOWFLAKE_"
+    snowflake_soda_params = {k.replace(prefix, "").lower(): v for k, v in os.environ.items() if k.startswith(prefix)}
+
+    # backward compatibility
+    if "connection_timeout" not in snowflake_soda_params:
+        snowflake_soda_params["connection_timeout"] = "5"  # minutes
+
     soda_configuration = {
         f"data_source {server.type}": {
             "type": "snowflake",
-            "username": os.getenv("DATACONTRACT_SNOWFLAKE_USERNAME"),
-            "password": os.getenv("DATACONTRACT_SNOWFLAKE_PASSWORD"),
-            "role": os.getenv("DATACONTRACT_SNOWFLAKE_ROLE"),
             "account": server.account,
             "database": server.database,
             "schema": server.schema_,
-
-            "connection_timeout": 5,  # minutes
+            **snowflake_soda_params,
         }
     }
     soda_configuration_str = yaml.dump(soda_configuration)
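Any environment variable with the DATACONTRACT_SNOWFLAKE_ prefix is now passed through to the Soda Snowflake data source, lower-cased and with the prefix stripped, so parameters such as warehouse or authenticator no longer need code changes. A sketch of the mapping (values are placeholders):

import os

os.environ["DATACONTRACT_SNOWFLAKE_USERNAME"] = "<user>"
os.environ["DATACONTRACT_SNOWFLAKE_WAREHOUSE"] = "COMPUTE_WH"

prefix = "DATACONTRACT_SNOWFLAKE_"
params = {k.replace(prefix, "").lower(): v for k, v in os.environ.items() if k.startswith(prefix)}
print(params)  # includes {'username': '<user>', 'warehouse': 'COMPUTE_WH'}; connection_timeout still defaults to "5"
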
datacontract/export/avro_converter.py
CHANGED
@@ -40,11 +40,21 @@ def to_avro_field(field, field_name):
     avro_field = {"name": field_name}
     if field.description is not None:
         avro_field["doc"] = field.description
-
+    is_required_avro = field.required if field.required is not None else True
+    avro_type = to_avro_type(field, field_name)
+    avro_field["type"] = avro_type if is_required_avro else ["null", avro_type]
+
+    if avro_field["type"] == "enum":
+        avro_field["type"] = {
+            "type": "enum",
+            "name": field.title,
+            "symbols": field.enum,
+        }

     if field.config:
         if "avroDefault" in field.config:
-
+            if field.config.get("avroType") != "enum":
+                avro_field["default"] = field.config["avroDefault"]

     return avro_field
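Optional fields now export as Avro union types with null. A short sketch using the converter directly (field names are illustrative; a field with required unset still falls back to a plain required type):

from datacontract.model.data_contract_specification import Field
from datacontract.export.avro_converter import to_avro_field

print(to_avro_field(Field(type="string", required=False), "middle_name"))
# expected shape: {'name': 'middle_name', 'type': ['null', 'string']}
print(to_avro_field(Field(type="string", required=True), "last_name"))
# expected shape: {'name': 'last_name', 'type': 'string'}
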
datacontract/export/dbml_converter.py
CHANGED
@@ -3,6 +3,7 @@ from importlib.metadata import version
 from typing import Tuple

 import pytz
+from datacontract.model.exceptions import DataContractException

 import datacontract.model.data_contract_specification as spec
 from datacontract.export.sql_type_converter import convert_to_sql_type
@@ -48,17 +49,7 @@ Using {5} Types for the field types
 {0}
 */
 """.format(generated_info)
-
-    note = """Note project_info {{
-    '''
-    {0}
-    '''
-}}
-    """.format(generated_info)
-
-    return """{0}
-{1}
-    """.format(comment, note)
+    return comment


 def get_version() -> str:
@@ -70,19 +61,18 @@ def get_version() -> str:

 def generate_project_info(contract: spec.DataContractSpecification) -> str:
     return """Project "{0}" {{
-Note:
+Note: '''{1}'''
 }}\n
-    """.format(contract.info.title,
+    """.format(contract.info.title, contract.info.description)


 def generate_table(model_name: str, model: spec.Model, server: spec.Server) -> str:
     result = """Table "{0}" {{
-Note:
-    """.format(model_name,
+Note: {1}
+    """.format(model_name, formatDescription(model.description))

     references = []

-    # Add all the fields
     for field_name, field in model.fields.items():
         ref, field_string = generate_field(field_name, field, model_name, server)
         if ref is not None:
@@ -102,6 +92,30 @@ Note: "{1}"


 def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]:
+    if field.primary:
+        if field.required is not None:
+            if not field.required:
+                raise DataContractException(
+                    type="lint",
+                    name="Primary key fields cannot have required == False.",
+                    result="error",
+                    reason="Primary key fields cannot have required == False.",
+                    engine="datacontract",
+                )
+        else:
+            field.required = True
+        if field.unique is not None:
+            if not field.unique:
+                raise DataContractException(
+                    type="lint",
+                    name="Primary key fields cannot have unique == False",
+                    result="error",
+                    reason="Primary key fields cannot have unique == False.",
+                    engine="datacontract",
+                )
+        else:
+            field.unique = True
+
     field_attrs = []
     if field.primary:
         field_attrs.append("pk")
@@ -115,13 +129,22 @@ def generate_field(field_name: str, field: spec.Field, model_name: str, server:
         field_attrs.append("null")

     if field.description:
-        field_attrs.append(
+        field_attrs.append("""Note: {0}""".format(formatDescription(field.description)))

     field_type = field.type if server is None else convert_to_sql_type(field, server.type)

     field_str = '"{0}" "{1}" [{2}]'.format(field_name, field_type, ",".join(field_attrs))
     ref_str = None
     if (field.references) is not None:
-
-
+        if field.unique:
+            ref_str = "{0}.{1} - {2}".format(model_name, field_name, field.references)
+        else:
+            ref_str = "{0}.{1} > {2}".format(model_name, field_name, field.references)
     return (ref_str, field_str)
+
+
+def formatDescription(input: str) -> str:
+    if "\n" in input or "\r" in input or '"' in input:
+        return "'''{0}'''".format(input)
+    else:
+        return '"{0}"'.format(input)
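Descriptions are now quoted defensively in the generated DBML, and primary-key fields are linted for contradictory required/unique settings. A sketch of the new quoting helper (inputs are illustrative):

from datacontract.export.dbml_converter import formatDescription

print(formatDescription("Order identifier"))  # -> "Order identifier"
print(formatDescription('A "raw" value'))     # -> '''A "raw" value'''
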
datacontract/export/exporter.py
CHANGED
@@ -35,9 +35,10 @@ class ExportFormat(str, Enum):
     bigquery = "bigquery"
     dbml = "dbml"
     spark = "spark"
+    sqlalchemy = "sqlalchemy"

     @classmethod
-    def
+    def get_supported_formats(cls):
         return list(map(lambda c: c.value, cls))

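A quick sketch of the new enum value and the renamed classmethod:

from datacontract.export.exporter import ExportFormat

print(ExportFormat.sqlalchemy.value)         # "sqlalchemy"
print(ExportFormat.get_supported_formats())  # [..., "dbml", "spark", "sqlalchemy"]
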
datacontract/export/exporter_factory.py
CHANGED
@@ -143,3 +143,9 @@ exporter_factory.register_lazy_exporter(
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.spark, module_path="datacontract.export.spark_converter", class_name="SparkExporter"
 )
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.sqlalchemy,
+    module_path="datacontract.export.sqlalchemy_converter",
+    class_name="SQLAlchemyExporter",
+)
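With the lazy registration above, the new format is reachable through the regular export path; the SQLAlchemyExporter module is only imported the first time the format is requested. A hedged usage sketch (the file path is a placeholder, and the entry point is assumed to be the existing DataContract.export):

from datacontract.data_contract import DataContract

code = DataContract(data_contract_file="datacontract.yaml").export("sqlalchemy")
print(code)  # generated SQLAlchemy model code
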
datacontract/export/jsonschema_converter.py
CHANGED
@@ -36,10 +36,7 @@ def to_property(field: Field) -> dict:
     property = {}
     json_type, json_format = convert_type_format(field.type, field.format)
     if json_type is not None:
-
-        property["type"] = json_type
-    else:
-        property["type"] = [json_type, "null"]
+        property["type"] = json_type
     if json_format is not None:
         property["format"] = json_format
     if field.unique:
datacontract/export/spark_converter.py
CHANGED
@@ -123,6 +123,8 @@ def to_data_type(field: Field) -> types.DataType:
         return types.ArrayType(to_data_type(field.items))
     if field_type in ["object", "record", "struct"]:
         return types.StructType(to_struct_type(field.fields))
+    if field_type == "map":
+        return types.MapType(to_data_type(field.keys), to_data_type(field.values))
     if field_type in ["string", "varchar", "text"]:
         return types.StringType()
     if field_type in ["number", "decimal", "numeric"]:
@@ -204,6 +206,8 @@ def print_schema(dtype: types.DataType) -> str:
         return format_struct_type(dtype)
     elif isinstance(dtype, types.ArrayType):
         return f"ArrayType({print_schema(dtype.elementType)})"
+    elif isinstance(dtype, types.MapType):
+        return f"MapType(\n{indent(print_schema(dtype.keyType), 1)}, {print_schema(dtype.valueType)})"
     elif isinstance(dtype, types.DecimalType):
         return f"DecimalType({dtype.precision}, {dtype.scale})"
     else: