datacontract-cli 0.10.12__py3-none-any.whl → 0.10.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli was flagged as potentially problematic in the registry.
- datacontract/engines/soda/connections/kafka.py +3 -2
- datacontract/export/bigquery_converter.py +30 -23
- datacontract/export/data_caterer_converter.py +148 -0
- datacontract/export/exporter.py +1 -0
- datacontract/export/exporter_factory.py +6 -0
- datacontract/export/jsonschema_converter.py +13 -2
- datacontract/export/spark_converter.py +1 -1
- datacontract/export/sql_type_converter.py +1 -10
- datacontract/imports/spark_importer.py +5 -1
- datacontract/imports/unity_importer.py +105 -84
- datacontract/lint/resolve.py +10 -1
- datacontract/lint/urls.py +27 -13
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/METADATA +27 -11
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/RECORD +18 -17
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/top_level.txt +0 -0
datacontract/engines/soda/connections/kafka.py
CHANGED

@@ -25,9 +25,10 @@ def create_spark_session(tmp_dir: str):
         SparkSession.builder.appName("datacontract")
         .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
         .config("spark.streaming.stopGracefullyOnShutdown", "true")
+        .config("spark.ui.enabled", "false")
         .config(
             "spark.jars.packages",
-            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.
+            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.2,org.apache.spark:spark-avro_2.12:3.5.2",
         )
         .getOrCreate()
     )
@@ -112,7 +113,7 @@ def get_auth_options():
     kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
     kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")

-    if kafka_sasl_username is None:
+    if kafka_sasl_username is None or kafka_sasl_username == "":
         return {}

     return {
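A quick sketch (not part of the diff) of why the broadened check matters: an exported but empty `DATACONTRACT_KAFKA_SASL_USERNAME` previously slipped past the `None` check and produced a half-configured SASL setup; with the change it falls back to the unauthenticated options.

```python
import os

os.environ["DATACONTRACT_KAFKA_SASL_USERNAME"] = ""  # e.g. left blank by a CI template

username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
print(username is None)                    # False -> old check kept going
print(username is None or username == "")  # True  -> new check returns {}
```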
datacontract/export/bigquery_converter.py
CHANGED

@@ -44,7 +44,7 @@ def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]:


 def to_field(field_name: str, field: Field) -> dict:
-    bq_type = map_type_to_bigquery(field
+    bq_type = map_type_to_bigquery(field)
     bq_field = {
         "name": field_name,
         "type": bq_type,
@@ -59,10 +59,8 @@ def to_field(field_name: str, field: Field) -> dict:
         # in case the array type is a complex object, we want to copy all its fields
         bq_field["fields"] = to_fields_array(field.items.fields)
     else:
-
-
-            {f"{field_name}_1": Field(type=field.items.type, required=False, description="")}
-        )
+        bq_field["type"] = map_type_to_bigquery(field.items)
+
     # all of these can carry other fields
     elif bq_type.lower() in ["record", "struct"]:
         bq_field["fields"] = to_fields_array(field.fields)
@@ -79,37 +77,46 @@ def to_field(field_name: str, field: Field) -> dict:
     return bq_field


-def map_type_to_bigquery(
+def map_type_to_bigquery(field: Field) -> str:
     logger = logging.getLogger(__name__)
-
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["string", "varchar", "text"]:
         return "STRING"
-    elif
+    elif field_type.lower() == "bytes":
         return "BYTES"
-    elif
+    elif field_type.lower() in ["int", "integer"]:
         return "INTEGER"
-    elif
+    elif field_type.lower() in ["long", "bigint"]:
         return "INT64"
-    elif
-        return "
-    elif
+    elif field_type.lower() == "float":
+        return "FLOAT64"
+    elif field_type.lower() == "boolean":
         return "BOOL"
-    elif
+    elif field_type.lower() in ["timestamp", "timestamp_tz"]:
         return "TIMESTAMP"
-    elif
+    elif field_type.lower() == "date":
         return "DATE"
-    elif
+    elif field_type.lower() == "timestamp_ntz":
         return "TIME"
-    elif
+    elif field_type.lower() in ["number", "decimal", "numeric"]:
         return "NUMERIC"
-    elif
+    elif field_type.lower() == "double":
         return "BIGNUMERIC"
-    elif
+    elif field_type.lower() in ["object", "record", "array"]:
         return "RECORD"
-    elif
+    elif field_type.lower() == "struct":
         return "STRUCT"
-    elif
+    elif field_type.lower() == "null":
         logger.info(
-            f"Can't properly map {
+            f"Can't properly map {field.title} to bigquery Schema, as 'null' \
+            is not supported as a type. Mapping it to STRING."
         )
         return "STRING"
     else:
@@ -117,6 +124,6 @@ def map_type_to_bigquery(type_str: str, field_name: str) -> str:
             type="schema",
             result="failed",
             name="Map datacontract type to bigquery data type",
-            reason=f"Unsupported type {
+            reason=f"Unsupported type {field_type} in data contract definition.",
             engine="datacontract",
         )
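A minimal sketch (not part of the diff) of the refactored mapper: it now receives the whole `Field`, so a per-field override in `config["bigqueryType"]` takes precedence over the default type mapping.

```python
from datacontract.export.bigquery_converter import map_type_to_bigquery
from datacontract.model.data_contract_specification import Field

print(map_type_to_bigquery(Field(type="timestamp_tz")))                             # TIMESTAMP
print(map_type_to_bigquery(Field(type="string", config={"bigqueryType": "JSON"})))  # JSON (override wins)
```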
datacontract/export/data_caterer_converter.py
ADDED

@@ -0,0 +1,148 @@
+from typing import Dict
+
+import yaml
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field, Server
+
+
+class DataCatererExporter(Exporter):
+    """
+    Exporter class for Data Caterer.
+    Creates a YAML file, based on the data contract, for Data Caterer to generate synthetic data.
+    """
+
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_data_caterer_generate_yaml(data_contract, server)
+
+
+def to_data_caterer_generate_yaml(data_contract_spec: DataContractSpecification, server):
+    generation_task = {"name": data_contract_spec.info.title, "steps": []}
+    server_info = _get_server_info(data_contract_spec, server)
+
+    for model_key, model_value in data_contract_spec.models.items():
+        odcs_table = _to_data_caterer_generate_step(model_key, model_value, server_info)
+        generation_task["steps"].append(odcs_table)
+    return yaml.dump(generation_task, indent=2, sort_keys=False, allow_unicode=True)
+
+
+def _get_server_info(data_contract_spec: DataContractSpecification, server):
+    if server is not None and server in data_contract_spec.servers:
+        return data_contract_spec.servers.get(server)
+    elif server is not None:
+        raise Exception(f"Server name not found in servers list in data contract, server-name={server}")
+    elif len(data_contract_spec.servers.keys()) > 0:
+        return next(iter(data_contract_spec.servers.values()))
+    else:
+        return None
+
+
+def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server) -> dict:
+    step = {
+        "name": model_key,
+        "type": _to_step_type(server),
+        "options": _to_data_source_options(model_key, server),
+        "schema": [],
+    }
+    fields = _to_fields(model_value.fields)
+    if fields:
+        step["schema"] = fields
+    return step
+
+
+def _to_step_type(server: Server):
+    if server is not None and server.type is not None:
+        if server.type in ["s3", "gcs", "azure", "local"]:
+            return server.format
+        else:
+            return server.type
+    else:
+        return "csv"
+
+
+def _to_data_source_options(model_key, server: Server):
+    options = {}
+    if server is not None and server.type is not None:
+        if server.type in ["s3", "gcs", "azure", "local"]:
+            if server.path is not None:
+                options["path"] = server.path
+            elif server.location is not None:
+                options["path"] = server.location
+            else:
+                options["path"] = "/tmp/data_caterer_data"
+        elif server.type == "postgres":
+            options["schema"] = server.schema_
+            options["table"] = model_key
+        elif server.type == "kafka":
+            options["topic"] = server.topic
+
+    return options
+
+
+def _to_fields(fields: Dict[str, Field]) -> list:
+    dc_fields = []
+    for field_name, field in fields.items():
+        column = _to_field(field_name, field)
+        dc_fields.append(column)
+    return dc_fields
+
+
+def _to_field(field_name: str, field: Field) -> dict:
+    dc_field = {"name": field_name}
+    dc_generator_opts = {}
+
+    if field.type is not None:
+        new_type = _to_data_type(field.type)
+        dc_field["type"] = _to_data_type(field.type)
+        if new_type == "object" or new_type == "record" or new_type == "struct":
+            # need to get nested field definitions
+            nested_fields = _to_fields(field.fields)
+            dc_field["schema"] = {"fields": nested_fields}
+
+    if field.enum is not None and len(field.enum) > 0:
+        dc_generator_opts["oneOf"] = field.enum
+    if field.unique is not None and field.unique:
+        dc_generator_opts["isUnique"] = field.unique
+    if field.minLength is not None:
+        dc_generator_opts["minLength"] = field.minLength
+    if field.maxLength is not None:
+        dc_generator_opts["maxLength"] = field.maxLength
+    if field.pattern is not None:
+        dc_generator_opts["regex"] = field.pattern
+    if field.minimum is not None:
+        dc_generator_opts["min"] = field.minimum
+    if field.maximum is not None:
+        dc_generator_opts["max"] = field.maximum
+
+    if len(dc_generator_opts.keys()) > 0:
+        dc_field["generator"] = {"options": dc_generator_opts}
+    return dc_field
+
+
+def _to_data_type(data_type):
+    if data_type == "number" or data_type == "numeric" or data_type == "double":
+        return "double"
+    elif data_type == "decimal" or data_type == "bigint":
+        return "decimal"
+    elif data_type == "int":
+        return "integer"
+    elif data_type == "long":
+        return "long"
+    elif data_type == "float":
+        return "float"
+    elif data_type == "string" or data_type == "text" or data_type == "varchar":
+        return "string"
+    if data_type == "boolean":
+        return "boolean"
+    if data_type == "timestamp" or data_type == "timestamp_tz" or data_type == "timestamp_ntz":
+        return "timestamp"
+    elif data_type == "date":
+        return "date"
+    elif data_type == "array":
+        return "array"
+    elif data_type == "map" or data_type == "object" or data_type == "record" or data_type == "struct":
+        return "struct"
+    elif data_type == "bytes":
+        return "binary"
+    else:
+        return "string"
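A small sketch (not part of the diff) that drives the new converter's field mapping directly, using only names defined in data_caterer_converter.py above.

```python
from datacontract.export.data_caterer_converter import _to_field
from datacontract.model.data_contract_specification import Field

field = Field(type="string", minLength=3, maxLength=10, pattern="[A-Z]{3}[0-9]+")
print(_to_field("order_id", field))
# expected shape, following the logic above:
# {'name': 'order_id', 'type': 'string',
#  'generator': {'options': {'minLength': 3, 'maxLength': 10, 'regex': '[A-Z]{3}[0-9]+'}}}
```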
datacontract/export/exporter.py
CHANGED

datacontract/export/exporter_factory.py
CHANGED

@@ -62,6 +62,12 @@ exporter_factory.register_lazy_exporter(
     class_name="BigQueryExporter",
 )

+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.data_caterer,
+    module_path="datacontract.export.data_caterer_converter",
+    class_name="DataCatererExporter",
+)
+
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.dbml, module_path="datacontract.export.dbml_converter", class_name="DbmlExporter"
 )
datacontract/export/jsonschema_converter.py
CHANGED

@@ -36,7 +36,19 @@ def to_property(field: Field) -> dict:
     property = {}
     json_type, json_format = convert_type_format(field.type, field.format)
     if json_type is not None:
-
+        if not field.required:
+            """
+            From: https://json-schema.org/understanding-json-schema/reference/type
+            The type keyword may either be a string or an array:
+
+            If it's a string, it is the name of one of the basic types above.
+            If it is an array, it must be an array of strings, where each string
+            is the name of one of the basic types, and each element is unique.
+            In this case, the JSON snippet is valid if it matches any of the given types.
+            """
+            property["type"] = [json_type, "null"]
+        else:
+            property["type"] = json_type
     if json_format is not None:
         property["format"] = json_format
     if field.unique:

@@ -50,7 +62,6 @@ def to_property(field: Field) -> dict:
         property["required"] = to_required(field.fields)
     if json_type == "array":
         property["items"] = to_property(field.items)
-
     if field.pattern:
         property["pattern"] = field.pattern
     if field.enum:
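Sketch (not part of the diff) of the effect of the nullable handling above for an optional field.

```python
from datacontract.export.jsonschema_converter import to_property
from datacontract.model.data_contract_specification import Field

print(to_property(Field(type="string", required=False))["type"])  # ['string', 'null']
print(to_property(Field(type="string", required=True))["type"])   # 'string'
```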
datacontract/export/spark_converter.py
CHANGED

@@ -128,7 +128,7 @@ def to_data_type(field: Field) -> types.DataType:
     if field_type in ["string", "varchar", "text"]:
         return types.StringType()
     if field_type in ["number", "decimal", "numeric"]:
-        return types.DecimalType()
+        return types.DecimalType(precision=field.precision, scale=field.scale)
     if field_type in ["integer", "int"]:
         return types.IntegerType()
     if field_type == "long":
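Sketch (not part of the diff): decimal fields now carry their declared precision and scale into the Spark schema instead of Spark's `DecimalType` default of (10, 0).

```python
from datacontract.export.spark_converter import to_data_type
from datacontract.model.data_contract_specification import Field

print(to_data_type(Field(type="decimal", precision=12, scale=2)))  # DecimalType(12,2)
```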
datacontract/export/sql_type_converter.py
CHANGED

@@ -269,16 +269,7 @@ def convert_type_to_sqlserver(field: Field) -> None | str:

 def convert_type_to_bigquery(field: Field) -> None | str:
     """Convert from supported datacontract types to equivalent bigquery types"""
-
-    if not field_type:
-        return None
-
-    # If provided sql-server config type, prefer it over default mapping
-    if bigquery_type := get_type_config(field, "bigqueryType"):
-        return bigquery_type
-
-    field_type = field_type.lower()
-    return map_type_to_bigquery(field_type, field.title)
+    return map_type_to_bigquery(field)


 def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
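Note (not part of the diff): `convert_type_to_bigquery` is now a thin delegate, so the SQL path and the BigQuery exporter share a single mapping, including the `bigqueryType` config override.

```python
from datacontract.export.sql_type_converter import convert_type_to_bigquery
from datacontract.model.data_contract_specification import Field

print(convert_type_to_bigquery(Field(type="long")))  # INT64
```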
datacontract/imports/spark_importer.py
CHANGED

@@ -80,6 +80,8 @@ def _field_from_struct_type(spark_field: types.StructField) -> Field:
     """
     field = Field()
     field.required = not spark_field.nullable
+    field.description = spark_field.metadata.get("comment")
+
     return _type_from_data_type(field, spark_field.dataType)


@@ -121,7 +123,7 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
     """
     if isinstance(spark_type, types.StringType):
         return "string"
-    elif isinstance(spark_type, types.IntegerType):
+    elif isinstance(spark_type, (types.IntegerType, types.ShortType)):
        return "integer"
    elif isinstance(spark_type, types.LongType):
        return "long"

@@ -149,5 +151,7 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
        return "decimal"
    elif isinstance(spark_type, types.NullType):
        return "null"
+   elif isinstance(spark_type, types.VarcharType):
+       return "varchar"
    else:
        raise ValueError(f"Unsupported Spark type: {spark_type}")
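Sketch (not part of the diff): the importer now keeps column comments and accepts short integers.

```python
from pyspark.sql import types
from datacontract.imports.spark_importer import _field_from_struct_type

struct_field = types.StructField(
    "amount",
    types.ShortType(),
    nullable=True,
    metadata={"comment": "order amount"},
)
field = _field_from_struct_type(struct_field)
# per the changes above: field.type == "integer", field.description == "order amount",
# field.required == False
```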
datacontract/imports/unity_importer.py
CHANGED

@@ -1,17 +1,37 @@
 import json
-import requests
 import os
-import
+from typing import List, Optional
+
+from pyspark.sql import types
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.catalog import TableInfo, ColumnInfo

 from datacontract.imports.importer import Importer
+from datacontract.imports.spark_importer import _field_from_struct_type
 from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 from datacontract.model.exceptions import DataContractException


 class UnityImporter(Importer):
+    """
+    UnityImporter class for importing data contract specifications from Unity Catalog.
+    """
+
     def import_source(
         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
     ) -> DataContractSpecification:
+        """
+        Import data contract specification from a source.
+
+        :param data_contract_specification: The data contract specification to be imported.
+        :type data_contract_specification: DataContractSpecification
+        :param source: The source from which to import the data contract specification.
+        :type source: str
+        :param import_args: Additional arguments for the import process.
+        :type import_args: dict
+        :return: The imported data contract specification.
+        :rtype: DataContractSpecification
+        """
         if source is not None:
             data_contract_specification = import_unity_from_json(data_contract_specification, source)
         else:

@@ -24,9 +44,21 @@ class UnityImporter(Importer):
 def import_unity_from_json(
     data_contract_specification: DataContractSpecification, source: str
 ) -> DataContractSpecification:
+    """
+    Import data contract specification from a JSON file.
+
+    :param data_contract_specification: The data contract specification to be imported.
+    :type data_contract_specification: DataContractSpecification
+    :param source: The path to the JSON file.
+    :type source: str
+    :return: The imported data contract specification.
+    :rtype: DataContractSpecification
+    :raises DataContractException: If there is an error parsing the JSON file.
+    """
     try:
         with open(source, "r") as file:
-
+            json_contents = json.loads(file.read())
+            unity_schema = TableInfo.from_dict(json_contents)
     except json.JSONDecodeError as e:
         raise DataContractException(
             type="schema",

@@ -39,114 +71,103 @@ def import_unity_from_json(


 def import_unity_from_api(
-    data_contract_specification: DataContractSpecification, unity_table_full_name:
+    data_contract_specification: DataContractSpecification, unity_table_full_name: Optional[str] = None
 ) -> DataContractSpecification:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    """
+    Import data contract specification from Unity Catalog API.
+
+    :param data_contract_specification: The data contract specification to be imported.
+    :type data_contract_specification: DataContractSpecification
+    :param unity_table_full_name: The full name of the Unity table.
+    :type unity_table_full_name: Optional[str]
+    :return: The imported data contract specification.
+    :rtype: DataContractSpecification
+    :raises DataContractException: If there is an error retrieving the schema from the API.
+    """
+    try:
+        workspace_client = WorkspaceClient()
+        unity_schema: TableInfo = workspace_client.tables.get(unity_table_full_name)
+    except Exception as e:
         raise DataContractException(
             type="schema",
             name="Retrieve unity catalog schema",
-            reason=f"Failed to retrieve unity catalog schema from databricks
+            reason=f"Failed to retrieve unity catalog schema from databricks profile: {os.getenv('DATABRICKS_CONFIG_PROFILE')}",
             engine="datacontract",
+            original_exception=e,
         )

-    convert_unity_schema(data_contract_specification,
+    convert_unity_schema(data_contract_specification, unity_schema)

     return data_contract_specification


 def convert_unity_schema(
-    data_contract_specification: DataContractSpecification, unity_schema:
+    data_contract_specification: DataContractSpecification, unity_schema: TableInfo
 ) -> DataContractSpecification:
+    """
+    Convert Unity schema to data contract specification.
+
+    :param data_contract_specification: The data contract specification to be converted.
+    :type data_contract_specification: DataContractSpecification
+    :param unity_schema: The Unity schema to be converted.
+    :type unity_schema: TableInfo
+    :return: The converted data contract specification.
+    :rtype: DataContractSpecification
+    """
     if data_contract_specification.models is None:
         data_contract_specification.models = {}

-    fields = import_table_fields(unity_schema.
+    fields = import_table_fields(unity_schema.columns)

-    table_id = unity_schema.
+    table_id = unity_schema.name or unity_schema.table_id

     data_contract_specification.models[table_id] = Model(fields=fields, type="table")

-    if unity_schema.
-        data_contract_specification.models[table_id].title = unity_schema.
+    if unity_schema.name:
+        data_contract_specification.models[table_id].title = unity_schema.name
+
+    if unity_schema.comment:
+        data_contract_specification.models[table_id].description = unity_schema.comment

     return data_contract_specification


-def import_table_fields(
+def import_table_fields(columns: List[ColumnInfo]) -> dict[str, Field]:
+    """
+    Import table fields from Unity schema columns.
+
+    Here we are first converting the `ColumnInfo.type_json` to a Spark StructField object
+    so we can leave the complexity of the Spark field types to the Spark JSON schema parser,
+    then re-use the logic in `datacontract.imports.spark_importer` to convert the StructField
+    into a Field object.
+
+    :param columns: The list of Unity schema columns.
+    :type columns: List[ColumnInfo]
+    :return: A dictionary of imported fields.
+    :rtype: dict[str, Field]
+    """
     imported_fields = {}
-    for field in table_fields:
-        field_name = field.get("name")
-        imported_fields[field_name] = Field()
-        imported_fields[field_name].required = field.get("nullable") == "false"
-        imported_fields[field_name].description = field.get("comment")
-
-        # databricks api 2.1 specifies that type_name can be any of:
-        # BOOLEAN | BYTE | SHORT | INT | LONG | FLOAT | DOUBLE | DATE | TIMESTAMP | TIMESTAMP_NTZ | STRING
-        # | BINARY | DECIMAL | INTERVAL | ARRAY | STRUCT | MAP | CHAR | NULL | USER_DEFINED_TYPE | TABLE_TYPE
-        if field.get("type_name") in ["INTERVAL", "ARRAY", "STRUCT", "MAP", "USER_DEFINED_TYPE", "TABLE_TYPE"]:
-            # complex types are not supported, yet
-            raise DataContractException(
-                type="schema",
-                result="failed",
-                name="Map unity type to data contract type",
-                reason=f"type ${field.get('type_name')} is not supported yet for unity import",
-                engine="datacontract",
-            )

-
+    for column in columns:
+        struct_field: types.StructField = _type_json_to_spark_field(column.type_json)
+        imported_fields[column.name] = _field_from_struct_type(struct_field)

     return imported_fields


-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return "date"
-    elif type_str == "TIMESTAMP":
-        return "timestamp"
-    elif type_str == "TIMESTAMP_NTZ":
-        return "timestamp_ntz"
-    elif type_str == "STRING":
-        return "string"
-    elif type_str == "BINARY":
-        return "bytes"
-    elif type_str == "DECIMAL":
-        return "decimal"
-    elif type_str == "CHAR":
-        return "varchar"
-    elif type_str == "NULL":
-        return "null"
-    else:
-        raise DataContractException(
-            type="schema",
-            result="failed",
-            name="Map unity type to data contract type",
-            reason=f"Unsupported type {type_str} in unity json definition.",
-            engine="datacontract",
-        )
+def _type_json_to_spark_field(type_json: str) -> types.StructField:
+    """
+    Parses a JSON string representing a Spark field and returns a StructField object.
+
+    The reason we do this is to leverage the Spark JSON schema parser to handle the
+    complexity of the Spark field types. The field `type_json` in the Unity API is
+    the output of a `StructField.jsonValue()` call.
+
+    :param type_json: The JSON string representing the Spark field.
+    :type type_json: str
+
+    :return: The StructField object.
+    :rtype: types.StructField
+    """
+    type_dict = json.loads(type_json)
+    return types.StructField.fromJson(type_dict)
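Sketch (not part of the diff) of what the new helper does with a Unity `type_json` value, which is the JSON form (`StructField.jsonValue()`) of a Spark struct field.

```python
from datacontract.imports.unity_importer import _type_json_to_spark_field

type_json = '{"name": "order_id", "type": "long", "nullable": false, "metadata": {"comment": "primary key"}}'
struct_field = _type_json_to_spark_field(type_json)
# struct_field is a pyspark StructField("order_id", LongType(), nullable=False,
# metadata={"comment": "primary key"}), ready for _field_from_struct_type above.
```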
datacontract/lint/resolve.py
CHANGED
@@ -114,7 +114,16 @@ def _resolve_definition_ref(ref, spec) -> Definition:

 def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification):
     path_elements = definition_path.split("/")
-
+    definition_key = path_elements[2]
+    if definition_key not in spec.definitions:
+        raise DataContractException(
+            type="lint",
+            result="failed",
+            name="Check that data contract YAML is valid",
+            reason=f"Cannot resolve definition {definition_key}",
+            engine="datacontract",
+        )
+    definition = spec.definitions[definition_key]
     definition = _find_subfield_in_definition(definition, path_elements[3:])
     return definition

datacontract/lint/urls.py
CHANGED
@@ -25,16 +25,30 @@ def fetch_resource(url: str):


 def _set_api_key(headers, url):
-    if ".datamesh-manager.com/"
-
-
-
-
-
-
-
-
-
-
-
+    if ".datamesh-manager.com/" in url:
+        datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+        if datamesh_manager_api_key is None or datamesh_manager_api_key == "":
+            print("Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.")
+            raise DataContractException(
+                type="lint",
+                name=f"Reading data contract from {url}",
+                reason="Error: Data Mesh Manager API Key is not set. Set env variable DATAMESH_MANAGER_API_KEY.",
+                engine="datacontract",
+                result="error",
+            )
+        headers["x-api-key"] = datamesh_manager_api_key
+    elif ".datacontract-manager.com/" in url:
+        datacontract_manager_api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
+        if datacontract_manager_api_key is None or datacontract_manager_api_key == "":
+            print("Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.")
+            raise DataContractException(
+                type="lint",
+                name=f"Reading data contract from {url}",
+                reason="Error: Data Contract Manager API Key is not set. Set env variable DATACONTRACT_MANAGER_API_KEY.",
+                engine="datacontract",
+                result="error",
+            )
+        headers["x-api-key"] = datacontract_manager_api_key
+    else:
+        # do nothing
+        pass
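Sketch (not part of the diff) of the new behaviour when reading a contract hosted on Data Mesh Manager or Data Contract Manager; the hostname checks and env variable names come from the change above, the URL is a placeholder.

```python
import os

os.environ["DATAMESH_MANAGER_API_KEY"] = "<your-api-key>"  # placeholder value
# fetch_resource("https://example.datamesh-manager.com/.../datacontract.yaml") now attaches
# the key as an "x-api-key" header; with the variable unset or empty it raises a
# DataContractException instead of silently sending an unauthenticated request.
```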
{datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datacontract-cli
-Version: 0.10.
+Version: 0.10.13
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com

@@ -12,16 +12,16 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: typer<0.13,>=0.12
-Requires-Dist: pydantic<2.
+Requires-Dist: pydantic<2.10.0,>=2.8.2
 Requires-Dist: pyyaml~=6.0.1
 Requires-Dist: requests<2.33,>=2.31
-Requires-Dist: fastapi==0.
-Requires-Dist: uvicorn==0.30.
+Requires-Dist: fastapi==0.114.2
+Requires-Dist: uvicorn==0.30.6
 Requires-Dist: fastjsonschema<2.21.0,>=2.19.1
 Requires-Dist: fastparquet==2024.5.0
 Requires-Dist: python-multipart==0.0.9
-Requires-Dist: rich
-Requires-Dist: simple-ddl-parser==1.6.
+Requires-Dist: rich<13.9,>=13.7
+Requires-Dist: simple-ddl-parser==1.6.1
 Requires-Dist: duckdb==1.0.0
 Requires-Dist: soda-core-duckdb<3.4.0,>=3.3.1
 Requires-Dist: setuptools>=60

@@ -29,7 +29,7 @@ Requires-Dist: python-dotenv~=1.0.0
 Requires-Dist: rdflib==7.0.0
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc~=1.16
 Requires-Dist: opentelemetry-exporter-otlp-proto-http~=1.16
-Requires-Dist: boto3<1.35.
+Requires-Dist: boto3<1.35.20,>=1.34.41
 Requires-Dist: jinja-partials>=0.2.1
 Provides-Extra: all
 Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,kafka,postgres,s3,snowflake,sqlserver,trino]; extra == "all"

@@ -39,7 +39,8 @@ Provides-Extra: bigquery
 Requires-Dist: soda-core-bigquery<3.4.0,>=3.3.1; extra == "bigquery"
 Provides-Extra: databricks
 Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "databricks"
-Requires-Dist: databricks-sql-connector<3.
+Requires-Dist: databricks-sql-connector<3.5.0,>=3.1.2; extra == "databricks"
+Requires-Dist: databricks-sdk<0.33.0,>=0.32.0; extra == "databricks"
 Requires-Dist: soda-core-spark[databricks]<3.4.0,>=3.3.1; extra == "databricks"
 Provides-Extra: dbml
 Requires-Dist: pydbml>=1.1.1; extra == "dbml"

@@ -49,7 +50,7 @@ Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
 Requires-Dist: httpx==0.27.2; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.0.
+Requires-Dist: moto==5.0.14; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<3.9.0,>=3.7.1; extra == "dev"
 Requires-Dist: pyarrow>=12.0.0; extra == "dev"

@@ -65,7 +66,7 @@ Requires-Dist: soda-core-spark-df<3.4.0,>=3.3.1; extra == "kafka"
 Provides-Extra: postgres
 Requires-Dist: soda-core-postgres<3.4.0,>=3.3.1; extra == "postgres"
 Provides-Extra: s3
-Requires-Dist: s3fs==2024.
+Requires-Dist: s3fs==2024.9.0; extra == "s3"
 Provides-Extra: snowflake
 Requires-Dist: snowflake-connector-python[pandas]<3.13,>=3.6; extra == "snowflake"
 Requires-Dist: soda-core-snowflake<3.4.0,>=3.3.1; extra == "snowflake"

@@ -825,7 +826,7 @@ models:
 │ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|db The export format. [default: None] [required] │
 │ t-staging-sql|odcs|rdf|avro|protobuf|great-expectati │
 │ ons|terraform|avro-idl|sql|sql-query|html|go|bigquer │
-│ y|dbml|spark|sqlalchemy]
+│ y|dbml|spark|sqlalchemy|data-caterer] │
 │ --output PATH Specify the file path where the exported data will be │
 │ saved. If no path is provided, the output will be │
 │ printed to stdout. │

@@ -877,6 +878,7 @@ Available export options:
 | `DBML` | Export to a DBML Diagram description | ✅ |
 | `spark` | Export to a Spark StructType | ✅ |
 | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
+| `data-caterer` | Export to Data Caterer in YAML format | ✅ |
 | Missing something? | Please create an issue on GitHub | TBD |

 #### Great Expectations

@@ -940,6 +942,20 @@ To specify custom Avro properties in your data contract, you can define them wit

 >NOTE: At this moment, we just support [logicalType](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types) and [default](https://avro.apache.org/docs/1.11.0/spec.htm)

+#### Data Caterer
+
+The export function converts the data contract to a data generation task in YAML format that can be
+ingested by [Data Caterer](https://github.com/data-catering/data-caterer). This gives you the
+ability to generate production-like data in any environment based off your data contract.
+
+```shell
+datacontract export datacontract.yaml --format data-caterer --model orders
+```
+
+You can further customise the way data is generated via adding
+[additional metadata in the YAML](https://data.catering/setup/generator/data-generator/)
+to suit your needs.
+
 #### Example Configuration

 ```yaml
{datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/RECORD
CHANGED

@@ -18,7 +18,7 @@ datacontract/engines/soda/connections/bigquery.py,sha256=C-8kxmzpYe88bJp80ObHFLM
 datacontract/engines/soda/connections/dask.py,sha256=Yy6Et2n_vDVsdjtqyBWDSZt7mnjPzPk_MZ-92VZHfnY,1496
 datacontract/engines/soda/connections/databricks.py,sha256=lpMju-o_TzLZeF0EEVwePPr8JahqvFnj5xRYjF15fc8,561
 datacontract/engines/soda/connections/duckdb.py,sha256=OVaxDO9HwfD4M6FYDIDS4zwnUJ20uAM1jz_Fy3EkClo,6271
-datacontract/engines/soda/connections/kafka.py,sha256=
+datacontract/engines/soda/connections/kafka.py,sha256=KBkE9ECxAe_sN3bmIAzZm9hDVrovk6P-op35bpWYoDQ,7555
 datacontract/engines/soda/connections/postgres.py,sha256=9GTF4Es3M5vb7ocSGqAxXmslvkS5CjsPQGIuo020CFc,626
 datacontract/engines/soda/connections/snowflake.py,sha256=rfG2ysuqNM6TkvyqQKcGHFsTGJ6AROmud5VleUDRrb0,749
 datacontract/engines/soda/connections/sqlserver.py,sha256=RzGLbCUdRyfmDcqtM_AB9WZ-Xk-XYX91nkXpVNpYbvc,1440

@@ -26,24 +26,25 @@ datacontract/engines/soda/connections/trino.py,sha256=JvKUP9aFg_n095oWE0-bGmfbET
 datacontract/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datacontract/export/avro_converter.py,sha256=n14GtP88c5lpcxyxF5W86k7xYGiwc6j0PIedElbeWjM,4171
 datacontract/export/avro_idl_converter.py,sha256=-zZxV0OAC76tMkAA9hXbEtp2wtpildKJb0OIbPhgoJE,9794
-datacontract/export/bigquery_converter.py,sha256=
+datacontract/export/bigquery_converter.py,sha256=gVo6PMoUalc3UuuMtw4CGgvm43ineHNtdu-F1OHKOwA,4639
 datacontract/export/csv_type_converter.py,sha256=ZZuJwBgQnafZC7PPvAXsBf2IajPJq8TYZ1l8Qq0GYeI,1290
+datacontract/export/data_caterer_converter.py,sha256=FaY6GkuR1SPjLbz6FgraEItOVVrYpv0Q5uyHI5fF44Y,5324
 datacontract/export/dbml_converter.py,sha256=Lmg0so_YP652UvDiNYyhAzzxtYvaHQBWFMUHzWadSDA,4739
 datacontract/export/dbt_converter.py,sha256=QfapVeunwab7bWn_B9-J_1U8fm1wdsPBW_Ws2b99o1E,9390
-datacontract/export/exporter.py,sha256=
-datacontract/export/exporter_factory.py,sha256=
+datacontract/export/exporter.py,sha256=yicZSFRttjFp7kVrGxijD_2QYEbArEhQUsWrlxqP7k8,2909
+datacontract/export/exporter_factory.py,sha256=og8c6ljKtkaZvHitZq66tdUvz5mpYJ9b4kFWZccXOgY,5111
 datacontract/export/go_converter.py,sha256=oWkEjOXBIGN8emTaxeJD8xpB3VYP4sVaNzKX-Oeqgw0,3330
 datacontract/export/great_expectations_converter.py,sha256=LIuWHn3h7xRrDDs1YoibHOI8NVF2Izh5IZOQFLeWN7k,5302
 datacontract/export/html_export.py,sha256=i7jckL80osA8incF0YoJdq3NaYmV1_3OU0cZBfPdNDs,2578
-datacontract/export/jsonschema_converter.py,sha256=
+datacontract/export/jsonschema_converter.py,sha256=O3pCtqiQ41m0M-ohmmq3eMgIioD3DRhnWGAsEsIKbKQ,5557
 datacontract/export/odcs_converter.py,sha256=WngHoTF7R9uMDLCedBso7m2s0yDJQS5HTACtH3AhG-E,4673
 datacontract/export/protobuf_converter.py,sha256=Baqq2uvRg0AHFo6gR-hEYdLyiYRWziC1Q4AujlHgCzo,3192
 datacontract/export/pydantic_converter.py,sha256=1Lt9F8i6zyQYb44MyQtsXwCWWXYxZ47SmzArr_uPqsU,5579
 datacontract/export/rdf_converter.py,sha256=nNYVKJ5bQEq-rRz3yHwpvbsNbvjoOMwkCfkev8Y11H0,6436
 datacontract/export/sodacl_converter.py,sha256=Uv3BOsq20YKCHmSU6mN-6L2hPPF4V9fPxDoJ42okfHs,7280
-datacontract/export/spark_converter.py,sha256=
+datacontract/export/spark_converter.py,sha256=lynG9dA2wUOpiqqRI-E3a56qxybG4PPZ1kETjL8jywo,7034
 datacontract/export/sql_converter.py,sha256=grDTj3MGhVc5BbJbZ__LFawJtrsnv4ApmOMX4ohPnCQ,4802
-datacontract/export/sql_type_converter.py,sha256=
+datacontract/export/sql_type_converter.py,sha256=jw0ljv5nsGaMBGxWrGa5xCAZsm0bbIy94pRF_tbnDwM,10561
 datacontract/export/sqlalchemy_converter.py,sha256=x2_MVvDMfqgf_c5yfWimSC0um718EYYUpFiCe1-HN4g,6469
 datacontract/export/terraform_converter.py,sha256=IFwn-6BWPVkfwDQbj_Gl0GwbJdMKiwKgXCXz5A64cEs,2163
 datacontract/imports/avro_importer.py,sha256=HkzaMCa9A20D3Z0O_7I67TRcqyNV9XGYX_0JAF51qF8,9937

@@ -55,17 +56,17 @@ datacontract/imports/importer.py,sha256=FE9YsrcNunLiEpNGXd6PAHb_Zs8_dVUQPPBqOVAM
 datacontract/imports/importer_factory.py,sha256=0D8imN7WvQyvASkYHN8JTvNO9VLBllwHlyTeZNUD9Js,3155
 datacontract/imports/jsonschema_importer.py,sha256=cY70b2bFx9Kfsnq5vNj0iy_GvNOIcBH8xeFjrsxv4bQ,4925
 datacontract/imports/odcs_importer.py,sha256=uDBCzWi8mKCslw6Og5t-037dVVVZVsbJjK3EHJH_zQI,7474
-datacontract/imports/spark_importer.py,sha256=
+datacontract/imports/spark_importer.py,sha256=ZKjfZ-lqTRO_bU-QSvAeaXf8PWuR7FJZocUOWt8g0Qg,5099
 datacontract/imports/sql_importer.py,sha256=CiOzSthTwyjCHxYQNVO7j4udchEFER8gtXrr_IyX2dU,2890
-datacontract/imports/unity_importer.py,sha256=
+datacontract/imports/unity_importer.py,sha256=a13eBZBYwIzaaeKE4wDWVEPGq5aCM4iZxmBxxpZe3MU,6737
 datacontract/init/download_datacontract_file.py,sha256=pj_4mhWKlEtfueWohDgkb1nyuG5ERDipUDszxKwpZUs,413
 datacontract/integration/datamesh_manager.py,sha256=iNjDrcZfH8vyNbyQPbxkv7XMfG5rtbcLZXIlfMJ0c8c,2752
 datacontract/integration/opentelemetry.py,sha256=0XsqOsc06qNtXC5KqZCxyeC_IGlIi63lYWHh3KKlOO0,3828
 datacontract/lint/files.py,sha256=tg0vq_w4LQsEr_8A5qr4hUJmHeGalUpsXJXC1t-OGC0,471
 datacontract/lint/lint.py,sha256=GhgBwfDgihXfWIu1WUfYeJoxfYacVJrwq3L2KO4Z2Io,5076
-datacontract/lint/resolve.py,sha256=
+datacontract/lint/resolve.py,sha256=6t5eqrtn3nwIA0Hupfa2NaDP92WFxiR5Qp9VGmp54R8,9159
 datacontract/lint/schema.py,sha256=php99jF3puwCWrE02JD1ownC5UFEKCZsLH89UGOxMww,1469
-datacontract/lint/urls.py,sha256=
+datacontract/lint/urls.py,sha256=f8ZbAyPkqPLvxkf7bC9kFoFN9qbn687NI0FhKsdhkX4,2120
 datacontract/lint/linters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datacontract/lint/linters/description_linter.py,sha256=Cm99NTWo75kDRIW63qz5uEa8xsug_8dFURPRC1twodw,1554
 datacontract/lint/linters/example_model_linter.py,sha256=EljSvzI4RSswdZrGa840bX6Zjat2GI16o5yDmKJfDOg,3972

@@ -88,9 +89,9 @@ datacontract/templates/partials/example.html,sha256=F1dWbHDIXQScgfs4OVgqM1lR4uV4
 datacontract/templates/partials/model_field.html,sha256=kh_ZIqJuayyxN-zDNIUPIoXOZeehGxXQxiImYB6G5qY,6946
 datacontract/templates/partials/server.html,sha256=WkWFbz1ZvhIAUQQhH5Lkwb0HZRW907ehEnFmJSkpquQ,6235
 datacontract/templates/style/output.css,sha256=F3oEhUpuv8kA_dWr4pJymBS_Ju6huIIZdLMkJzPzMmU,25647
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
+datacontract_cli-0.10.13.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
+datacontract_cli-0.10.13.dist-info/METADATA,sha256=Y1xIanEO-C--EMNUf7sQuZjKtFXS3Up2cxJpz0EB6HE,89019
+datacontract_cli-0.10.13.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+datacontract_cli-0.10.13.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
+datacontract_cli-0.10.13.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
+datacontract_cli-0.10.13.dist-info/RECORD,,
{datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/LICENSE: File without changes
{datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/entry_points.txt: File without changes
{datacontract_cli-0.10.12.dist-info → datacontract_cli-0.10.13.dist-info}/top_level.txt: File without changes