datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +116 -10
  5. datacontract/data_contract.py +143 -65
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +22 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/export/avro_converter.py +20 -3
  15. datacontract/export/bigquery_converter.py +1 -1
  16. datacontract/export/dbt_converter.py +36 -7
  17. datacontract/export/dqx_converter.py +126 -0
  18. datacontract/export/duckdb_type_converter.py +57 -0
  19. datacontract/export/excel_exporter.py +923 -0
  20. datacontract/export/exporter.py +3 -0
  21. datacontract/export/exporter_factory.py +17 -1
  22. datacontract/export/great_expectations_converter.py +55 -5
  23. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  24. datacontract/export/markdown_converter.py +134 -5
  25. datacontract/export/mermaid_exporter.py +110 -0
  26. datacontract/export/odcs_v3_exporter.py +187 -145
  27. datacontract/export/protobuf_converter.py +163 -69
  28. datacontract/export/rdf_converter.py +2 -2
  29. datacontract/export/sodacl_converter.py +9 -1
  30. datacontract/export/spark_converter.py +31 -4
  31. datacontract/export/sql_converter.py +6 -2
  32. datacontract/export/sql_type_converter.py +20 -8
  33. datacontract/imports/avro_importer.py +63 -12
  34. datacontract/imports/csv_importer.py +111 -57
  35. datacontract/imports/excel_importer.py +1111 -0
  36. datacontract/imports/importer.py +16 -3
  37. datacontract/imports/importer_factory.py +17 -0
  38. datacontract/imports/json_importer.py +325 -0
  39. datacontract/imports/odcs_importer.py +2 -2
  40. datacontract/imports/odcs_v3_importer.py +351 -151
  41. datacontract/imports/protobuf_importer.py +264 -0
  42. datacontract/imports/spark_importer.py +117 -13
  43. datacontract/imports/sql_importer.py +32 -16
  44. datacontract/imports/unity_importer.py +84 -38
  45. datacontract/init/init_template.py +1 -1
  46. datacontract/integration/datamesh_manager.py +16 -2
  47. datacontract/lint/resolve.py +112 -23
  48. datacontract/lint/schema.py +24 -15
  49. datacontract/model/data_contract_specification/__init__.py +1 -0
  50. datacontract/model/odcs.py +13 -0
  51. datacontract/model/run.py +3 -0
  52. datacontract/output/junit_test_results.py +3 -3
  53. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  54. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  55. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  56. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  58. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  59. datacontract/templates/datacontract.html +54 -3
  60. datacontract/templates/datacontract_odcs.html +685 -0
  61. datacontract/templates/index.html +5 -2
  62. datacontract/templates/partials/server.html +2 -0
  63. datacontract/templates/style/output.css +319 -145
  64. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
  65. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  66. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  68. datacontract/export/csv_type_converter.py +0 -36
  69. datacontract/lint/lint.py +0 -142
  70. datacontract/lint/linters/description_linter.py +0 -35
  71. datacontract/lint/linters/field_pattern_linter.py +0 -34
  72. datacontract/lint/linters/field_reference_linter.py +0 -48
  73. datacontract/lint/linters/notice_period_linter.py +0 -55
  74. datacontract/lint/linters/quality_schema_linter.py +0 -52
  75. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  76. datacontract/model/data_contract_specification.py +0 -327
  77. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  78. /datacontract/{lint/linters → output}/__init__.py +0 -0
  79. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  80. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/export/protobuf_converter.py
@@ -4,102 +4,196 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 
 class ProtoBufExporter(Exporter):
     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
-        return to_protobuf(data_contract)
+        # Returns a dict containing the protobuf representation.
+        proto = to_protobuf(data_contract)
+        return {"protobuf": proto}
 
 
-def to_protobuf(data_contract_spec: DataContractSpecification):
+def to_protobuf(data_contract_spec: DataContractSpecification) -> str:
+    """
+    Generates a Protobuf file from the data contract specification.
+    Scans all models for enum fields (even if the type is "string") by checking for a "values" property.
+    """
     messages = ""
+    enum_definitions = {}
+
+    # Iterate over all models to generate messages and collect enum definitions.
     for model_name, model in data_contract_spec.models.items():
-        messages += to_protobuf_message(model_name, model.fields, model.description, 0)
+        for field_name, field in model.fields.items():
+            # If the field has enum values, collect them.
+            if _is_enum_field(field):
+                enum_name = _get_enum_name(field, field_name)
+                enum_values = _get_enum_values(field)
+                if enum_values and enum_name not in enum_definitions:
+                    enum_definitions[enum_name] = enum_values
+
+        messages += to_protobuf_message(model_name, model.fields, getattr(model, "description", ""), 0)
         messages += "\n"
 
-    result = f"""syntax = "proto3";
-
-    {messages}
-    """
-
-    return result
-
-
-def _to_protobuf_message_name(model_name):
-    return model_name[0].upper() + model_name[1:]
-
-
-def to_protobuf_message(model_name, fields, description, indent_level: int = 0):
+    # Build header with syntax and package declarations.
+    header = 'syntax = "proto3";\n\n'
+    package = getattr(data_contract_spec, "package", "example")
+    header += f"package {package};\n\n"
+
+    # Append enum definitions.
+    for enum_name, enum_values in enum_definitions.items():
+        header += f"// Enum for {enum_name}\n"
+        header += f"enum {enum_name} {{\n"
+        # Only iterate if enum_values is a dictionary.
+        if isinstance(enum_values, dict):
+            for enum_const, value in sorted(enum_values.items(), key=lambda item: item[1]):
+                normalized_const = enum_const.upper().replace(" ", "_")
+                header += f"  {normalized_const} = {value};\n"
+        else:
+            header += f"  // Warning: Enum values for {enum_name} are not a dictionary\n"
+        header += "}\n\n"
+    return header + messages
+
+
+def _is_enum_field(field) -> bool:
+    """
+    Returns True if the field (dict or object) has a non-empty "values" property.
+    """
+    if isinstance(field, dict):
+        return bool(field.get("values"))
+    return bool(getattr(field, "values", None))
+
+
+def _get_enum_name(field, field_name: str) -> str:
+    """
+    Returns the enum name either from the field's "enum_name" or derived from the field name.
+    """
+    if isinstance(field, dict):
+        return field.get("enum_name", _to_protobuf_message_name(field_name))
+    return getattr(field, "enum_name", None) or _to_protobuf_message_name(field_name)
+
+
+def _get_enum_values(field) -> dict:
+    """
+    Returns the enum values from the field.
+    If the values are not a dictionary, attempts to extract enum attributes.
+    """
+    if isinstance(field, dict):
+        values = field.get("values", {})
+    else:
+        values = getattr(field, "values", {})
+
+    if not isinstance(values, dict):
+        # If values is a BaseModel (or similar) with a .dict() method, use it.
+        if hasattr(values, "dict") and callable(values.dict):
+            values_dict = values.dict()
+            return {k: v for k, v in values_dict.items() if k.isupper() and isinstance(v, int)}
+        else:
+            # Otherwise, iterate over attributes that look like enums.
+            return {
+                key: getattr(values, key)
+                for key in dir(values)
+                if key.isupper() and isinstance(getattr(values, key), int)
+            }
+    return values
+
+
+def _to_protobuf_message_name(name: str) -> str:
+    """
+    Returns a valid Protobuf message/enum name by capitalizing the first letter.
+    """
+    return name[0].upper() + name[1:] if name else name
+
+
+def to_protobuf_message(model_name: str, fields: dict, description: str, indent_level: int = 0) -> str:
+    """
+    Generates a Protobuf message definition from the model's fields.
+    Handles nested messages for complex types.
+    """
     result = ""
+    if description:
+        result += f"{indent(indent_level)}// {description}\n"
 
-    if description is not None:
-        result += f"""{indent(indent_level)}/* {description} */\n"""
-
-    fields_protobuf = ""
+    result += f"message {_to_protobuf_message_name(model_name)} {{\n"
     number = 1
     for field_name, field in fields.items():
-        if field.type in ["object", "record", "struct"]:
-            fields_protobuf += (
-                "\n".join(
-                    map(
-                        lambda x: "  " + x,
-                        to_protobuf_message(field_name, field.fields, field.description, indent_level + 1).splitlines(),
-                    )
-                )
-                + "\n"
-            )
-
-        fields_protobuf += to_protobuf_field(field_name, field, field.description, number, 1) + "\n"
+        # For nested objects, generate a nested message.
+        field_type = _get_field_type(field)
+        if field_type in ["object", "record", "struct"]:
+            nested_desc = field.get("description", "") if isinstance(field, dict) else getattr(field, "description", "")
+            nested_fields = field.get("fields", {}) if isinstance(field, dict) else field.fields
+            nested_message = to_protobuf_message(field_name, nested_fields, nested_desc, indent_level + 1)
+            result += nested_message + "\n"
+
+        field_desc = field.get("description", "") if isinstance(field, dict) else getattr(field, "description", "")
+        result += to_protobuf_field(field_name, field, field_desc, number, indent_level + 1) + "\n"
         number += 1
-    result += f"message {_to_protobuf_message_name(model_name)} {{\n{fields_protobuf}}}\n"
 
+    result += f"{indent(indent_level)}}}\n"
     return result
 
 
-def to_protobuf_field(field_name, field, description, number: int, indent_level: int = 0):
-    optional = ""
-    if not field.required:
-        optional = "optional "
-
+def to_protobuf_field(field_name: str, field, description: str, number: int, indent_level: int = 0) -> str:
+    """
+    Generates a field definition within a Protobuf message.
+    """
     result = ""
-
-    if description is not None:
-        result += f"""{indent(indent_level)}/* {description} */\n"""
-
-    result += f"{indent(indent_level)}{optional}{_convert_type(field_name, field)} {field_name} = {number};"
-
+    if description:
+        result += f"{indent(indent_level)}// {description}\n"
+    result += f"{indent(indent_level)}{_convert_type(field_name, field)} {field_name} = {number};"
    return result
 
 
-def indent(indent_level):
+def indent(indent_level: int) -> str:
     return "  " * indent_level
 
 
-def _convert_type(field_name, field) -> None | str:
-    type = field.type
-    if type is None:
-        return None
-    if type.lower() in ["string", "varchar", "text"]:
-        return "string"
-    if type.lower() in ["timestamp", "timestamp_tz"]:
-        return "string"
-    if type.lower() in ["timestamp_ntz"]:
-        return "string"
-    if type.lower() in ["date"]:
+def _get_field_type(field) -> str:
+    """
+    Retrieves the field type from the field definition.
+    """
+    if isinstance(field, dict):
+        return field.get("type", "").lower()
+    return getattr(field, "type", "").lower()
+
+
+def _convert_type(field_name: str, field) -> str:
+    """
+    Converts a field's type (from the data contract) to a Protobuf type.
+    Prioritizes enum conversion if a non-empty "values" property exists.
+    """
+    # For debugging purposes
+    print("Converting field:", field_name)
+    # If the field should be treated as an enum, return its enum name.
+    if _is_enum_field(field):
+        return _get_enum_name(field, field_name)
+
+    lower_type = _get_field_type(field)
+    if lower_type in ["string", "varchar", "text"]:
         return "string"
-    if type.lower() in ["time"]:
+    if lower_type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date", "time"]:
         return "string"
-    if type.lower() in ["number", "decimal", "numeric"]:
+    if lower_type in ["number", "decimal", "numeric"]:
         return "double"
-    if type.lower() in ["float", "double"]:
-        return type.lower()
-    if type.lower() in ["integer", "int"]:
+    if lower_type in ["float", "double"]:
+        return lower_type
+    if lower_type in ["integer", "int"]:
         return "int32"
-    if type.lower() in ["long", "bigint"]:
+    if lower_type in ["long", "bigint"]:
         return "int64"
-    if type.lower() in ["boolean"]:
+    if lower_type in ["boolean"]:
         return "bool"
-    if type.lower() in ["bytes"]:
+    if lower_type in ["bytes"]:
         return "bytes"
-    if type.lower() in ["object", "record", "struct"]:
+    if lower_type in ["object", "record", "struct"]:
         return _to_protobuf_message_name(field_name)
-    if type.lower() in ["array"]:
-        # TODO spec is missing arrays
-        return "repeated string"
-    return None
+    if lower_type == "array":
+        # Handle array types. Check for an "items" property.
+        items = field.get("items") if isinstance(field, dict) else getattr(field, "items", None)
+        if items and isinstance(items, dict) and items.get("type"):
+            item_type = items.get("type", "").lower()
+            if item_type in ["object", "record", "struct"]:
+                # Singularize the field name (a simple approach).
+                singular = field_name[:-1] if field_name.endswith("s") else field_name
+                return "repeated " + _to_protobuf_message_name(singular)
+            else:
+                return "repeated " + _convert_type(field_name, items)
+        else:
+            return "repeated string"
+    # Fallback for unrecognized types.
+    return "string"
datacontract/export/rdf_converter.py
@@ -57,8 +57,8 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
     else:
         g = Graph(base=Namespace(""))
 
-    dc = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/")
-    dcx = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/Extension/")
+    dc = Namespace("https://datacontract.com/DataContractSpecification/1.2.1/")
+    dcx = Namespace("https://datacontract.com/DataContractSpecification/1.2.1/Extension/")
 
     g.bind("dc", dc)
     g.bind("dcx", dcx)
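Only the specification version in the two namespace IRIs changes. A minimal rdflib sketch of the updated bindings (graph construction elided):

    from rdflib import Graph, Namespace

    g = Graph()
    dc = Namespace("https://datacontract.com/DataContractSpecification/1.2.1/")
    dcx = Namespace("https://datacontract.com/DataContractSpecification/1.2.1/Extension/")
    g.bind("dc", dc)   # terms emitted by the exporter now resolve under 1.2.1
    g.bind("dcx", dcx)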
datacontract/export/sodacl_converter.py
@@ -2,12 +2,14 @@ import yaml
 
 from datacontract.engines.data_contract_checks import create_checks
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run
 
 
 class SodaExporter(Exporter):
-    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
         run = Run.create_run()
+        server = get_server(data_contract, server)
         run.checks.extend(create_checks(data_contract, server))
         return to_sodacl_yaml(run)
 
@@ -28,3 +30,9 @@ def to_sodacl_yaml(run: Run) -> str:
         else:
             sodacl_dict[key] = value
     return yaml.dump(sodacl_dict)
+
+
+def get_server(data_contract_specification: DataContractSpecification, server_name: str = None) -> Server | None:
+    if server_name is None:
+        return None
+    return data_contract_specification.servers.get(server_name)
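A quick sketch of how the new helper behaves (server names invented; spec is a parsed DataContractSpecification):

    from datacontract.export.sodacl_converter import get_server

    # Assume spec.servers == {"prod": Server(type="snowflake", ...)}
    get_server(spec, "prod")     # -> that Server, so checks can be server-specific
    get_server(spec, None)       # -> None; checks are created without a server
    get_server(spec, "unknown")  # -> None (a plain dict.get lookup)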
datacontract/export/spark_converter.py
@@ -1,3 +1,5 @@
+import json
+
 from pyspark.sql import types
 
 from datacontract.export.exporter import Exporter
@@ -104,7 +106,8 @@ def to_struct_field(field: Field, field_name: str) -> types.StructField:
         types.StructField: The corresponding Spark StructField.
     """
     data_type = to_spark_data_type(field)
-    return types.StructField(name=field_name, dataType=data_type, nullable=not field.required)
+    metadata = to_spark_metadata(field)
+    return types.StructField(name=field_name, dataType=data_type, nullable=not field.required, metadata=metadata)
 
 
 def to_spark_data_type(field: Field) -> types.DataType:
@@ -126,6 +129,8 @@ def to_spark_data_type(field: Field) -> types.DataType:
         return types.StructType(to_struct_type(field.fields))
     if field_type == "map":
         return types.MapType(to_spark_data_type(field.keys), to_spark_data_type(field.values))
+    if field_type == "variant":
+        return types.VariantType()
     if field_type in ["string", "varchar", "text"]:
         return types.StringType()
     if field_type in ["number", "decimal", "numeric"]:
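A small sketch of the new branch (assumes a PySpark build that ships types.VariantType, i.e. Spark 4.x; the Field construction is illustrative):

    from datacontract.export.spark_converter import to_spark_data_type
    from datacontract.model.data_contract_specification import Field

    to_spark_data_type(Field(type="variant"))  # expected: types.VariantType()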
@@ -150,7 +155,25 @@ def to_spark_data_type(field: Field) -> types.DataType:
         return types.DateType()
     if field_type == "bytes":
         return types.BinaryType()
-    return types.BinaryType()
+    return types.StringType()  # default if no condition is met
+
+
+def to_spark_metadata(field: Field) -> dict[str, str]:
+    """
+    Convert a field to a Spark metadata dictionary.
+
+    Args:
+        field (Field): The field to convert.
+
+    Returns:
+        dict: dictionary that can be supplied to Spark as metadata for a StructField
+    """
+
+    metadata = {}
+    if field.description:
+        metadata["comment"] = field.description
+
+    return metadata
 
 
 def print_schema(dtype: types.DataType) -> str:
@@ -175,7 +198,7 @@ def print_schema(dtype: types.DataType) -> str:
         Returns:
             str: The indented text.
         """
-        return "\n".join([f'{"  " * level}{line}' for line in text.split("\n")])
+        return "\n".join([f"{'  ' * level}{line}" for line in text.split("\n")])
 
     def repr_column(column: types.StructField) -> str:
         """
@@ -190,7 +213,11 @@ def print_schema(dtype: types.DataType) -> str:
         name = f'"{column.name}"'
         data_type = indent(print_schema(column.dataType), 1)
         nullable = indent(f"{column.nullable}", 1)
-        return f"StructField({name},\n{data_type},\n{nullable}\n)"
+        if column.metadata:
+            metadata = indent(f"{json.dumps(column.metadata)}", 1)
+            return f"StructField({name},\n{data_type},\n{nullable},\n{metadata}\n)"
+        else:
+            return f"StructField({name},\n{data_type},\n{nullable}\n)"
 
     def format_struct_type(struct_type: types.StructType) -> str:
         """
datacontract/export/sql_converter.py
@@ -4,7 +4,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 
 
 class SqlExporter(Exporter):
-    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
         server_type = _determine_sql_server_type(
             data_contract,
             sql_server_type,
@@ -13,7 +13,7 @@ class SqlExporter:
 
 
 class SqlQueryExporter(Exporter):
-    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
         model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
         server_type = _determine_sql_server_type(data_contract, sql_server_type, export_args.get("server"))
         return to_sql_query(
@@ -117,6 +117,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'
+        if server_type == "snowflake" and field.description is not None:
+            result += f" COMMENT '{_escape(field.description)}'"
         if current_field_index < fields:
             result += ","
         result += "\n"
@@ -124,6 +126,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
     result += ")"
     if server_type == "databricks" and model.description is not None:
         result += f' COMMENT "{_escape(model.description)}"'
+    if server_type == "snowflake" and model.description is not None:
+        result += f" COMMENT='{_escape(model.description)}'"
     result += ";\n"
     return result
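With the two Snowflake branches in place, DDL generated for a Snowflake server carries the descriptions as comments, roughly in this shape (table, column, and type are invented; exact rendering depends on the model):

    CREATE TABLE orders (
      order_id TEXT not null primary key COMMENT 'Order identifier'
    ) COMMENT='All incoming orders';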
datacontract/export/sql_type_converter.py
@@ -3,6 +3,9 @@ from datacontract.model.data_contract_specification import Field
 
 
 def convert_to_sql_type(field: Field, server_type: str) -> str:
+    if field.config and "physicalType" in field.config:
+        return field.config["physicalType"]
+
     if server_type == "snowflake":
         return convert_to_snowflake(field)
     elif server_type == "postgres":
@@ -19,6 +22,7 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
         return convert_type_to_bigquery(field)
     elif server_type == "trino":
         return convert_type_to_trino(field)
+
     return field.type
 
 
@@ -129,8 +133,9 @@ def convert_to_dataframe(field: Field) -> None | str:
     if type.lower() in ["time"]:
         return "STRING"
     if type.lower() in ["number", "decimal", "numeric"]:
-        # precision and scale not supported by data contract
-        return "DECIMAL"
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
@@ -158,9 +163,13 @@ def convert_to_dataframe(field: Field) -> None | str:
 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
 def convert_to_databricks(field: Field) -> None | str:
-    if field.config and "databricksType" in field.config:
-        return field.config["databricksType"]
     type = field.type
+    if (
+        field.config
+        and "databricksType" in field.config
+        and type.lower() not in ["array", "object", "record", "struct"]
+    ):
+        return field.config["databricksType"]
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
@@ -174,8 +183,9 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["time"]:
         return "STRING"
     if type.lower() in ["number", "decimal", "numeric"]:
-        # precision and scale not supported by data contract
-        return "DECIMAL"
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return f"DECIMAL({precision},{scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
@@ -190,13 +200,15 @@ def convert_to_databricks(field: Field) -> None | str:
         nested_fields = []
         for nested_field_name, nested_field in field.fields.items():
             nested_field_type = convert_to_databricks(nested_field)
-            nested_fields.append(f"{nested_field_name} {nested_field_type}")
-        return f"STRUCT<{', '.join(nested_fields)}>"
+            nested_fields.append(f"{nested_field_name}:{nested_field_type}")
+        return f"STRUCT<{','.join(nested_fields)}>"
     if type.lower() in ["bytes"]:
         return "BINARY"
     if type.lower() in ["array"]:
         item_type = convert_to_databricks(field.items)
         return f"ARRAY<{item_type}>"
+    if type.lower() in ["variant"]:
+        return "VARIANT"
     return None
 
 
datacontract/imports/avro_importer.py
@@ -55,7 +55,6 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
             engine="datacontract",
             original_exception=e,
         )
-
     # type record is being used for both the table and the object types in data contract
     # -> CONSTRAINT: one table per .avsc input, all nested records are interpreted as objects
     fields = import_record_fields(avro_schema.fields)
@@ -92,6 +91,20 @@ def handle_config_avro_custom_properties(field: avro.schema.Field, imported_fiel
         imported_field.config["avroDefault"] = field.default
 
 
+LOGICAL_TYPE_MAPPING = {
+    "decimal": "decimal",
+    "date": "date",
+    "time-millis": "time",
+    "time-micros": "time",
+    "timestamp-millis": "timestamp_tz",
+    "timestamp-micros": "timestamp_tz",
+    "local-timestamp-micros": "timestamp_ntz",
+    "local-timestamp-millis": "timestamp_ntz",
+    "duration": "string",
+    "uuid": "string",
+}
+
+
 def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Field]:
     """
     Import Avro record fields and convert them to data contract fields.
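The new module-level table turns logical-type translation into a single lookup, e.g.:

    LOGICAL_TYPE_MAPPING["timestamp-millis"]        # -> "timestamp_tz"
    LOGICAL_TYPE_MAPPING["local-timestamp-micros"]  # -> "timestamp_ntz"
    LOGICAL_TYPE_MAPPING.get("unknown-logical")     # -> None; callers fall back to map_type_from_avro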
@@ -117,13 +130,23 @@ def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Fi
             imported_field.fields = import_record_fields(field.type.fields)
         elif field.type.type == "union":
             imported_field.required = False
-            type = import_type_of_optional_field(field)
-            imported_field.type = type
-            if type == "record":
-                imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
-            elif type == "array":
-                imported_field.type = "array"
-                imported_field.items = import_avro_array_items(get_array_from_union_field(field))
+            # Check for enum in union first, since it needs special handling
+            enum_schema = get_enum_from_union_field(field)
+            if enum_schema:
+                imported_field.type = "string"
+                imported_field.enum = enum_schema.symbols
+                imported_field.title = enum_schema.name
+                if not imported_field.config:
+                    imported_field.config = {}
+                imported_field.config["avroType"] = "enum"
+            else:
+                type = import_type_of_optional_field(field)
+                imported_field.type = type
+                if type == "record":
+                    imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
+                elif type == "array":
+                    imported_field.type = "array"
+                    imported_field.items = import_avro_array_items(get_array_from_union_field(field))
         elif field.type.type == "array":
             imported_field.type = "array"
             imported_field.items = import_avro_array_items(field.type)
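Sketch of the new union handling for an Avro field declared as ["null", {"type": "enum", "name": "OrderStatus", "symbols": ["PLACED", "SHIPPED"]}] (schema invented for illustration); the importer now preserves the enum instead of collapsing it:

    imported_field.required  # False (union with null)
    imported_field.type      # "string"
    imported_field.enum      # ["PLACED", "SHIPPED"]
    imported_field.title     # "OrderStatus"
    imported_field.config    # {"avroType": "enum"}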
@@ -137,9 +160,15 @@ def import_record_fields(record_fields: List[avro.schema.Field]) -> Dict[str, Fi
             if not imported_field.config:
                 imported_field.config = {}
             imported_field.config["avroType"] = "enum"
-        else:  # primitive type
-            imported_field.type = map_type_from_avro(field.type.type)
-
+        else:
+            logical_type = field.type.get_prop("logicalType")
+            if logical_type in LOGICAL_TYPE_MAPPING:
+                imported_field.type = LOGICAL_TYPE_MAPPING[logical_type]
+                if logical_type == "decimal":
+                    imported_field.precision = field.type.precision
+                    imported_field.scale = field.type.scale
+            else:
+                imported_field.type = map_type_from_avro(field.type.type)
         imported_fields[field.name] = imported_field
 
     return imported_fields
@@ -212,7 +241,11 @@ def import_type_of_optional_field(field: avro.schema.Field) -> str:
     """
     for field_type in field.type.schemas:
         if field_type.type != "null":
-            return map_type_from_avro(field_type.type)
+            logical_type = field_type.get_prop("logicalType")
+            if logical_type and logical_type in LOGICAL_TYPE_MAPPING:
+                return LOGICAL_TYPE_MAPPING[logical_type]
+            else:
+                return map_type_from_avro(field_type.type)
     raise DataContractException(
         type="schema",
         result="failed",
@@ -254,6 +287,22 @@ def get_array_from_union_field(field: avro.schema.Field) -> avro.schema.ArraySch
     return None
 
 
+def get_enum_from_union_field(field: avro.schema.Field) -> avro.schema.EnumSchema | None:
+    """
+    Get the enum schema from a union field.
+
+    Args:
+        field: The Avro field with a union type.
+
+    Returns:
+        The enum schema if found, None otherwise.
+    """
+    for field_type in field.type.schemas:
+        if field_type.type == "enum":
+            return field_type
+    return None
+
+
 def map_type_from_avro(avro_type_str: str) -> str:
     """
     Map Avro type strings to data contract type strings.
@@ -276,6 +325,8 @@ def map_type_from_avro(avro_type_str: str) -> str:
         return "binary"
     elif avro_type_str == "double":
         return "double"
+    elif avro_type_str == "float":
+        return "float"
     elif avro_type_str == "int":
         return "int"
     elif avro_type_str == "long":