datacontract-cli 0.10.31__py3-none-any.whl → 0.10.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import os
3
+ from decimal import Decimal
3
4
  from typing import Any, Dict, List, Optional
4
5
 
5
6
  import openpyxl
@@ -80,8 +81,16 @@ def import_excel_as_odcs(excel_file_path: str) -> OpenDataContractStandard:
80
81
  if tags_str:
81
82
  tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
82
83
 
83
- # Import other components
84
+ # Import quality data first (standalone from schemas)
85
+ quality_map = import_quality(workbook)
86
+
87
+ # Import schemas
84
88
  schemas = import_schemas(workbook)
89
+
90
+ # Attach quality to schemas and properties
91
+ schemas_with_quality = attach_quality_to_schemas(schemas, quality_map)
92
+
93
+ # Import other components
85
94
  support = import_support(workbook)
86
95
  team = import_team(workbook)
87
96
  roles = import_roles(workbook)
@@ -103,7 +112,7 @@ def import_excel_as_odcs(excel_file_path: str) -> OpenDataContractStandard:
103
112
  tenant=get_cell_value_by_name(workbook, "tenant"),
104
113
  description=description,
105
114
  tags=tags,
106
- schema=schemas,
115
+ schema=schemas_with_quality,
107
116
  support=support,
108
117
  price=price,
109
118
  team=team,
@@ -150,7 +159,7 @@ def import_schemas(workbook) -> Optional[List[SchemaObject]]:
150
159
  dataGranularityDescription=get_cell_value_by_name_in_sheet(sheet, "schema.dataGranularityDescription"),
151
160
  authoritativeDefinitions=None,
152
161
  properties=import_properties(sheet),
153
- quality=None,
162
+ quality=None, # Quality will be attached later
154
163
  customProperties=None,
155
164
  tags=None,
156
165
  )
@@ -230,16 +239,8 @@ def import_properties(sheet) -> Optional[List[SchemaProperty]]:
230
239
  )
231
240
  ]
232
241
 
233
- # Quality
234
- quality_type = get_cell_value(row, headers.get("quality type"))
235
- quality_description = get_cell_value(row, headers.get("quality description"))
236
- if quality_type and quality_description:
237
- property_obj.quality = [
238
- DataQuality(
239
- type=quality_type,
240
- description=quality_description,
241
- )
242
- ]
242
+ # Quality will be attached later via quality_map
243
+ property_obj.quality = None
243
244
 
244
245
  # Transform sources
245
246
  transform_sources = get_cell_value(row, headers.get("transform sources"))
@@ -853,3 +854,250 @@ def parse_property_value(value: str) -> Any:
853
854
  except (ValueError, TypeError, AttributeError):
854
855
  # If conversion fails, return original string
855
856
  return value
857
+
858
+
859
def import_quality(workbook: Workbook) -> Dict[str, List[DataQuality]]:
    """
    Import quality data from the "Quality" sheet and organize it by schema/property key.

    Keys are the bare schema name for schema-level quality, or
    "<schema>.<property>" for property-level quality (nested properties use a
    dotted path built by the attach step).

    Args:
        workbook: Source workbook containing an optional "Quality" sheet and
            a "quality" named range delimiting the data rows.

    Returns:
        Dictionary mapping schema/property keys to lists of DataQuality
        objects. Empty dict if the sheet or range is missing or unreadable.
    """
    try:
        quality_sheet = workbook["Quality"]
        if not quality_sheet:
            return {}
    except KeyError:
        logger.warning("Quality sheet not found")
        return {}

    try:
        quality_range = get_range_by_name_in_workbook(workbook, "quality")
        if not quality_range:
            logger.warning("Quality range not found")
            return {}

        # Header row sits one row above the named data range (0-based index).
        quality_header_row_index = quality_range[0] - 1
        headers = get_headers_from_header_row(quality_sheet, quality_header_row_index)

        # Materialize the rows once. Calling list(quality_sheet.rows) inside
        # the loop re-iterates the whole sheet for every data row (O(n^2)).
        rows = list(quality_sheet.rows)

        quality_map = {}

        for row_idx in range(quality_range[0], quality_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # Extract quality fields from row
            schema_name = get_cell_value(row, headers.get("schema"))
            property_name = get_cell_value(row, headers.get("property"))
            quality_type = get_cell_value(row, headers.get("quality type"))
            description = get_cell_value(row, headers.get("description"))
            rule = get_cell_value(row, headers.get("rule (library)"))
            query = get_cell_value(row, headers.get("query (sql)"))
            engine = get_cell_value(row, headers.get("quality engine (custom)"))
            implementation = get_cell_value(row, headers.get("implementation (custom)"))
            severity = get_cell_value(row, headers.get("severity"))
            scheduler = get_cell_value(row, headers.get("scheduler"))
            schedule = get_cell_value(row, headers.get("schedule"))
            threshold_operator = get_cell_value(row, headers.get("threshold operator"))
            threshold_value = get_cell_value(row, headers.get("threshold value"))

            # Skip if no schema name or insufficient quality data
            if not schema_name or (not quality_type and not description and not rule):
                continue

            # Parse threshold values based on operator
            threshold_dict = parse_threshold_values(threshold_operator, threshold_value)

            # Create DataQuality object with parsed thresholds
            quality = DataQuality(
                name=None,
                description=description,
                type=quality_type,
                rule=rule,
                unit=None,
                validValues=None,
                query=query,
                engine=engine,
                implementation=implementation,
                dimension=None,
                method=None,
                severity=severity,
                businessImpact=None,
                customProperties=None,
                authoritativeDefinitions=None,
                tags=None,
                scheduler=scheduler,
                schedule=schedule,
                **threshold_dict,  # Unpack threshold values
            )

            # Create key for mapping - use schema.property format
            key = schema_name if not property_name else f"{schema_name}.{property_name}"

            if key not in quality_map:
                quality_map[key] = []
            quality_map[key].append(quality)

    except Exception as e:
        # Best-effort import: a malformed Quality sheet must not abort the
        # whole contract import.
        logger.warning(f"Error importing quality: {str(e)}")
        return {}

    return quality_map
947
+
948
+
949
def parse_threshold_values(threshold_operator: str, threshold_value: str) -> Dict[str, Any]:
    """
    Parse threshold operator and value into DataQuality threshold fields.

    Args:
        threshold_operator: The threshold operator (e.g., "mustBe", "mustBeBetween")
        threshold_value: The threshold value (string representation)

    Returns:
        Dictionary with the appropriate threshold field set, suitable for
        unpacking into the DataQuality constructor. Empty if operator or
        value is missing or unparseable.
    """
    threshold_dict = {}

    if not threshold_operator or not threshold_value:
        return threshold_dict

    # Parse threshold values based on operator
    if threshold_operator in ["mustBeBetween", "mustNotBeBetween"]:
        # Parse "[value1, value2]" format
        if threshold_value.startswith("[") and threshold_value.endswith("]"):
            content = threshold_value[1:-1]  # Remove brackets
            try:
                values = [Decimal(v.strip()) for v in content.split(",") if v.strip()]
                if len(values) >= 2:
                    threshold_dict[threshold_operator] = values[:2]  # Take first two values
            # Decimal raises decimal.InvalidOperation on malformed input,
            # which is an ArithmeticError, NOT a ValueError — without it
            # here, inputs like "[a, b]" crash instead of being logged.
            except (ValueError, TypeError, ArithmeticError) as e:
                logger.warning(f"Failed to parse between values: {threshold_value}, error: {e}")
    else:
        # Single value for other operators
        try:
            # Cheap numeric pre-check; it still lets through values such as
            # "1.2.3", so the except must cover InvalidOperation as well.
            if threshold_value.replace(".", "").replace("-", "").isdigit():
                value = Decimal(threshold_value)
                threshold_dict[threshold_operator] = value
        except (ValueError, TypeError, ArithmeticError) as e:
            logger.warning(f"Failed to parse threshold value: {threshold_value}, error: {e}")

    return threshold_dict
987
+
988
+
989
def attach_quality_to_schemas(
    schemas: Optional[List[SchemaObject]], quality_map: Dict[str, List[DataQuality]]
) -> Optional[List[SchemaObject]]:
    """
    Attach quality attributes to schemas and their properties from quality_map.

    Args:
        schemas: List of schema objects (modified in place).
        quality_map: Dictionary mapping schema / "schema.property" keys to
            lists of DataQuality objects.

    Returns:
        The list of schema objects with quality attached, or None when no
        schemas were given.
    """
    if not schemas:
        return None

    result = []

    for schema_obj in schemas:
        name = schema_obj.name
        if name:
            # Schema-level quality is keyed by the bare schema name.
            schema_level_quality = quality_map.get(name)
            if schema_level_quality:
                schema_obj.quality = schema_level_quality

            # Property-level quality is keyed by "<schema>.<property>".
            if schema_obj.properties:
                schema_obj.properties = attach_quality_to_properties(schema_obj.properties, name, quality_map)

        result.append(schema_obj)

    return result
1025
+
1026
+
1027
def attach_quality_to_properties(
    properties: List[SchemaProperty], schema_name: str, quality_map: Dict[str, List[DataQuality]], prefix: str = ""
) -> List[SchemaProperty]:
    """
    Recursively attach quality attributes to properties and nested properties.

    Args:
        properties: List of property objects (modified in place).
        schema_name: Name of the parent schema.
        quality_map: Dictionary mapping "schema.property" keys to quality lists.
        prefix: Dotted property path accumulated for nested properties.

    Returns:
        The list of property objects with quality attached.
    """
    result = []

    for current in properties:
        name = current.name
        if not name:
            # Unnamed properties cannot be keyed into quality_map.
            result.append(current)
            continue

        # Dotted path of this property relative to its schema.
        path = f"{prefix}.{name}" if prefix else name

        found = quality_map.get(f"{schema_name}.{path}")
        if found:
            current.quality = found

        # Recurse into nested object properties.
        if current.properties:
            current.properties = attach_quality_to_properties(current.properties, schema_name, quality_map, path)

        # Array items get a ".items" path segment of their own.
        items = current.items
        if items:
            items_found = quality_map.get(f"{schema_name}.{path}.items")
            if items_found:
                items.quality = items_found

            if items.properties:
                items.properties = attach_quality_to_properties(
                    items.properties, schema_name, quality_map, f"{path}.items"
                )

        result.append(current)

    return result
1081
+
1082
+
1083
def get_headers_from_header_row(sheet: Worksheet, header_row_index: int) -> Dict[str, int]:
    """
    Map header names in the given row to their 0-based column indices.

    Args:
        sheet: The worksheet
        header_row_index: 0-based row index of the header row

    Returns:
        Dictionary mapping header names (lowercased, stripped) to column
        indices; whatever was collected so far if the row is missing or
        unreadable (empty in the usual failure case).
    """
    headers: Dict[str, int] = {}
    try:
        for col, cell in enumerate(list(sheet.rows)[header_row_index]):
            value = cell.value
            if value:
                # Later duplicate headers overwrite earlier ones.
                headers[str(value).lower().strip()] = col
    except (IndexError, AttributeError) as e:
        logger.warning(f"Error getting headers from row {header_row_index}: {e}")

    return headers
@@ -56,19 +56,19 @@ def write_junit_test_results(run: Run, console, output_path: Path):
56
56
  type=check.category if check.category else "General",
57
57
  )
58
58
  error.text = to_failure_text(check)
59
- elif check.result is ResultEnum.warning:
59
+ elif check.result == ResultEnum.warning:
60
60
  skipped = ET.SubElement(
61
61
  testcase,
62
62
  "skipped",
63
63
  message=check.reason if check.reason else "Warning",
64
64
  type=check.category if check.category else "General",
65
65
  )
66
- skipped.skipped = to_failure_text(check)
66
+ skipped.text = to_failure_text(check)
67
67
  else:
68
68
  ET.SubElement(
69
69
  testcase,
70
70
  "skipped",
71
- message=check.reason if check.reason else "None",
71
+ message=check.reason if check.reason else "Skipped",
72
72
  type=check.category if check.category else "General",
73
73
  )
74
74
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datacontract-cli
3
- Version: 0.10.31
3
+ Version: 0.10.32
4
4
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
5
5
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
6
6
  License-Expression: MIT
@@ -42,7 +42,7 @@ Provides-Extra: databricks
42
42
  Requires-Dist: soda-core-spark-df<3.6.0,>=3.3.20; extra == "databricks"
43
43
  Requires-Dist: soda-core-spark[databricks]<3.6.0,>=3.3.20; extra == "databricks"
44
44
  Requires-Dist: databricks-sql-connector<4.1.0,>=3.7.0; extra == "databricks"
45
- Requires-Dist: databricks-sdk<0.59.0; extra == "databricks"
45
+ Requires-Dist: databricks-sdk<0.60.0; extra == "databricks"
46
46
  Requires-Dist: pyspark<4.0.0,>=3.5.5; extra == "databricks"
47
47
  Provides-Extra: iceberg
48
48
  Requires-Dist: pyiceberg==0.9.1; extra == "iceberg"
@@ -59,7 +59,7 @@ datacontract/imports/bigquery_importer.py,sha256=7TcP9FDsIas5LwJZ-HrOPXZ-NuR056s
59
59
  datacontract/imports/csv_importer.py,sha256=mBsmyTvfB8q64Z3NYqv4zTDUOvoXG896hZvp3oLt5YM,5330
60
60
  datacontract/imports/dbml_importer.py,sha256=o0IOgvXN34lU1FICDHm_QUTv0DKsgwbHPHUDxQhIapE,3872
61
61
  datacontract/imports/dbt_importer.py,sha256=hQwqD9vbvwLLc6Yj3tQbar5ldI0pV-ynSiz7CZZ0JCc,8290
62
- datacontract/imports/excel_importer.py,sha256=TsyGlF4LE3ABps9UJt7ozwEXwHLgmOhqyJhh6l--xk0,37116
62
+ datacontract/imports/excel_importer.py,sha256=C9aETQhzWjzFtVWMi2pD-G1cVKgPwRJT_puyEgvkbVA,46110
63
63
  datacontract/imports/glue_importer.py,sha256=fiJPkvfwOCsaKKCGW19-JM5CCGXZ2mkNrVtUzp2iw6g,8370
64
64
  datacontract/imports/iceberg_importer.py,sha256=vadGJVqQKgG-j8swUytZALFB8QjbGRqZPCcPcCy0vco,5923
65
65
  datacontract/imports/importer.py,sha256=NRhR_9AWPWDNq2ac_DVUHGoJuvkVpwwaao8nDfJG_l0,1257
@@ -92,7 +92,7 @@ datacontract/model/odcs.py,sha256=9PXwm72FASjNwteF1Jn591iP3-St0aq16Cpsk0PkEW8,38
92
92
  datacontract/model/run.py,sha256=4UdEUaJl5RxEpN9S3swSu1vGJUVyNhOpRkdfbBZhh90,3146
93
93
  datacontract/model/data_contract_specification/__init__.py,sha256=lO7ywraknlDwJNUaSd2B9FWFsWhE8v5S-kob_shW_lg,47
94
94
  datacontract/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
- datacontract/output/junit_test_results.py,sha256=hAT7UgxwbfNzA0f5uuGzM4OIwLfXbmA4dgsfxsclWh0,4822
95
+ datacontract/output/junit_test_results.py,sha256=ZjevRMTxNSiR0HMr3bEvqv4olozPw2zEutbuLloInww,4822
96
96
  datacontract/output/output_format.py,sha256=_ZokDBo7-HXBs6czUv7kLLf9cYft_q5QaKzthsVnc58,212
97
97
  datacontract/output/test_results_writer.py,sha256=PWNLs3R_LQMH4xp5WDxLkQgY3xvj8Eyzw1jnfgkQxlc,2713
98
98
  datacontract/schemas/datacontract-1.1.0.init.yaml,sha256=RXRqtMiRsm1oqxXrKAzqQ_eScIr9D8td8-NWWmpYvM0,1828
@@ -112,9 +112,9 @@ datacontract/templates/partials/model_field.html,sha256=2YBF95ypNCPFYuYKoeilRnDG
112
112
  datacontract/templates/partials/quality.html,sha256=ynEDWRn8I90Uje-xhGYgFcfwOgKI1R-CDki-EvTsauQ,1785
113
113
  datacontract/templates/partials/server.html,sha256=dHFJtonMjhiUHtT69RUgTpkoRwmNdTRzkCdH0LtGg_4,6279
114
114
  datacontract/templates/style/output.css,sha256=ioIo1f96VW7LHhDifj6QI8QbRChJl-LlQ59EwM8MEmA,28692
115
- datacontract_cli-0.10.31.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
116
- datacontract_cli-0.10.31.dist-info/METADATA,sha256=T5pyKhT8TWagt86un_b7JSYYN2BLtpLKTvK5kRpOCeg,110144
117
- datacontract_cli-0.10.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
118
- datacontract_cli-0.10.31.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
119
- datacontract_cli-0.10.31.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
120
- datacontract_cli-0.10.31.dist-info/RECORD,,
115
+ datacontract_cli-0.10.32.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
116
+ datacontract_cli-0.10.32.dist-info/METADATA,sha256=jL4AHRxnL24naxZ0nhBwzXOWo5iz00nljbV07g1Dl50,110144
117
+ datacontract_cli-0.10.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
118
+ datacontract_cli-0.10.32.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
119
+ datacontract_cli-0.10.32.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
120
+ datacontract_cli-0.10.32.dist-info/RECORD,,