PyPI - kontra - Versions diffs - 0.5.2__py3-none-any.whl - Mend

kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

kontra/__init__.py +1871 -0
kontra/api/__init__.py +22 -0
kontra/api/compare.py +340 -0
kontra/api/decorators.py +153 -0
kontra/api/results.py +2121 -0
kontra/api/rules.py +681 -0
kontra/cli/__init__.py +0 -0
kontra/cli/commands/__init__.py +1 -0
kontra/cli/commands/config.py +153 -0
kontra/cli/commands/diff.py +450 -0
kontra/cli/commands/history.py +196 -0
kontra/cli/commands/profile.py +289 -0
kontra/cli/commands/validate.py +468 -0
kontra/cli/constants.py +6 -0
kontra/cli/main.py +48 -0
kontra/cli/renderers.py +304 -0
kontra/cli/utils.py +28 -0
kontra/config/__init__.py +34 -0
kontra/config/loader.py +127 -0
kontra/config/models.py +49 -0
kontra/config/settings.py +797 -0
kontra/connectors/__init__.py +0 -0
kontra/connectors/db_utils.py +251 -0
kontra/connectors/detection.py +323 -0
kontra/connectors/handle.py +368 -0
kontra/connectors/postgres.py +127 -0
kontra/connectors/sqlserver.py +226 -0
kontra/engine/__init__.py +0 -0
kontra/engine/backends/duckdb_session.py +227 -0
kontra/engine/backends/duckdb_utils.py +18 -0
kontra/engine/backends/polars_backend.py +47 -0
kontra/engine/engine.py +1205 -0
kontra/engine/executors/__init__.py +15 -0
kontra/engine/executors/base.py +50 -0
kontra/engine/executors/database_base.py +528 -0
kontra/engine/executors/duckdb_sql.py +607 -0
kontra/engine/executors/postgres_sql.py +162 -0
kontra/engine/executors/registry.py +69 -0
kontra/engine/executors/sqlserver_sql.py +163 -0
kontra/engine/materializers/__init__.py +14 -0
kontra/engine/materializers/base.py +42 -0
kontra/engine/materializers/duckdb.py +110 -0
kontra/engine/materializers/factory.py +22 -0
kontra/engine/materializers/polars_connector.py +131 -0
kontra/engine/materializers/postgres.py +157 -0
kontra/engine/materializers/registry.py +138 -0
kontra/engine/materializers/sqlserver.py +160 -0
kontra/engine/result.py +15 -0
kontra/engine/sql_utils.py +611 -0
kontra/engine/sql_validator.py +609 -0
kontra/engine/stats.py +194 -0
kontra/engine/types.py +138 -0
kontra/errors.py +533 -0
kontra/logging.py +85 -0
kontra/preplan/__init__.py +5 -0
kontra/preplan/planner.py +253 -0
kontra/preplan/postgres.py +179 -0
kontra/preplan/sqlserver.py +191 -0
kontra/preplan/types.py +24 -0
kontra/probes/__init__.py +20 -0
kontra/probes/compare.py +400 -0
kontra/probes/relationship.py +283 -0
kontra/reporters/__init__.py +0 -0
kontra/reporters/json_reporter.py +190 -0
kontra/reporters/rich_reporter.py +11 -0
kontra/rules/__init__.py +35 -0
kontra/rules/base.py +186 -0
kontra/rules/builtin/__init__.py +40 -0
kontra/rules/builtin/allowed_values.py +156 -0
kontra/rules/builtin/compare.py +188 -0
kontra/rules/builtin/conditional_not_null.py +213 -0
kontra/rules/builtin/conditional_range.py +310 -0
kontra/rules/builtin/contains.py +138 -0
kontra/rules/builtin/custom_sql_check.py +182 -0
kontra/rules/builtin/disallowed_values.py +140 -0
kontra/rules/builtin/dtype.py +203 -0
kontra/rules/builtin/ends_with.py +129 -0
kontra/rules/builtin/freshness.py +240 -0
kontra/rules/builtin/length.py +193 -0
kontra/rules/builtin/max_rows.py +35 -0
kontra/rules/builtin/min_rows.py +46 -0
kontra/rules/builtin/not_null.py +121 -0
kontra/rules/builtin/range.py +222 -0
kontra/rules/builtin/regex.py +143 -0
kontra/rules/builtin/starts_with.py +129 -0
kontra/rules/builtin/unique.py +124 -0
kontra/rules/condition_parser.py +203 -0
kontra/rules/execution_plan.py +455 -0
kontra/rules/factory.py +103 -0
kontra/rules/predicates.py +25 -0
kontra/rules/registry.py +24 -0
kontra/rules/static_predicates.py +120 -0
kontra/scout/__init__.py +9 -0
kontra/scout/backends/__init__.py +17 -0
kontra/scout/backends/base.py +111 -0
kontra/scout/backends/duckdb_backend.py +359 -0
kontra/scout/backends/postgres_backend.py +519 -0
kontra/scout/backends/sqlserver_backend.py +577 -0
kontra/scout/dtype_mapping.py +150 -0
kontra/scout/patterns.py +69 -0
kontra/scout/profiler.py +801 -0
kontra/scout/reporters/__init__.py +39 -0
kontra/scout/reporters/json_reporter.py +165 -0
kontra/scout/reporters/markdown_reporter.py +152 -0
kontra/scout/reporters/rich_reporter.py +144 -0
kontra/scout/store.py +208 -0
kontra/scout/suggest.py +200 -0
kontra/scout/types.py +652 -0
kontra/state/__init__.py +29 -0
kontra/state/backends/__init__.py +79 -0
kontra/state/backends/base.py +348 -0
kontra/state/backends/local.py +480 -0
kontra/state/backends/postgres.py +1010 -0
kontra/state/backends/s3.py +543 -0
kontra/state/backends/sqlserver.py +969 -0
kontra/state/fingerprint.py +166 -0
kontra/state/types.py +1061 -0
kontra/version.py +1 -0
kontra-0.5.2.dist-info/METADATA +122 -0
kontra-0.5.2.dist-info/RECORD +124 -0
kontra-0.5.2.dist-info/WHEEL +5 -0
kontra-0.5.2.dist-info/entry_points.txt +2 -0
kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
kontra-0.5.2.dist-info/top_level.txt +1 -0

kontra/rules/builtin/compare.py ADDED Viewed

@@ -0,0 +1,188 @@
+# src/kontra/rules/builtin/compare.py
+"""
+Compare rule - Compares two columns using a comparison operator.
+Usage:
+    - name: compare
+      params:
+        left: end_date
+        right: start_date
+        op: ">="
+Fails when:
+    - Either column is NULL (can't compare NULL values)
+    - The comparison left op right is FALSE
+"""
+from __future__ import annotations
+from typing import Any, Dict, Optional, Set
+import polars as pl
+from kontra.rules.base import BaseRule
+from kontra.rules.registry import register_rule
+from kontra.rules.predicates import Predicate
+from kontra.state.types import FailureMode
+# Operator token -> unbound pl.Expr comparison method; invoked as fn(left_expr, right_expr)
+POLARS_OP_MAP = {
+    ">": pl.Expr.__gt__,
+    ">=": pl.Expr.__ge__,
+    "<": pl.Expr.__lt__,
+    "<=": pl.Expr.__le__,
+    "==": pl.Expr.__eq__,
+    "!=": pl.Expr.__ne__,
+}
+# Operator token -> English phrase spliced into the "<left> is not <phrase> <right>" failure message
+OP_DESCRIPTIONS = {
+    ">": "greater than",
+    ">=": "greater than or equal to",
+    "<": "less than",
+    "<=": "less than or equal to",
+    "==": "equal to",
+    "!=": "not equal to",
+}
+SUPPORTED_OPS = set(POLARS_OP_MAP.keys())  # operators accepted for the `op` param
+@register_rule("compare")
+class CompareRule(BaseRule):
+    """
+    Fails where left column does not satisfy the comparison with right column.
+    params:
+      - left: str (required) - Left column name
+      - right: str (required) - Right column name
+      - op: str (required) - Comparison operator: >, >=, <, <=, ==, !=
+    NULL handling:
+      Rows where either column is NULL are considered failures.
+      You can't meaningfully compare NULL values.
+    """
+    def __init__(self, name: str, params: Dict[str, Any]):
+        super().__init__(name, params)
+        # Validate parameters at construction time
+        self._left = self._get_required_param("left", str)
+        self._right = self._get_required_param("right", str)
+        self._op = self._get_required_param("op", str)
+        if self._op not in SUPPORTED_OPS:
+            raise ValueError(
+                f"Rule 'compare' unsupported operator '{self._op}'. "
+                f"Supported: {', '.join(sorted(SUPPORTED_OPS))}"
+            )
+    def required_columns(self) -> Set[str]:
+        return {self._left, self._right}
+    def validate(self, df: pl.DataFrame) -> Dict[str, Any]:
+        # Check columns exist before accessing
+        col_check = self._check_columns(df, {self._left, self._right})
+        if col_check is not None:
+            return col_check
+        left_col = pl.col(self._left)
+        right_col = pl.col(self._right)
+        # Get the comparison function
+        compare_fn = POLARS_OP_MAP[self._op]
+        # Build mask expression: True = failure
+        # Failures are: NULL in either column OR comparison is FALSE
+        comparison_expr = compare_fn(left_col, right_col)
+        mask_expr = (
+            left_col.is_null()
+            | right_col.is_null()
+            | ~comparison_expr
+        )
+        # Evaluate the expression to get a Series
+        mask = df.select(mask_expr.alias("_mask"))["_mask"]
+        op_desc = OP_DESCRIPTIONS[self._op]
+        message = f"{self._left} is not {op_desc} {self._right}"
+        res = super()._failures(df, mask, message)
+        res["rule_id"] = self.rule_id
+        if res["failed_count"] > 0:
+            res["failure_mode"] = str(FailureMode.COMPARISON_FAILED)
+            res["details"] = self._explain_failure(df, res["failed_count"])
+        return res
+    def _explain_failure(
+        self, df: pl.DataFrame, failed_count: int
+    ) -> Dict[str, Any]:
+        """Generate detailed failure explanation."""
+        total_rows = df.height
+        failure_rate = failed_count / total_rows if total_rows > 0 else 0
+        # Count NULLs in each column
+        left_nulls = df[self._left].is_null().sum()
+        right_nulls = df[self._right].is_null().sum()
+        details: Dict[str, Any] = {
+            "failed_count": failed_count,
+            "failure_rate": round(failure_rate, 4),
+            "total_rows": total_rows,
+            "left_column": self._left,
+            "right_column": self._right,
+            "operator": self._op,
+            "left_null_count": int(left_nulls),
+            "right_null_count": int(right_nulls),
+        }
+        return details
+    def compile_predicate(self) -> Optional[Predicate]:
+        left_col = pl.col(self._left)
+        right_col = pl.col(self._right)
+        compare_fn = POLARS_OP_MAP[self._op]
+        comparison_expr = compare_fn(left_col, right_col)
+        # Violation mask: NULL in either column OR comparison is FALSE
+        expr = (
+            left_col.is_null()
+            | right_col.is_null()
+            | ~comparison_expr
+        )
+        op_desc = OP_DESCRIPTIONS[self._op]
+        message = f"{self._left} is not {op_desc} {self._right}"
+        return Predicate(
+            rule_id=self.rule_id,
+            expr=expr,
+            message=message,
+            columns={self._left, self._right},
+        )
+    def to_sql_spec(self) -> Optional[Dict[str, Any]]:
+        """Return SQL spec for SQL pushdown executors."""
+        return {
+            "kind": "compare",
+            "rule_id": self.rule_id,
+            "left": self._left,
+            "right": self._right,
+            "op": self._op,
+        }
+    def to_sql_filter(self, dialect: str = "postgres") -> str | None:
+        left = f'"{self._left}"'
+        right = f'"{self._right}"'
+        # Map Python operators to SQL
+        sql_op = self._op
+        if sql_op == "==":
+            sql_op = "="
+        elif sql_op == "!=":
+            sql_op = "<>"
+        # Failures: NULL in either column OR comparison is FALSE
+        return f"{left} IS NULL OR {right} IS NULL OR NOT ({left} {sql_op} {right})"

kontra/rules/builtin/conditional_not_null.py ADDED Viewed

@@ -0,0 +1,213 @@
+# src/kontra/rules/builtin/conditional_not_null.py
+"""
+Conditional not-null rule - Column must not be NULL when a condition is met.
+Usage:
+    - name: conditional_not_null
+      params:
+        column: shipping_date
+        when: "status == 'shipped'"
+Fails when:
+    - The `when` condition is TRUE AND the `column` is NULL
+Passes when:
+    - The `when` condition is FALSE (regardless of column value)
+    - The `when` condition is TRUE AND the `column` is NOT NULL
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Optional, Set
+import polars as pl
+from kontra.rules.base import BaseRule
+from kontra.rules.registry import register_rule
+from kontra.rules.predicates import Predicate
+from kontra.rules.condition_parser import parse_condition, ConditionParseError
+from kontra.state.types import FailureMode
+# Operator token -> unbound pl.Expr comparison method; invoked as fn(column_expr, literal_value)
+POLARS_OP_MAP = {
+    "==": pl.Expr.__eq__,
+    "!=": pl.Expr.__ne__,
+    ">": pl.Expr.__gt__,
+    ">=": pl.Expr.__ge__,
+    "<": pl.Expr.__lt__,
+    "<=": pl.Expr.__le__,
+}
+@register_rule("conditional_not_null")
+class ConditionalNotNullRule(BaseRule):
+    """
+    Fails where column is NULL when a condition is met.
+    params:
+      - column: str (required) - Column that must not be null
+      - when: str (required) - Condition expression (e.g., "status == 'shipped'")
+    Condition syntax:
+      column_name operator value
+      Supported operators: ==, !=, >, >=, <, <=
+      Supported values: 'string', 123, 123.45, true, false, null
+    Examples:
+      - status == 'shipped'
+      - amount > 0
+      - is_active == true
+    """
+    def __init__(self, name: str, params: Dict[str, Any]):
+        super().__init__(name, params)
+        # Validate parameters at construction time
+        self._column = self._get_required_param("column", str)
+        self._when_expr = self._get_required_param("when", str)
+        # Parse the when expression at init time to fail early
+        try:
+            self._when_column, self._when_op, self._when_value = parse_condition(self._when_expr)
+        except ConditionParseError as e:
+            raise ValueError(f"Rule 'conditional_not_null' invalid 'when' expression: {e}") from e
+    def required_columns(self) -> Set[str]:
+        return {self._column, self._when_column}
+    def _build_condition_expr(self) -> pl.Expr:
+        """Build the Polars expression for the when condition."""
+        when_col = pl.col(self._when_column)
+        compare_fn = POLARS_OP_MAP[self._when_op]
+        # Handle NULL value in condition
+        if self._when_value is None:
+            if self._when_op == "==":
+                return when_col.is_null()
+            elif self._when_op == "!=":
+                return when_col.is_not_null()
+            else:
+                # Other operators with NULL don't make sense; treat as always false
+                return pl.lit(False)
+        # Build comparison expression
+        return compare_fn(when_col, self._when_value)
+    def validate(self, df: pl.DataFrame) -> Dict[str, Any]:
+        # Check columns exist before accessing
+        col_check = self._check_columns(df, {self._column, self._when_column})
+        if col_check is not None:
+            return col_check
+        # Build condition expression
+        condition_expr = self._build_condition_expr()
+        # Mask: True = failure
+        # Failure = condition is TRUE AND column is NULL
+        mask_expr = condition_expr & pl.col(self._column).is_null()
+        # Evaluate the expression to get a Series
+        mask = df.select(mask_expr.alias("_mask"))["_mask"]
+        message = f"{self._column} is null when {self._when_expr}"
+        res = super()._failures(df, mask, message)
+        res["rule_id"] = self.rule_id
+        if res["failed_count"] > 0:
+            res["failure_mode"] = str(FailureMode.CONDITIONAL_NULL)
+            res["details"] = self._explain_failure(df, mask, res["failed_count"])
+        return res
+    def _explain_failure(
+        self, df: pl.DataFrame, mask: pl.Series, failed_count: int
+    ) -> Dict[str, Any]:
+        """Generate detailed failure explanation."""
+        total_rows = df.height
+        failure_rate = failed_count / total_rows if total_rows > 0 else 0
+        # Count rows matching the condition
+        condition_expr = self._build_condition_expr()
+        condition_matches = df.select(condition_expr.sum())[0, 0]
+        details: Dict[str, Any] = {
+            "failed_count": failed_count,
+            "failure_rate": round(failure_rate, 4),
+            "total_rows": total_rows,
+            "column": self._column,
+            "when_condition": self._when_expr,
+            "rows_matching_condition": int(condition_matches) if condition_matches else 0,
+        }
+        # Sample failing row positions (first 5)
+        if failed_count > 0 and failed_count <= 1000:
+            positions: List[int] = []
+            for i, val in enumerate(mask):
+                if val:
+                    positions.append(i)
+                    if len(positions) >= 5:
+                        break
+            if positions:
+                details["sample_positions"] = positions
+        return details
+    def compile_predicate(self) -> Optional[Predicate]:
+        # Build condition expression
+        condition_expr = self._build_condition_expr()
+        # Mask: condition is TRUE AND column is NULL
+        expr = condition_expr & pl.col(self._column).is_null()
+        message = f"{self._column} is null when {self._when_expr}"
+        return Predicate(
+            rule_id=self.rule_id,
+            expr=expr,
+            message=message,
+            columns={self._column, self._when_column},
+        )
+    def to_sql_spec(self) -> Optional[Dict[str, Any]]:
+        """Return SQL spec for SQL pushdown executors."""
+        return {
+            "kind": "conditional_not_null",
+            "rule_id": self.rule_id,
+            "column": self._column,
+            "when_column": self._when_column,
+            "when_op": self._when_op,
+            "when_value": self._when_value,
+        }
+    def to_sql_filter(self, dialect: str = "postgres") -> str | None:
+        col = f'"{self._column}"'
+        when_col = f'"{self._when_column}"'
+        # Map operators
+        sql_op = self._when_op
+        if sql_op == "==":
+            sql_op = "="
+        elif sql_op == "!=":
+            sql_op = "<>"
+        # Format the value
+        if self._when_value is None:
+            # Special handling for NULL comparison
+            if sql_op == "=":
+                condition = f"{when_col} IS NULL"
+            elif sql_op == "<>":
+                condition = f"{when_col} IS NOT NULL"
+            else:
+                return None  # Can't compare with NULL using < > etc.
+        elif isinstance(self._when_value, str):
+            escaped = self._when_value.replace("'", "''")
+            condition = f"{when_col} {sql_op} '{escaped}'"
+        elif isinstance(self._when_value, bool):
+            val = "TRUE" if self._when_value else "FALSE"
+            condition = f"{when_col} {sql_op} {val}"
+        else:
+            condition = f"{when_col} {sql_op} {self._when_value}"
+        # Failure = condition is TRUE AND column is NULL
+        return f"({condition}) AND {col} IS NULL"