PyPI - duckguard - Versions diffs - 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71)

duckguard/__init__.py +55 -28
duckguard/anomaly/__init__.py +29 -1
duckguard/anomaly/baselines.py +294 -0
duckguard/anomaly/detector.py +1 -5
duckguard/anomaly/methods.py +17 -5
duckguard/anomaly/ml_methods.py +724 -0
duckguard/cli/main.py +561 -56
duckguard/connectors/__init__.py +2 -2
duckguard/connectors/bigquery.py +1 -1
duckguard/connectors/databricks.py +1 -1
duckguard/connectors/factory.py +2 -3
duckguard/connectors/files.py +1 -1
duckguard/connectors/kafka.py +2 -2
duckguard/connectors/mongodb.py +1 -1
duckguard/connectors/mysql.py +1 -1
duckguard/connectors/oracle.py +1 -1
duckguard/connectors/postgres.py +1 -2
duckguard/connectors/redshift.py +1 -1
duckguard/connectors/snowflake.py +1 -2
duckguard/connectors/sqlite.py +1 -1
duckguard/connectors/sqlserver.py +10 -13
duckguard/contracts/__init__.py +6 -6
duckguard/contracts/diff.py +1 -1
duckguard/contracts/generator.py +5 -6
duckguard/contracts/loader.py +4 -4
duckguard/contracts/validator.py +3 -4
duckguard/core/__init__.py +3 -3
duckguard/core/column.py +588 -5
duckguard/core/dataset.py +708 -3
duckguard/core/result.py +328 -1
duckguard/core/scoring.py +1 -2
duckguard/errors.py +362 -0
duckguard/freshness/__init__.py +33 -0
duckguard/freshness/monitor.py +429 -0
duckguard/history/__init__.py +44 -0
duckguard/history/schema.py +301 -0
duckguard/history/storage.py +479 -0
duckguard/history/trends.py +348 -0
duckguard/integrations/__init__.py +31 -0
duckguard/integrations/airflow.py +387 -0
duckguard/integrations/dbt.py +458 -0
duckguard/notifications/__init__.py +61 -0
duckguard/notifications/email.py +508 -0
duckguard/notifications/formatter.py +118 -0
duckguard/notifications/notifiers.py +357 -0
duckguard/profiler/auto_profile.py +3 -3
duckguard/pytest_plugin/__init__.py +1 -1
duckguard/pytest_plugin/plugin.py +1 -1
duckguard/reporting/console.py +2 -2
duckguard/reports/__init__.py +42 -0
duckguard/reports/html_reporter.py +514 -0
duckguard/reports/pdf_reporter.py +114 -0
duckguard/rules/__init__.py +3 -3
duckguard/rules/executor.py +3 -4
duckguard/rules/generator.py +8 -5
duckguard/rules/loader.py +5 -5
duckguard/rules/schema.py +23 -0
duckguard/schema_history/__init__.py +40 -0
duckguard/schema_history/analyzer.py +414 -0
duckguard/schema_history/tracker.py +288 -0
duckguard/semantic/__init__.py +1 -1
duckguard/semantic/analyzer.py +0 -2
duckguard/semantic/detector.py +17 -1
duckguard/semantic/validators.py +2 -1
duckguard-2.3.0.dist-info/METADATA +953 -0
duckguard-2.3.0.dist-info/RECORD +77 -0
duckguard-2.0.0.dist-info/METADATA +0 -221
duckguard-2.0.0.dist-info/RECORD +0 -55
{duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
{duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
{duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0

duckguard/notifications/notifiers.py ADDED Viewed

@@ -0,0 +1,357 @@
+"""Notification providers for DuckGuard.
+Supports Slack and Microsoft Teams webhooks for alerting on data quality issues.
+"""
+from __future__ import annotations
+import json
+import os
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any
+from urllib import request
+from urllib.error import URLError
+from duckguard.rules.executor import ExecutionResult
+@dataclass
+class NotificationConfig:
+    """Configuration for notifications.
+    Attributes:
+        on_failure: Send notification on check failures (default: True)
+        on_warning: Send notification on warnings (default: False)
+        on_success: Send notification on all checks passing (default: False)
+        include_passed_checks: Include passed checks in message (default: False)
+        include_row_samples: Include sample failing rows (default: True)
+        max_failures_shown: Max number of failures to show (default: 10)
+        mention_users: List of users to mention on failure
+        channel: Override default channel (Slack only)
+    """
+    on_failure: bool = True
+    on_warning: bool = False
+    on_success: bool = False
+    include_passed_checks: bool = False
+    include_row_samples: bool = True
+    max_failures_shown: int = 10
+    mention_users: list[str] = field(default_factory=list)
+    channel: str | None = None
+class BaseNotifier(ABC):
+    """Abstract base class for notification providers."""
+    def __init__(
+        self,
+        webhook_url: str | None = None,
+        config: NotificationConfig | None = None
+    ):
+        """Initialize the notifier.
+        Args:
+            webhook_url: Webhook URL for the notification service
+            config: Notification configuration
+        """
+        self.webhook_url = webhook_url or self._get_webhook_from_env()
+        self.config = config or NotificationConfig()
+        if not self.webhook_url:
+            raise ValueError(
+                f"Webhook URL required. Set {self._env_var_name} environment variable "
+                f"or pass webhook_url parameter."
+            )
+    @property
+    @abstractmethod
+    def _env_var_name(self) -> str:
+        """Environment variable name for webhook URL."""
+        pass
+    def _get_webhook_from_env(self) -> str | None:
+        """Get webhook URL from environment variable."""
+        return os.environ.get(self._env_var_name)
+    @abstractmethod
+    def _format_message(self, result: ExecutionResult) -> dict[str, Any]:
+        """Format the result as a message for the notification service."""
+        pass
+    def send_results(self, result: ExecutionResult) -> bool:
+        """Send notification based on execution results.
+        Args:
+            result: ExecutionResult from rule execution
+        Returns:
+            True if notification was sent, False if skipped
+        """
+        should_send = False
+        if not result.passed and self.config.on_failure:
+            should_send = True
+        elif result.warning_count > 0 and self.config.on_warning:
+            should_send = True
+        elif result.passed and self.config.on_success:
+            should_send = True
+        if not should_send:
+            return False
+        return self._send(result)
+    def send_failure_alert(self, result: ExecutionResult) -> bool:
+        """Send an alert for failures (ignores config settings).
+        Args:
+            result: ExecutionResult from rule execution
+        Returns:
+            True if sent successfully
+        """
+        return self._send(result)
+    def _send(self, result: ExecutionResult) -> bool:
+        """Send the notification.
+        Args:
+            result: ExecutionResult to send
+        Returns:
+            True if sent successfully
+        """
+        message = self._format_message(result)
+        data = json.dumps(message).encode("utf-8")
+        req = request.Request(
+            self.webhook_url,
+            data=data,
+            headers={"Content-Type": "application/json"},
+        )
+        try:
+            with request.urlopen(req, timeout=10) as response:
+                return response.status == 200
+        except URLError as e:
+            raise NotificationError(f"Failed to send notification: {e}") from e
+class SlackNotifier(BaseNotifier):
+    """Slack webhook notifier.
+    Usage:
+        notifier = SlackNotifier(webhook_url="https://hooks.slack.com/...")
+        # or set DUCKGUARD_SLACK_WEBHOOK environment variable
+        result = execute_rules(rules, "data.csv")
+        notifier.send_results(result)
+    """
+    @property
+    def _env_var_name(self) -> str:
+        return "DUCKGUARD_SLACK_WEBHOOK"
+    def _format_message(self, result: ExecutionResult) -> dict[str, Any]:
+        """Format as Slack message blocks."""
+        status_emoji = ":white_check_mark:" if result.passed else ":x:"
+        status_text = "PASSED" if result.passed else "FAILED"
+        blocks = [
+            {
+                "type": "header",
+                "text": {
+                    "type": "plain_text",
+                    "text": f"{status_emoji} DuckGuard Validation {status_text}",
+                    "emoji": True,
+                },
+            },
+            {
+                "type": "section",
+                "fields": [
+                    {"type": "mrkdwn", "text": f"*Source:*\n`{result.source}`"},
+                    {"type": "mrkdwn", "text": f"*Time:*\n{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"},
+                    {"type": "mrkdwn", "text": f"*Checks:*\n{result.passed_count}/{result.total_checks} passed"},
+                    {"type": "mrkdwn", "text": f"*Score:*\n{result.quality_score:.1f}%"},
+                ],
+            },
+        ]
+        # Add failures
+        failures = result.get_failures()
+        if failures:
+            failure_text = self._format_failures_slack(failures)
+            blocks.append({"type": "divider"})
+            blocks.append({
+                "type": "section",
+                "text": {"type": "mrkdwn", "text": failure_text},
+            })
+        # Add warnings if configured
+        warnings = result.get_warnings()
+        if warnings and self.config.on_warning:
+            warning_text = self._format_warnings_slack(warnings)
+            blocks.append({"type": "divider"})
+            blocks.append({
+                "type": "section",
+                "text": {"type": "mrkdwn", "text": warning_text},
+            })
+        # Add mentions
+        if not result.passed and self.config.mention_users:
+            mentions = " ".join(f"<@{u}>" for u in self.config.mention_users)
+            blocks.append({
+                "type": "section",
+                "text": {"type": "mrkdwn", "text": f":bell: {mentions}"},
+            })
+        message = {"blocks": blocks}
+        if self.config.channel:
+            message["channel"] = self.config.channel
+        return message
+    def _format_failures_slack(self, failures: list) -> str:
+        """Format failures for Slack."""
+        lines = [":rotating_light: *Failures:*"]
+        shown = failures[:self.config.max_failures_shown]
+        for f in shown:
+            col = f"[{f.column}]" if f.column else "[table]"
+            lines.append(f"• {col} {f.message}")
+            # Include sample failing rows if available
+            if self.config.include_row_samples and f.details.get("failed_rows"):
+                sample = f.details["failed_rows"][:3]
+                lines.append(f"  _Sample values: {sample}_")
+        remaining = len(failures) - len(shown)
+        if remaining > 0:
+            lines.append(f"_...and {remaining} more failures_")
+        return "\n".join(lines)
+    def _format_warnings_slack(self, warnings: list) -> str:
+        """Format warnings for Slack."""
+        lines = [":warning: *Warnings:*"]
+        shown = warnings[:self.config.max_failures_shown]
+        for w in shown:
+            col = f"[{w.column}]" if w.column else "[table]"
+            lines.append(f"• {col} {w.message}")
+        remaining = len(warnings) - len(shown)
+        if remaining > 0:
+            lines.append(f"_...and {remaining} more warnings_")
+        return "\n".join(lines)
+class TeamsNotifier(BaseNotifier):
+    """Microsoft Teams webhook notifier.
+    Usage:
+        notifier = TeamsNotifier(webhook_url="https://outlook.office.com/webhook/...")
+        # or set DUCKGUARD_TEAMS_WEBHOOK environment variable
+        result = execute_rules(rules, "data.csv")
+        notifier.send_results(result)
+    """
+    @property
+    def _env_var_name(self) -> str:
+        return "DUCKGUARD_TEAMS_WEBHOOK"
+    def _format_message(self, result: ExecutionResult) -> dict[str, Any]:
+        """Format as Teams Adaptive Card."""
+        status_text = "PASSED" if result.passed else "FAILED"
+        facts = [
+            {"title": "Source", "value": result.source},
+            {"title": "Time", "value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
+            {"title": "Checks", "value": f"{result.passed_count}/{result.total_checks} passed"},
+            {"title": "Score", "value": f"{result.quality_score:.1f}%"},
+        ]
+        sections = [
+            {
+                "activityTitle": f"DuckGuard Validation {status_text}",
+                "facts": facts,
+            }
+        ]
+        # Add failures
+        failures = result.get_failures()
+        if failures:
+            failure_text = self._format_failures_teams(failures)
+            sections.append({
+                "title": "Failures",
+                "text": failure_text,
+            })
+        # Add warnings
+        warnings = result.get_warnings()
+        if warnings and self.config.on_warning:
+            warning_text = self._format_warnings_teams(warnings)
+            sections.append({
+                "title": "Warnings",
+                "text": warning_text,
+            })
+        # Add mentions
+        if not result.passed and self.config.mention_users:
+            mentions = ", ".join(f"@{u}" for u in self.config.mention_users)
+            sections.append({
+                "text": f"**Attention:** {mentions}",
+            })
+        return {
+            "@type": "MessageCard",
+            "@context": "http://schema.org/extensions",
+            "themeColor": "FF0000" if not result.passed else "00FF00",
+            "summary": f"DuckGuard Validation {status_text}",
+            "sections": sections,
+        }
+    def _format_failures_teams(self, failures: list) -> str:
+        """Format failures for Teams."""
+        lines = []
+        shown = failures[:self.config.max_failures_shown]
+        for f in shown:
+            col = f"[{f.column}]" if f.column else "[table]"
+            lines.append(f"- {col} {f.message}")
+            if self.config.include_row_samples and f.details.get("failed_rows"):
+                sample = f.details["failed_rows"][:3]
+                lines.append(f"  *Sample values: {sample}*")
+        remaining = len(failures) - len(shown)
+        if remaining > 0:
+            lines.append(f"*...and {remaining} more failures*")
+        return "<br>".join(lines)
+    def _format_warnings_teams(self, warnings: list) -> str:
+        """Format warnings for Teams."""
+        lines = []
+        shown = warnings[:self.config.max_failures_shown]
+        for w in shown:
+            col = f"[{w.column}]" if w.column else "[table]"
+            lines.append(f"- {col} {w.message}")
+        remaining = len(warnings) - len(shown)
+        if remaining > 0:
+            lines.append(f"*...and {remaining} more warnings*")
+        return "<br>".join(lines)
+class NotificationError(Exception):
+    """Exception raised when notification fails."""
+    pass

duckguard/profiler/auto_profile.py CHANGED Viewed

@@ -3,11 +3,11 @@
 from __future__ import annotations
 import re
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Any
 from duckguard.core.dataset import Dataset
-from duckguard.core.result import ProfileResult, ColumnProfile
+from duckguard.core.result import ColumnProfile, ProfileResult
 @dataclass
@@ -319,7 +319,7 @@ class AutoProfiler:
             f'def test_{dataset.name.replace("-", "_").replace(".", "_")}():',
             f'    {output_var} = connect("{dataset.source}")',
             "",
-            f"    # Basic dataset checks",
+            "    # Basic dataset checks",
             f"    assert {output_var}.row_count > 0",
             "",
         ]

duckguard/pytest_plugin/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """pytest plugin for DuckGuard."""
-from duckguard.pytest_plugin.plugin import duckguard_engine, duckguard_dataset
+from duckguard.pytest_plugin.plugin import duckguard_dataset, duckguard_engine
 __all__ = ["duckguard_engine", "duckguard_dataset"]

duckguard/pytest_plugin/plugin.py CHANGED Viewed

@@ -22,8 +22,8 @@ from __future__ import annotations
 import pytest
-from duckguard.core.engine import DuckGuardEngine
 from duckguard.connectors import connect as duckguard_connect
+from duckguard.core.engine import DuckGuardEngine
 @pytest.fixture(scope="session")

duckguard/reporting/console.py CHANGED Viewed

@@ -3,10 +3,10 @@
 from __future__ import annotations
 from rich.console import Console
-from rich.table import Table
 from rich.panel import Panel
+from rich.table import Table
-from duckguard.core.result import ProfileResult, ScanResult, CheckResult, CheckStatus
+from duckguard.core.result import CheckResult, CheckStatus, ProfileResult, ScanResult
 class ConsoleReporter:

duckguard/reports/__init__.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Report generation for DuckGuard.
+Provides HTML and PDF report generation for validation results.
+Usage:
+    from duckguard.reports import HTMLReporter, PDFReporter
+    # Generate HTML report
+    reporter = HTMLReporter()
+    reporter.generate(result, "report.html")
+    # Generate PDF report (requires weasyprint)
+    pdf_reporter = PDFReporter()
+    pdf_reporter.generate(result, "report.pdf")
+    # Or use convenience functions
+    from duckguard.reports import generate_html_report, generate_pdf_report
+    generate_html_report(result, "report.html", title="My Report")
+    generate_pdf_report(result, "report.pdf")
+"""
+from duckguard.reports.html_reporter import (
+    HTMLReporter,
+    ReportConfig,
+    generate_html_report,
+)
+from duckguard.reports.pdf_reporter import (
+    PDFReporter,
+    generate_pdf_report,
+)
+__all__ = [
+    # Configuration
+    "ReportConfig",
+    # Reporters
+    "HTMLReporter",
+    "PDFReporter",
+    # Convenience functions
+    "generate_html_report",
+    "generate_pdf_report",
+]

duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl