duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +29 -1
  3. duckguard/anomaly/baselines.py +294 -0
  4. duckguard/anomaly/detector.py +1 -5
  5. duckguard/anomaly/methods.py +17 -5
  6. duckguard/anomaly/ml_methods.py +724 -0
  7. duckguard/cli/main.py +561 -56
  8. duckguard/connectors/__init__.py +2 -2
  9. duckguard/connectors/bigquery.py +1 -1
  10. duckguard/connectors/databricks.py +1 -1
  11. duckguard/connectors/factory.py +2 -3
  12. duckguard/connectors/files.py +1 -1
  13. duckguard/connectors/kafka.py +2 -2
  14. duckguard/connectors/mongodb.py +1 -1
  15. duckguard/connectors/mysql.py +1 -1
  16. duckguard/connectors/oracle.py +1 -1
  17. duckguard/connectors/postgres.py +1 -2
  18. duckguard/connectors/redshift.py +1 -1
  19. duckguard/connectors/snowflake.py +1 -2
  20. duckguard/connectors/sqlite.py +1 -1
  21. duckguard/connectors/sqlserver.py +10 -13
  22. duckguard/contracts/__init__.py +6 -6
  23. duckguard/contracts/diff.py +1 -1
  24. duckguard/contracts/generator.py +5 -6
  25. duckguard/contracts/loader.py +4 -4
  26. duckguard/contracts/validator.py +3 -4
  27. duckguard/core/__init__.py +3 -3
  28. duckguard/core/column.py +588 -5
  29. duckguard/core/dataset.py +708 -3
  30. duckguard/core/result.py +328 -1
  31. duckguard/core/scoring.py +1 -2
  32. duckguard/errors.py +362 -0
  33. duckguard/freshness/__init__.py +33 -0
  34. duckguard/freshness/monitor.py +429 -0
  35. duckguard/history/__init__.py +44 -0
  36. duckguard/history/schema.py +301 -0
  37. duckguard/history/storage.py +479 -0
  38. duckguard/history/trends.py +348 -0
  39. duckguard/integrations/__init__.py +31 -0
  40. duckguard/integrations/airflow.py +387 -0
  41. duckguard/integrations/dbt.py +458 -0
  42. duckguard/notifications/__init__.py +61 -0
  43. duckguard/notifications/email.py +508 -0
  44. duckguard/notifications/formatter.py +118 -0
  45. duckguard/notifications/notifiers.py +357 -0
  46. duckguard/profiler/auto_profile.py +3 -3
  47. duckguard/pytest_plugin/__init__.py +1 -1
  48. duckguard/pytest_plugin/plugin.py +1 -1
  49. duckguard/reporting/console.py +2 -2
  50. duckguard/reports/__init__.py +42 -0
  51. duckguard/reports/html_reporter.py +514 -0
  52. duckguard/reports/pdf_reporter.py +114 -0
  53. duckguard/rules/__init__.py +3 -3
  54. duckguard/rules/executor.py +3 -4
  55. duckguard/rules/generator.py +8 -5
  56. duckguard/rules/loader.py +5 -5
  57. duckguard/rules/schema.py +23 -0
  58. duckguard/schema_history/__init__.py +40 -0
  59. duckguard/schema_history/analyzer.py +414 -0
  60. duckguard/schema_history/tracker.py +288 -0
  61. duckguard/semantic/__init__.py +1 -1
  62. duckguard/semantic/analyzer.py +0 -2
  63. duckguard/semantic/detector.py +17 -1
  64. duckguard/semantic/validators.py +2 -1
  65. duckguard-2.3.0.dist-info/METADATA +953 -0
  66. duckguard-2.3.0.dist-info/RECORD +77 -0
  67. duckguard-2.0.0.dist-info/METADATA +0 -221
  68. duckguard-2.0.0.dist-info/RECORD +0 -55
  69. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
  70. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
  71. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,357 @@
1
+ """Notification providers for DuckGuard.
2
+
3
+ Supports Slack and Microsoft Teams webhooks for alerting on data quality issues.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import os
10
+ from abc import ABC, abstractmethod
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime
13
+ from typing import Any
14
+ from urllib import request
15
+ from urllib.error import URLError
16
+
17
+ from duckguard.rules.executor import ExecutionResult
18
+
19
+
20
@dataclass
class NotificationConfig:
    """Settings that decide when a notifier fires and what it puts in the message.

    Attributes:
        on_failure: Notify when the run did not pass (default: True).
        on_warning: Notify when the run produced warnings (default: False).
        on_success: Notify when the run passed (default: False).
        include_passed_checks: Include passed checks in the message
            (default: False).
        include_row_samples: Attach a few sample failing values to each
            reported failure (default: True).
        max_failures_shown: Cap on how many failures are listed; the same cap
            is applied to the warning list (default: 10).
        mention_users: Users to mention when the run fails.
        channel: Channel override (Slack only).
    """

    # Triggers: which outcomes produce a notification.
    on_failure: bool = True
    on_warning: bool = False
    on_success: bool = False

    # Content: how much detail goes into the message body.
    include_passed_checks: bool = False
    include_row_samples: bool = True
    max_failures_shown: int = 10

    # Routing: who gets pinged and where the message lands.
    # default_factory gives every instance its own list.
    mention_users: list[str] = field(default_factory=list)
    channel: str | None = None
43
+
44
+
45
class BaseNotifier(ABC):
    """Abstract base class for webhook-based notification providers.

    Subclasses provide the name of the fallback environment variable
    (``_env_var_name``) and the provider-specific payload
    (``_format_message``). This class decides whether a notification is due
    and performs the HTTP POST.
    """

    def __init__(
        self,
        webhook_url: str | None = None,
        config: NotificationConfig | None = None,
    ):
        """Initialize the notifier.

        Args:
            webhook_url: Webhook URL for the notification service. When
                omitted, the URL is read from the environment variable named
                by ``_env_var_name``.
            config: Notification configuration. Defaults to a fresh
                ``NotificationConfig()``.

        Raises:
            ValueError: If no webhook URL was passed and the environment
                variable is unset or empty.
        """
        self.webhook_url = webhook_url or self._get_webhook_from_env()
        self.config = config or NotificationConfig()

        if not self.webhook_url:
            raise ValueError(
                f"Webhook URL required. Set {self._env_var_name} environment variable "
                f"or pass webhook_url parameter."
            )

    @property
    @abstractmethod
    def _env_var_name(self) -> str:
        """Environment variable name for webhook URL."""

    def _get_webhook_from_env(self) -> str | None:
        """Get webhook URL from environment variable."""
        return os.environ.get(self._env_var_name)

    @abstractmethod
    def _format_message(self, result: ExecutionResult) -> dict[str, Any]:
        """Format the result as a message for the notification service."""

    @staticmethod
    def _is_success_status(status: int) -> bool:
        """Return True for any 2xx HTTP status code."""
        return 200 <= status < 300

    def send_results(self, result: ExecutionResult) -> bool:
        """Send notification based on execution results.

        A notification is due when the run failed and ``on_failure`` is set,
        when it has warnings and ``on_warning`` is set, or when it passed and
        ``on_success`` is set.

        Args:
            result: ExecutionResult from rule execution

        Returns:
            True if notification was sent, False if skipped

        Raises:
            NotificationError: If the webhook request fails.
        """
        cfg = self.config
        should_send = (
            (not result.passed and cfg.on_failure)
            or (result.warning_count > 0 and cfg.on_warning)
            or (result.passed and cfg.on_success)
        )

        if not should_send:
            return False

        return self._send(result)

    def send_failure_alert(self, result: ExecutionResult) -> bool:
        """Send an alert for failures (ignores config settings).

        Args:
            result: ExecutionResult from rule execution

        Returns:
            True if sent successfully

        Raises:
            NotificationError: If the webhook request fails.
        """
        return self._send(result)

    def _send(self, result: ExecutionResult) -> bool:
        """Send the notification.

        Args:
            result: ExecutionResult to send

        Returns:
            True if the service answered with a 2xx status

        Raises:
            NotificationError: If the request fails at the network or HTTP
                level.
        """
        payload = json.dumps(self._format_message(result)).encode("utf-8")

        # Supplying ``data`` makes urllib issue a POST.
        req = request.Request(
            self.webhook_url,
            data=payload,
            headers={"Content-Type": "application/json"},
        )

        try:
            with request.urlopen(req, timeout=10) as response:
                # Accept every 2xx answer: some webhook endpoints reply
                # 202 Accepted rather than 200 OK.
                return self._is_success_status(response.status)
        except OSError as e:
            # URLError/HTTPError are OSError subclasses. Catching OSError also
            # covers timeouts and connection resets raised while the response
            # is being read, which urllib does not wrap in URLError.
            raise NotificationError(f"Failed to send notification: {e}") from e
140
+
141
+
142
class SlackNotifier(BaseNotifier):
    """Slack webhook notifier.

    Usage:
        notifier = SlackNotifier(webhook_url="https://hooks.slack.com/...")
        # or set DUCKGUARD_SLACK_WEBHOOK environment variable

        result = execute_rules(rules, "data.csv")
        notifier.send_results(result)
    """

    # Slack rejects the whole payload when a section block's text is longer
    # than 3000 characters, so long failure lists are cut to fit.
    _MAX_SECTION_TEXT = 3000
    _TRUNCATION_SUFFIX = "\n_...(truncated)_"

    @property
    def _env_var_name(self) -> str:
        """Environment variable consulted when no webhook URL is passed."""
        return "DUCKGUARD_SLACK_WEBHOOK"

    def _section(self, text: str) -> dict[str, Any]:
        """Build a mrkdwn section block, truncating text to Slack's limit."""
        if len(text) > self._MAX_SECTION_TEXT:
            keep = self._MAX_SECTION_TEXT - len(self._TRUNCATION_SUFFIX)
            text = text[:keep] + self._TRUNCATION_SUFFIX
        return {"type": "section", "text": {"type": "mrkdwn", "text": text}}

    def _format_message(self, result: ExecutionResult) -> dict[str, Any]:
        """Format as Slack message blocks.

        The message holds a header, a summary section (source, time, check
        counts, score), then optional failure, warning and mention sections.

        Args:
            result: ExecutionResult to describe

        Returns:
            Webhook payload with a ``blocks`` list and, when configured, a
            ``channel`` override.
        """
        status_emoji = ":white_check_mark:" if result.passed else ":x:"
        status_text = "PASSED" if result.passed else "FAILED"
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        blocks: list[dict[str, Any]] = [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": f"{status_emoji} DuckGuard Validation {status_text}",
                    "emoji": True,
                },
            },
            {
                "type": "section",
                "fields": [
                    {"type": "mrkdwn", "text": f"*Source:*\n`{result.source}`"},
                    {"type": "mrkdwn", "text": f"*Time:*\n{timestamp}"},
                    {
                        "type": "mrkdwn",
                        "text": f"*Checks:*\n{result.passed_count}/{result.total_checks} passed",
                    },
                    {"type": "mrkdwn", "text": f"*Score:*\n{result.quality_score:.1f}%"},
                ],
            },
        ]

        # Failures are always listed when present.
        failures = result.get_failures()
        if failures:
            blocks.append({"type": "divider"})
            blocks.append(self._section(self._format_failures_slack(failures)))

        # Warnings are listed only when warning notifications are enabled.
        warnings = result.get_warnings()
        if warnings and self.config.on_warning:
            blocks.append({"type": "divider"})
            blocks.append(self._section(self._format_warnings_slack(warnings)))

        # Mention the configured users only on a failed run.
        if not result.passed and self.config.mention_users:
            mentions = " ".join(f"<@{u}>" for u in self.config.mention_users)
            blocks.append(self._section(f":bell: {mentions}"))

        message: dict[str, Any] = {"blocks": blocks}

        if self.config.channel:
            message["channel"] = self.config.channel

        return message

    def _format_failures_slack(self, failures: list) -> str:
        """Format failures for Slack.

        Args:
            failures: Failed check results; each item is read for ``column``,
                ``message`` and ``details``.

        Returns:
            Newline-joined mrkdwn text listing at most
            ``config.max_failures_shown`` failures, followed by a count of
            the ones left out.
        """
        lines = [":rotating_light: *Failures:*"]

        # A negative limit would slice from the end of the list; treat it as 0.
        limit = max(self.config.max_failures_shown, 0)
        shown = failures[:limit]
        for f in shown:
            col = f"[{f.column}]" if f.column else "[table]"
            lines.append(f"• {col} {f.message}")

            # Include sample failing rows if available. ``details`` may be
            # empty or None, so fall back to an empty mapping.
            details = f.details or {}
            if self.config.include_row_samples and details.get("failed_rows"):
                sample = details["failed_rows"][:3]
                lines.append(f" _Sample values: {sample}_")

        remaining = len(failures) - len(shown)
        if remaining > 0:
            lines.append(f"_...and {remaining} more failures_")

        return "\n".join(lines)

    def _format_warnings_slack(self, warnings: list) -> str:
        """Format warnings for Slack.

        Args:
            warnings: Warning check results; each item is read for ``column``
                and ``message``.

        Returns:
            Newline-joined mrkdwn text listing at most
            ``config.max_failures_shown`` warnings, followed by a count of
            the ones left out.
        """
        lines = [":warning: *Warnings:*"]

        # The failure cap is reused for warnings; negative values count as 0.
        limit = max(self.config.max_failures_shown, 0)
        shown = warnings[:limit]
        for w in shown:
            col = f"[{w.column}]" if w.column else "[table]"
            lines.append(f"• {col} {w.message}")

        remaining = len(warnings) - len(shown)
        if remaining > 0:
            lines.append(f"_...and {remaining} more warnings_")

        return "\n".join(lines)
251
+
252
+
253
class TeamsNotifier(BaseNotifier):
    """Microsoft Teams webhook notifier.

    Usage:
        notifier = TeamsNotifier(webhook_url="https://outlook.office.com/webhook/...")
        # or set DUCKGUARD_TEAMS_WEBHOOK environment variable

        result = execute_rules(rules, "data.csv")
        notifier.send_results(result)
    """

    @property
    def _env_var_name(self) -> str:
        """Environment variable consulted when no webhook URL is passed."""
        return "DUCKGUARD_TEAMS_WEBHOOK"

    def _format_message(self, result: ExecutionResult) -> dict[str, Any]:
        """Format as a Teams ``MessageCard`` payload.

        The card holds a summary section (source, time, check counts, score),
        then optional failure, warning and mention sections.

        Args:
            result: ExecutionResult to describe

        Returns:
            MessageCard dictionary ready to be JSON-encoded.
        """
        status_text = "PASSED" if result.passed else "FAILED"

        facts = [
            # str() keeps the payload JSON-serializable when the source is not
            # a plain string, matching how the Slack message renders it.
            {"title": "Source", "value": str(result.source)},
            {"title": "Time", "value": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
            {"title": "Checks", "value": f"{result.passed_count}/{result.total_checks} passed"},
            {"title": "Score", "value": f"{result.quality_score:.1f}%"},
        ]

        sections: list[dict[str, Any]] = [
            {
                "activityTitle": f"DuckGuard Validation {status_text}",
                "facts": facts,
            }
        ]

        # Failures are always listed when present.
        failures = result.get_failures()
        if failures:
            sections.append({
                "title": "Failures",
                "text": self._format_failures_teams(failures),
            })

        # Warnings are listed only when warning notifications are enabled.
        warnings = result.get_warnings()
        if warnings and self.config.on_warning:
            sections.append({
                "title": "Warnings",
                "text": self._format_warnings_teams(warnings),
            })

        # Name the configured users only on a failed run.
        if not result.passed and self.config.mention_users:
            mentions = ", ".join(f"@{u}" for u in self.config.mention_users)
            sections.append({
                "text": f"**Attention:** {mentions}",
            })

        return {
            "@type": "MessageCard",
            "@context": "http://schema.org/extensions",
            # Red for a failed run, green for a passed one.
            "themeColor": "FF0000" if not result.passed else "00FF00",
            "summary": f"DuckGuard Validation {status_text}",
            "sections": sections,
        }

    def _format_failures_teams(self, failures: list) -> str:
        """Format failures for Teams.

        Args:
            failures: Failed check results; each item is read for ``column``,
                ``message`` and ``details``.

        Returns:
            ``<br>``-joined text listing at most ``config.max_failures_shown``
            failures, followed by a count of the ones left out.
        """
        lines = []

        # A negative limit would slice from the end of the list; treat it as 0.
        limit = max(self.config.max_failures_shown, 0)
        shown = failures[:limit]
        for f in shown:
            col = f"[{f.column}]" if f.column else "[table]"
            lines.append(f"- {col} {f.message}")

            # ``details`` may be empty or None, so fall back to an empty mapping.
            details = f.details or {}
            if self.config.include_row_samples and details.get("failed_rows"):
                sample = details["failed_rows"][:3]
                lines.append(f" *Sample values: {sample}*")

        remaining = len(failures) - len(shown)
        if remaining > 0:
            lines.append(f"*...and {remaining} more failures*")

        return "<br>".join(lines)

    def _format_warnings_teams(self, warnings: list) -> str:
        """Format warnings for Teams.

        Args:
            warnings: Warning check results; each item is read for ``column``
                and ``message``.

        Returns:
            ``<br>``-joined text listing at most ``config.max_failures_shown``
            warnings, followed by a count of the ones left out.
        """
        lines = []

        # The failure cap is reused for warnings; negative values count as 0.
        limit = max(self.config.max_failures_shown, 0)
        shown = warnings[:limit]
        for w in shown:
            col = f"[{w.column}]" if w.column else "[table]"
            lines.append(f"- {col} {w.message}")

        remaining = len(warnings) - len(shown)
        if remaining > 0:
            lines.append(f"*...and {remaining} more warnings*")

        return "<br>".join(lines)
352
+
353
+
354
class NotificationError(Exception):
    """Raised when a notification could not be delivered to its webhook."""
@@ -3,11 +3,11 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import re
6
- from dataclasses import dataclass, field
6
+ from dataclasses import dataclass
7
7
  from typing import Any
8
8
 
9
9
  from duckguard.core.dataset import Dataset
10
- from duckguard.core.result import ProfileResult, ColumnProfile
10
+ from duckguard.core.result import ColumnProfile, ProfileResult
11
11
 
12
12
 
13
13
  @dataclass
@@ -319,7 +319,7 @@ class AutoProfiler:
319
319
  f'def test_{dataset.name.replace("-", "_").replace(".", "_")}():',
320
320
  f' {output_var} = connect("{dataset.source}")',
321
321
  "",
322
- f" # Basic dataset checks",
322
+ " # Basic dataset checks",
323
323
  f" assert {output_var}.row_count > 0",
324
324
  "",
325
325
  ]
@@ -1,5 +1,5 @@
1
1
  """pytest plugin for DuckGuard."""
2
2
 
3
- from duckguard.pytest_plugin.plugin import duckguard_engine, duckguard_dataset
3
+ from duckguard.pytest_plugin.plugin import duckguard_dataset, duckguard_engine
4
4
 
5
5
  __all__ = ["duckguard_engine", "duckguard_dataset"]
@@ -22,8 +22,8 @@ from __future__ import annotations
22
22
 
23
23
  import pytest
24
24
 
25
- from duckguard.core.engine import DuckGuardEngine
26
25
  from duckguard.connectors import connect as duckguard_connect
26
+ from duckguard.core.engine import DuckGuardEngine
27
27
 
28
28
 
29
29
  @pytest.fixture(scope="session")
@@ -3,10 +3,10 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from rich.console import Console
6
- from rich.table import Table
7
6
  from rich.panel import Panel
7
+ from rich.table import Table
8
8
 
9
- from duckguard.core.result import ProfileResult, ScanResult, CheckResult, CheckStatus
9
+ from duckguard.core.result import CheckResult, CheckStatus, ProfileResult, ScanResult
10
10
 
11
11
 
12
12
  class ConsoleReporter:
@@ -0,0 +1,42 @@
1
+ """Report generation for DuckGuard.
2
+
3
+ Provides HTML and PDF report generation for validation results.
4
+
5
+ Usage:
6
+ from duckguard.reports import HTMLReporter, PDFReporter
7
+
8
+ # Generate HTML report
9
+ reporter = HTMLReporter()
10
+ reporter.generate(result, "report.html")
11
+
12
+ # Generate PDF report (requires weasyprint)
13
+ pdf_reporter = PDFReporter()
14
+ pdf_reporter.generate(result, "report.pdf")
15
+
16
+ # Or use convenience functions
17
+ from duckguard.reports import generate_html_report, generate_pdf_report
18
+
19
+ generate_html_report(result, "report.html", title="My Report")
20
+ generate_pdf_report(result, "report.pdf")
21
+ """
22
+
23
+ from duckguard.reports.html_reporter import (
24
+ HTMLReporter,
25
+ ReportConfig,
26
+ generate_html_report,
27
+ )
28
+ from duckguard.reports.pdf_reporter import (
29
+ PDFReporter,
30
+ generate_pdf_report,
31
+ )
32
+
33
+ __all__ = [
34
+ # Configuration
35
+ "ReportConfig",
36
+ # Reporters
37
+ "HTMLReporter",
38
+ "PDFReporter",
39
+ # Convenience functions
40
+ "generate_html_report",
41
+ "generate_pdf_report",
42
+ ]