duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +29 -1
  3. duckguard/anomaly/baselines.py +294 -0
  4. duckguard/anomaly/detector.py +1 -5
  5. duckguard/anomaly/methods.py +17 -5
  6. duckguard/anomaly/ml_methods.py +724 -0
  7. duckguard/cli/main.py +561 -56
  8. duckguard/connectors/__init__.py +2 -2
  9. duckguard/connectors/bigquery.py +1 -1
  10. duckguard/connectors/databricks.py +1 -1
  11. duckguard/connectors/factory.py +2 -3
  12. duckguard/connectors/files.py +1 -1
  13. duckguard/connectors/kafka.py +2 -2
  14. duckguard/connectors/mongodb.py +1 -1
  15. duckguard/connectors/mysql.py +1 -1
  16. duckguard/connectors/oracle.py +1 -1
  17. duckguard/connectors/postgres.py +1 -2
  18. duckguard/connectors/redshift.py +1 -1
  19. duckguard/connectors/snowflake.py +1 -2
  20. duckguard/connectors/sqlite.py +1 -1
  21. duckguard/connectors/sqlserver.py +10 -13
  22. duckguard/contracts/__init__.py +6 -6
  23. duckguard/contracts/diff.py +1 -1
  24. duckguard/contracts/generator.py +5 -6
  25. duckguard/contracts/loader.py +4 -4
  26. duckguard/contracts/validator.py +3 -4
  27. duckguard/core/__init__.py +3 -3
  28. duckguard/core/column.py +588 -5
  29. duckguard/core/dataset.py +708 -3
  30. duckguard/core/result.py +328 -1
  31. duckguard/core/scoring.py +1 -2
  32. duckguard/errors.py +362 -0
  33. duckguard/freshness/__init__.py +33 -0
  34. duckguard/freshness/monitor.py +429 -0
  35. duckguard/history/__init__.py +44 -0
  36. duckguard/history/schema.py +301 -0
  37. duckguard/history/storage.py +479 -0
  38. duckguard/history/trends.py +348 -0
  39. duckguard/integrations/__init__.py +31 -0
  40. duckguard/integrations/airflow.py +387 -0
  41. duckguard/integrations/dbt.py +458 -0
  42. duckguard/notifications/__init__.py +61 -0
  43. duckguard/notifications/email.py +508 -0
  44. duckguard/notifications/formatter.py +118 -0
  45. duckguard/notifications/notifiers.py +357 -0
  46. duckguard/profiler/auto_profile.py +3 -3
  47. duckguard/pytest_plugin/__init__.py +1 -1
  48. duckguard/pytest_plugin/plugin.py +1 -1
  49. duckguard/reporting/console.py +2 -2
  50. duckguard/reports/__init__.py +42 -0
  51. duckguard/reports/html_reporter.py +514 -0
  52. duckguard/reports/pdf_reporter.py +114 -0
  53. duckguard/rules/__init__.py +3 -3
  54. duckguard/rules/executor.py +3 -4
  55. duckguard/rules/generator.py +8 -5
  56. duckguard/rules/loader.py +5 -5
  57. duckguard/rules/schema.py +23 -0
  58. duckguard/schema_history/__init__.py +40 -0
  59. duckguard/schema_history/analyzer.py +414 -0
  60. duckguard/schema_history/tracker.py +288 -0
  61. duckguard/semantic/__init__.py +1 -1
  62. duckguard/semantic/analyzer.py +0 -2
  63. duckguard/semantic/detector.py +17 -1
  64. duckguard/semantic/validators.py +2 -1
  65. duckguard-2.3.0.dist-info/METADATA +953 -0
  66. duckguard-2.3.0.dist-info/RECORD +77 -0
  67. duckguard-2.0.0.dist-info/METADATA +0 -221
  68. duckguard-2.0.0.dist-info/RECORD +0 -55
  69. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
  70. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
  71. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,514 @@
1
+ """HTML report generation for DuckGuard.
2
+
3
+ Generates beautiful, standalone HTML reports from validation results.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ if TYPE_CHECKING:
14
+ from duckguard.history.storage import StoredRun
15
+ from duckguard.rules.executor import ExecutionResult
16
+
17
+
18
@dataclass
class ReportConfig:
    """Options that control the content of a generated report.

    Attributes:
        title: Title shown in the report.
        include_passed: Whether passed checks are listed.
        include_failed_rows: Whether a sample of failed rows is shown.
        max_failed_rows: Upper bound on failed rows shown per check.
        include_charts: Whether quality score charts are generated.
        include_trends: Whether trend charts are included (requires history).
        custom_css: Additional CSS to include, or None for none.
        logo_url: URL or data URI for a logo, or None for no logo.
    """

    # Field order is part of the positional-constructor interface; keep it.
    title: str = "DuckGuard Data Quality Report"
    include_passed: bool = True
    include_failed_rows: bool = True
    max_failed_rows: int = 10
    include_charts: bool = True
    include_trends: bool = False
    custom_css: str | None = None
    logo_url: str | None = None
41
+
42
+
43
# Embedded HTML template (no external dependencies for basic reports).
#
# The template is rendered by Jinja2 with autoescaping enabled, so every
# ``{{ ... }}`` value is HTML-escaped unless it is explicitly marked ``|safe``.
# Only ``custom_css`` is marked safe: it is author-supplied configuration and
# HTML-escaping it would corrupt quotes and ``>`` combinators inside <style>.
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{ title }}</title>
    <style>
        :root {
            --color-pass: #10b981;
            --color-fail: #ef4444;
            --color-warn: #f59e0b;
            --color-info: #6b7280;
            --color-bg: #f9fafb;
            --color-card: #ffffff;
            --color-border: #e5e7eb;
            --color-text: #111827;
            --color-text-secondary: #6b7280;
        }
        * { box-sizing: border-box; margin: 0; padding: 0; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            background: var(--color-bg);
            color: var(--color-text);
            line-height: 1.5;
            padding: 2rem;
        }
        .container { max-width: 1200px; margin: 0 auto; }
        .header {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 2rem;
            padding-bottom: 1rem;
            border-bottom: 2px solid var(--color-border);
        }
        .header h1 { font-size: 1.75rem; font-weight: 600; }
        .header .meta { color: var(--color-text-secondary); font-size: 0.875rem; }
        .status-badge {
            display: inline-flex;
            align-items: center;
            padding: 0.5rem 1rem;
            border-radius: 9999px;
            font-weight: 600;
            font-size: 0.875rem;
        }
        .status-pass { background: #d1fae5; color: #065f46; }
        .status-fail { background: #fee2e2; color: #991b1b; }
        .cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
        .card {
            background: var(--color-card);
            border-radius: 0.5rem;
            padding: 1.5rem;
            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
        }
        .card-label { font-size: 0.75rem; text-transform: uppercase; color: var(--color-text-secondary); letter-spacing: 0.05em; margin-bottom: 0.25rem; }
        .card-value { font-size: 2rem; font-weight: 700; }
        .card-value.pass { color: var(--color-pass); }
        .card-value.fail { color: var(--color-fail); }
        .card-value.warn { color: var(--color-warn); }
        .section { background: var(--color-card); border-radius: 0.5rem; padding: 1.5rem; margin-bottom: 1.5rem; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
        .section-title { font-size: 1.125rem; font-weight: 600; margin-bottom: 1rem; display: flex; align-items: center; gap: 0.5rem; }
        .section-title .icon { width: 1.25rem; height: 1.25rem; }
        table { width: 100%; border-collapse: collapse; font-size: 0.875rem; }
        th, td { padding: 0.75rem; text-align: left; border-bottom: 1px solid var(--color-border); }
        th { font-weight: 600; color: var(--color-text-secondary); background: var(--color-bg); }
        tr:hover { background: var(--color-bg); }
        .status-icon { display: inline-flex; align-items: center; gap: 0.25rem; }
        .status-icon.pass { color: var(--color-pass); }
        .status-icon.fail { color: var(--color-fail); }
        .status-icon.warn { color: var(--color-warn); }
        .gauge-container { display: flex; justify-content: center; margin: 1rem 0; }
        .gauge { width: 200px; height: 100px; position: relative; }
        .gauge svg { width: 100%; height: 100%; }
        .gauge-value { position: absolute; bottom: 0; left: 50%; transform: translateX(-50%); font-size: 2rem; font-weight: 700; }
        .grade { font-size: 1rem; color: var(--color-text-secondary); }
        .failed-rows { margin-top: 0.5rem; padding: 0.75rem; background: #fef2f2; border-radius: 0.375rem; font-size: 0.8rem; }
        .failed-rows-title { font-weight: 600; color: #991b1b; margin-bottom: 0.25rem; }
        .failed-rows code { background: #fee2e2; padding: 0.125rem 0.375rem; border-radius: 0.25rem; font-family: monospace; }
        .footer { margin-top: 2rem; padding-top: 1rem; border-top: 1px solid var(--color-border); text-align: center; color: var(--color-text-secondary); font-size: 0.75rem; }
        .footer a { color: inherit; text-decoration: none; }
        @media print {
            body { padding: 0; }
            .section { break-inside: avoid; }
        }
        {# Author-supplied CSS is emitted verbatim; escaping would break it. #}
        {{ custom_css|safe }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <div>
                <h1>{{ title }}</h1>
                <div class="meta">
                    Source: <strong>{{ source }}</strong> |
                    Generated: {{ generated_at }}
                </div>
            </div>
            <div class="status-badge {{ 'status-pass' if passed else 'status-fail' }}">
                {{ '✓ PASSED' if passed else '✗ FAILED' }}
            </div>
        </div>

        <div class="cards">
            <div class="card">
                <div class="card-label">Quality Score</div>
                <div class="card-value {{ 'pass' if quality_score >= 80 else 'warn' if quality_score >= 60 else 'fail' }}">
                    {{ "%.1f"|format(quality_score) }}%
                </div>
                <div class="grade">Grade: {{ grade }}</div>
            </div>
            <div class="card">
                <div class="card-label">Checks Passed</div>
                <div class="card-value pass">{{ passed_count }}</div>
                <div class="grade">of {{ total_checks }} total</div>
            </div>
            <div class="card">
                <div class="card-label">Failures</div>
                <div class="card-value {{ 'fail' if failed_count > 0 else 'pass' }}">{{ failed_count }}</div>
            </div>
            <div class="card">
                <div class="card-label">Warnings</div>
                <div class="card-value {{ 'warn' if warning_count > 0 else 'pass' }}">{{ warning_count }}</div>
            </div>
        </div>

        {% if include_charts %}
        <div class="section">
            <div class="section-title">
                <svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/></svg>
                Quality Score
            </div>
            <div class="gauge-container">
                <div class="gauge">
                    <svg viewBox="0 0 200 100">
                        <path d="M 20 90 A 80 80 0 0 1 180 90" fill="none" stroke="#e5e7eb" stroke-width="12" stroke-linecap="round"/>
                        <path d="M 20 90 A 80 80 0 0 1 180 90" fill="none"
                              stroke="{{ '#10b981' if quality_score >= 80 else '#f59e0b' if quality_score >= 60 else '#ef4444' }}"
                              stroke-width="12" stroke-linecap="round"
                              stroke-dasharray="{{ quality_score * 2.51 }} 251"/>
                    </svg>
                    <div class="gauge-value">{{ "%.0f"|format(quality_score) }}</div>
                </div>
            </div>
        </div>
        {% endif %}

        {% if failures %}
        <div class="section">
            <div class="section-title" style="color: var(--color-fail);">
                <svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/></svg>
                Failures ({{ failures|length }})
            </div>
            <table>
                <thead>
                    <tr>
                        <th>Check</th>
                        <th>Column</th>
                        <th>Message</th>
                        <th>Actual</th>
                        <th>Expected</th>
                    </tr>
                </thead>
                <tbody>
                    {% for f in failures %}
                    <tr>
                        <td><span class="status-icon fail">✗</span> {{ f.check.type.value }}</td>
                        <td>{{ f.column or '-' }}</td>
                        <td>{{ f.message }}</td>
                        <td><code>{{ f.actual_value }}</code></td>
                        <td><code>{{ f.expected_value }}</code></td>
                    </tr>
                    {% if include_failed_rows and f.details and f.details.get('failed_rows') %}
                    {# Slice once so the "shown" count matches the rows actually rendered. #}
                    {% set shown_rows = f.details.get('failed_rows')[:max_failed_rows] %}
                    <tr>
                        <td colspan="5">
                            <div class="failed-rows">
                                <div class="failed-rows-title">Sample Failed Rows ({{ shown_rows|length }} shown)</div>
                                {% for row in shown_rows %}
                                <code>{{ row }}</code>{% if not loop.last %}, {% endif %}
                                {% endfor %}
                            </div>
                        </td>
                    </tr>
                    {% endif %}
                    {% endfor %}
                </tbody>
            </table>
        </div>
        {% endif %}

        {% if warnings %}
        <div class="section">
            <div class="section-title" style="color: var(--color-warn);">
                <svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"/></svg>
                Warnings ({{ warnings|length }})
            </div>
            <table>
                <thead>
                    <tr>
                        <th>Check</th>
                        <th>Column</th>
                        <th>Message</th>
                        <th>Actual</th>
                    </tr>
                </thead>
                <tbody>
                    {% for w in warnings %}
                    <tr>
                        <td><span class="status-icon warn">⚠</span> {{ w.check.type.value }}</td>
                        <td>{{ w.column or '-' }}</td>
                        <td>{{ w.message }}</td>
                        <td><code>{{ w.actual_value }}</code></td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
        </div>
        {% endif %}

        {% if include_passed and passed_results %}
        <div class="section">
            <div class="section-title" style="color: var(--color-pass);">
                <svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"/></svg>
                Passed Checks ({{ passed_results|length }})
            </div>
            <table>
                <thead>
                    <tr>
                        <th>Check</th>
                        <th>Column</th>
                        <th>Message</th>
                    </tr>
                </thead>
                <tbody>
                    {% for p in passed_results %}
                    <tr>
                        <td><span class="status-icon pass">✓</span> {{ p.check.type.value }}</td>
                        <td>{{ p.column or '-' }}</td>
                        <td>{{ p.message }}</td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
        </div>
        {% endif %}

        <div class="footer">
            Generated by <a href="https://github.com/XDataHubAI/duckguard">DuckGuard</a> |
            Data quality that just works
        </div>
    </div>
</body>
</html>
"""
297
+
298
+
299
class HTMLReporter:
    """Generates HTML reports from DuckGuard validation results.

    Creates standalone HTML reports that can be shared or viewed in any
    browser. Jinja2 is used when it is installed; otherwise a reduced
    report is produced with plain string formatting.

    Usage:
        from duckguard.reports import HTMLReporter
        from duckguard import connect, load_rules, execute_rules

        result = execute_rules(load_rules("rules.yaml"), connect("data.csv"))

        reporter = HTMLReporter()
        reporter.generate(result, "report.html")

    Attributes:
        config: Report configuration
    """

    def __init__(self, config: ReportConfig | None = None):
        """Initialize the reporter.

        Args:
            config: Report configuration (uses defaults if None)
        """
        self.config = config or ReportConfig()

    def generate(
        self,
        result: ExecutionResult,
        output_path: str | Path,
        *,
        history: list[StoredRun] | None = None,
    ) -> Path:
        """Generate an HTML report.

        When Jinja2 cannot be imported, the report is produced by
        ``_generate_basic`` instead; that fallback does not use ``history``.

        Args:
            result: ExecutionResult to report on
            output_path: Path to write HTML file
            history: Optional historical results for trends

        Returns:
            Path to generated report
        """
        try:
            from jinja2 import BaseLoader, Environment
        except ImportError:
            # Fall back to basic string formatting if jinja2 not available
            return self._generate_basic(result, output_path)

        output_path = Path(output_path)

        # autoescape=True: every value interpolated by the template is
        # HTML-escaped unless the template marks it safe.
        env = Environment(loader=BaseLoader(), autoescape=True)
        template = env.from_string(HTML_TEMPLATE)

        rendered = template.render(**self._build_context(result, history))
        output_path.write_text(rendered, encoding="utf-8")

        return output_path

    def _generate_basic(
        self,
        result: ExecutionResult,
        output_path: str | Path,
    ) -> Path:
        """Generate a basic HTML report without Jinja2.

        All text taken from the configuration or the result (title, source,
        check type, column, message) is HTML-escaped before interpolation so
        that data values cannot inject markup, matching the autoescaped
        Jinja2 path.

        Args:
            result: ExecutionResult to report on
            output_path: Path to write HTML file

        Returns:
            Path to generated report
        """
        # Imported locally so the module-level import block is unaffected.
        from html import escape

        def esc(value: object) -> str:
            """Return *value* as HTML-escaped text."""
            return escape(str(value))

        output_path = Path(output_path)

        status = "PASSED" if result.passed else "FAILED"
        status_class = "status-pass" if result.passed else "status-fail"
        grade = self._score_to_grade(result.quality_score)
        title = esc(self.config.title)
        source = esc(result.source)
        generated_at = datetime.now().strftime("%Y-%m-%d %H:%M")

        failures_html = "".join(
            f"""
            <tr>
                <td>✗ {esc(f.check.type.value)}</td>
                <td>{esc(f.column or '-')}</td>
                <td>{esc(f.message)}</td>
            </tr>
            """
            for f in result.get_failures()
        )

        # The failures table is omitted entirely when nothing failed.
        failures_section = ""
        if failures_html:
            failures_section = (
                "<h2>Failures</h2><table>"
                "<tr><th>Check</th><th>Column</th><th>Message</th></tr>"
                f"{failures_html}</table>"
            )

        html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{title}</title>
    <style>
        body {{ font-family: sans-serif; padding: 2rem; max-width: 1000px; margin: 0 auto; }}
        .header {{ display: flex; justify-content: space-between; border-bottom: 2px solid #eee; padding-bottom: 1rem; }}
        .{status_class} {{ padding: 0.5rem 1rem; border-radius: 9999px; font-weight: bold; }}
        .status-pass {{ background: #d1fae5; color: #065f46; }}
        .status-fail {{ background: #fee2e2; color: #991b1b; }}
        .cards {{ display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin: 2rem 0; }}
        .card {{ background: #f9fafb; padding: 1rem; border-radius: 0.5rem; }}
        .card-value {{ font-size: 2rem; font-weight: bold; }}
        table {{ width: 100%; border-collapse: collapse; }}
        th, td {{ padding: 0.75rem; text-align: left; border-bottom: 1px solid #eee; }}
        th {{ background: #f9fafb; }}
    </style>
</head>
<body>
    <div class="header">
        <div>
            <h1>{title}</h1>
            <p>Source: {source} | Generated: {generated_at}</p>
        </div>
        <span class="{status_class}">{status}</span>
    </div>
    <div class="cards">
        <div class="card">
            <div>Quality Score</div>
            <div class="card-value">{result.quality_score:.1f}%</div>
            <div>Grade: {grade}</div>
        </div>
        <div class="card">
            <div>Checks Passed</div>
            <div class="card-value">{result.passed_count}</div>
            <div>of {result.total_checks}</div>
        </div>
        <div class="card">
            <div>Failures</div>
            <div class="card-value">{result.failed_count}</div>
        </div>
        <div class="card">
            <div>Warnings</div>
            <div class="card-value">{result.warning_count}</div>
        </div>
    </div>
    {failures_section}
    <footer style="margin-top: 2rem; text-align: center; color: #888;">Generated by DuckGuard</footer>
</body>
</html>"""

        output_path.write_text(html, encoding="utf-8")
        return output_path

    def _build_context(
        self,
        result: ExecutionResult,
        history: list[StoredRun] | None = None,
    ) -> dict[str, Any]:
        """Build the template context from a result.

        Args:
            result: ExecutionResult whose fields populate the context
            history: Optional historical runs, passed through as ``history``

        Returns:
            Mapping of template variable names to values
        """
        config = self.config
        passed_results = (
            [r for r in result.results if r.passed] if config.include_passed else []
        )
        return {
            "title": config.title,
            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "source": result.source,
            "quality_score": result.quality_score,
            "grade": self._score_to_grade(result.quality_score),
            "passed": result.passed,
            "total_checks": result.total_checks,
            "passed_count": result.passed_count,
            "failed_count": result.failed_count,
            "warning_count": result.warning_count,
            "failures": result.get_failures(),
            "warnings": result.get_warnings(),
            "passed_results": passed_results,
            "include_passed": config.include_passed,
            "include_charts": config.include_charts,
            "include_failed_rows": config.include_failed_rows,
            "max_failed_rows": config.max_failed_rows,
            # A real boolean: trends need both the option and some history.
            "include_trends": bool(config.include_trends and history),
            "history": history,
            "custom_css": config.custom_css or "",
        }

    def _score_to_grade(self, score: float) -> str:
        """Convert a score to a letter grade.

        Args:
            score: Quality score; thresholds are 90, 80, 70 and 60

        Returns:
            "A", "B", "C" or "D" for the highest threshold reached, else "F"
        """
        if score >= 90:
            return "A"
        if score >= 80:
            return "B"
        if score >= 70:
            return "C"
        if score >= 60:
            return "D"
        return "F"
495
+
496
+
497
def generate_html_report(
    result: ExecutionResult,
    output_path: str | Path,
    **kwargs: Any,
) -> Path:
    """Generate an HTML report in a single call.

    Args:
        result: ExecutionResult to report on
        output_path: Path to write HTML file
        **kwargs: ReportConfig options; when none are given the reporter
            builds its own default configuration

    Returns:
        Path to generated report
    """
    if kwargs:
        report_config = ReportConfig(**kwargs)
    else:
        report_config = None
    return HTMLReporter(config=report_config).generate(result, output_path)
@@ -0,0 +1,114 @@
1
+ """PDF report generation for DuckGuard.
2
+
3
+ Uses WeasyPrint to convert HTML reports to PDF format.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import tempfile
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from duckguard.reports.html_reporter import HTMLReporter, ReportConfig
13
+
14
+ if TYPE_CHECKING:
15
+ from duckguard.history.storage import StoredRun
16
+ from duckguard.rules.executor import ExecutionResult
17
+
18
+
19
class PDFReporter(HTMLReporter):
    """Generates PDF reports from DuckGuard validation results.

    Renders the HTML report to a temporary file and converts that file to
    PDF with WeasyPrint.

    Usage:
        from duckguard.reports import PDFReporter
        from duckguard import connect, load_rules, execute_rules

        result = execute_rules(load_rules("rules.yaml"), connect("data.csv"))

        reporter = PDFReporter()
        reporter.generate(result, "report.pdf")

    Note:
        Requires weasyprint to be installed:
            pip install duckguard[reports]

    Attributes:
        config: Report configuration (inherited from HTMLReporter)
    """

    def generate(
        self,
        result: ExecutionResult,
        output_path: str | Path,
        *,
        history: list[StoredRun] | None = None,
    ) -> Path:
        """Generate a PDF report.

        Args:
            result: ExecutionResult to report on
            output_path: Path to write PDF file
            history: Optional historical results for trends

        Returns:
            Path to generated PDF report

        Raises:
            ImportError: If weasyprint cannot be imported; the original
                import failure is attached as the cause
        """
        try:
            from weasyprint import HTML
        except ImportError as exc:
            # Chain the cause so a broken weasyprint install (as opposed to a
            # missing one) stays diagnosable from the traceback.
            raise ImportError(
                "PDF reports require weasyprint. "
                "Install with: pip install duckguard[reports]"
            ) from exc

        output_path = Path(output_path)

        # Only the file name is needed here: the handle is closed on leaving
        # the ``with`` and delete=False keeps the file so it can be written
        # and read by path below.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".html",
            delete=False,
            encoding="utf-8",
        ) as f:
            html_path = Path(f.name)

        try:
            # Generate HTML report
            super().generate(result, html_path, history=history)

            # Convert to PDF
            HTML(filename=str(html_path)).write_pdf(str(output_path))
        finally:
            # Best-effort cleanup: a failure to remove the temporary file must
            # not mask the result (or the error) of the conversion itself.
            try:
                html_path.unlink()
            except OSError:
                pass

        return output_path
95
+
96
+
97
def generate_pdf_report(
    result: ExecutionResult,
    output_path: str | Path,
    **kwargs: Any,
) -> Path:
    """Generate a PDF report in a single call.

    Args:
        result: ExecutionResult to report on
        output_path: Path to write PDF file
        **kwargs: ReportConfig options; when none are given the reporter
            builds its own default configuration

    Returns:
        Path to generated PDF report
    """
    if kwargs:
        report_config = ReportConfig(**kwargs)
    else:
        report_config = None
    return PDFReporter(config=report_config).generate(result, output_path)
@@ -10,10 +10,10 @@ Example:
10
10
  results = execute_rules(rules, "data.csv")
11
11
  """
12
12
 
13
- from duckguard.rules.loader import load_rules, load_rules_from_string
14
- from duckguard.rules.executor import execute_rules, RuleExecutor
15
- from duckguard.rules.schema import RuleSet, ColumnRules, Check, SimpleCheck
13
+ from duckguard.rules.executor import RuleExecutor, execute_rules
16
14
  from duckguard.rules.generator import generate_rules
15
+ from duckguard.rules.loader import load_rules, load_rules_from_string
16
+ from duckguard.rules.schema import Check, ColumnRules, RuleSet, SimpleCheck
17
17
 
18
18
  __all__ = [
19
19
  "load_rules",
@@ -9,15 +9,14 @@ from dataclasses import dataclass, field
9
9
  from datetime import datetime
10
10
  from typing import Any
11
11
 
12
- from duckguard.core.dataset import Dataset
13
- from duckguard.core.result import ValidationResult
14
12
  from duckguard.connectors import connect
13
+ from duckguard.core.dataset import Dataset
15
14
  from duckguard.rules.schema import (
16
- RuleSet,
15
+ BUILTIN_PATTERNS,
17
16
  Check,
18
17
  CheckType,
18
+ RuleSet,
19
19
  Severity,
20
- BUILTIN_PATTERNS,
21
20
  )
22
21
 
23
22