duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckguard/__init__.py +55 -28
- duckguard/anomaly/__init__.py +29 -1
- duckguard/anomaly/baselines.py +294 -0
- duckguard/anomaly/detector.py +1 -5
- duckguard/anomaly/methods.py +17 -5
- duckguard/anomaly/ml_methods.py +724 -0
- duckguard/cli/main.py +561 -56
- duckguard/connectors/__init__.py +2 -2
- duckguard/connectors/bigquery.py +1 -1
- duckguard/connectors/databricks.py +1 -1
- duckguard/connectors/factory.py +2 -3
- duckguard/connectors/files.py +1 -1
- duckguard/connectors/kafka.py +2 -2
- duckguard/connectors/mongodb.py +1 -1
- duckguard/connectors/mysql.py +1 -1
- duckguard/connectors/oracle.py +1 -1
- duckguard/connectors/postgres.py +1 -2
- duckguard/connectors/redshift.py +1 -1
- duckguard/connectors/snowflake.py +1 -2
- duckguard/connectors/sqlite.py +1 -1
- duckguard/connectors/sqlserver.py +10 -13
- duckguard/contracts/__init__.py +6 -6
- duckguard/contracts/diff.py +1 -1
- duckguard/contracts/generator.py +5 -6
- duckguard/contracts/loader.py +4 -4
- duckguard/contracts/validator.py +3 -4
- duckguard/core/__init__.py +3 -3
- duckguard/core/column.py +588 -5
- duckguard/core/dataset.py +708 -3
- duckguard/core/result.py +328 -1
- duckguard/core/scoring.py +1 -2
- duckguard/errors.py +362 -0
- duckguard/freshness/__init__.py +33 -0
- duckguard/freshness/monitor.py +429 -0
- duckguard/history/__init__.py +44 -0
- duckguard/history/schema.py +301 -0
- duckguard/history/storage.py +479 -0
- duckguard/history/trends.py +348 -0
- duckguard/integrations/__init__.py +31 -0
- duckguard/integrations/airflow.py +387 -0
- duckguard/integrations/dbt.py +458 -0
- duckguard/notifications/__init__.py +61 -0
- duckguard/notifications/email.py +508 -0
- duckguard/notifications/formatter.py +118 -0
- duckguard/notifications/notifiers.py +357 -0
- duckguard/profiler/auto_profile.py +3 -3
- duckguard/pytest_plugin/__init__.py +1 -1
- duckguard/pytest_plugin/plugin.py +1 -1
- duckguard/reporting/console.py +2 -2
- duckguard/reports/__init__.py +42 -0
- duckguard/reports/html_reporter.py +514 -0
- duckguard/reports/pdf_reporter.py +114 -0
- duckguard/rules/__init__.py +3 -3
- duckguard/rules/executor.py +3 -4
- duckguard/rules/generator.py +8 -5
- duckguard/rules/loader.py +5 -5
- duckguard/rules/schema.py +23 -0
- duckguard/schema_history/__init__.py +40 -0
- duckguard/schema_history/analyzer.py +414 -0
- duckguard/schema_history/tracker.py +288 -0
- duckguard/semantic/__init__.py +1 -1
- duckguard/semantic/analyzer.py +0 -2
- duckguard/semantic/detector.py +17 -1
- duckguard/semantic/validators.py +2 -1
- duckguard-2.3.0.dist-info/METADATA +953 -0
- duckguard-2.3.0.dist-info/RECORD +77 -0
- duckguard-2.0.0.dist-info/METADATA +0 -221
- duckguard-2.0.0.dist-info/RECORD +0 -55
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
"""HTML report generation for DuckGuard.
|
|
2
|
+
|
|
3
|
+
Generates beautiful, standalone HTML reports from validation results.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from duckguard.history.storage import StoredRun
|
|
15
|
+
from duckguard.rules.executor import ExecutionResult
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ReportConfig:
    """Options that control what a generated report contains.

    Every field has a default, so ``ReportConfig()`` yields a usable
    configuration. Field order is part of the constructor signature.
    """

    # Heading and <title> text of the report.
    title: str = "DuckGuard Data Quality Report"
    # Whether the "Passed Checks" table is rendered.
    include_passed: bool = True
    # Whether a sample of failing rows is rendered under each failure.
    include_failed_rows: bool = True
    # Upper bound on the failing rows rendered per check.
    max_failed_rows: int = 10
    # Whether the quality-score gauge section is rendered.
    include_charts: bool = True
    # Whether trend charts are requested (requires run history).
    include_trends: bool = False
    # Extra CSS appended to the report's stylesheet, if any.
    custom_css: str | None = None
    # URL or data URI of a logo image, if any.
    logo_url: str | None = None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Embedded Jinja2 template for the full report, kept inline so the report
# needs no template files on disk.
#
# NOTE: `custom_css` is emitted with the `safe` filter. It sits inside a
# <style> element, where HTML entities are not decoded, so autoescaping would
# corrupt CSS containing quotes, `>` or `&`. It must therefore only ever come
# from trusted configuration, never from the data being validated.
HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ title }}</title>
<style>
:root {
--color-pass: #10b981;
--color-fail: #ef4444;
--color-warn: #f59e0b;
--color-info: #6b7280;
--color-bg: #f9fafb;
--color-card: #ffffff;
--color-border: #e5e7eb;
--color-text: #111827;
--color-text-secondary: #6b7280;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
background: var(--color-bg);
color: var(--color-text);
line-height: 1.5;
padding: 2rem;
}
.container { max-width: 1200px; margin: 0 auto; }
.header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 2rem;
padding-bottom: 1rem;
border-bottom: 2px solid var(--color-border);
}
.header h1 { font-size: 1.75rem; font-weight: 600; }
.header .meta { color: var(--color-text-secondary); font-size: 0.875rem; }
.status-badge {
display: inline-flex;
align-items: center;
padding: 0.5rem 1rem;
border-radius: 9999px;
font-weight: 600;
font-size: 0.875rem;
}
.status-pass { background: #d1fae5; color: #065f46; }
.status-fail { background: #fee2e2; color: #991b1b; }
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
.card {
background: var(--color-card);
border-radius: 0.5rem;
padding: 1.5rem;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.card-label { font-size: 0.75rem; text-transform: uppercase; color: var(--color-text-secondary); letter-spacing: 0.05em; margin-bottom: 0.25rem; }
.card-value { font-size: 2rem; font-weight: 700; }
.card-value.pass { color: var(--color-pass); }
.card-value.fail { color: var(--color-fail); }
.card-value.warn { color: var(--color-warn); }
.section { background: var(--color-card); border-radius: 0.5rem; padding: 1.5rem; margin-bottom: 1.5rem; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
.section-title { font-size: 1.125rem; font-weight: 600; margin-bottom: 1rem; display: flex; align-items: center; gap: 0.5rem; }
.section-title .icon { width: 1.25rem; height: 1.25rem; }
table { width: 100%; border-collapse: collapse; font-size: 0.875rem; }
th, td { padding: 0.75rem; text-align: left; border-bottom: 1px solid var(--color-border); }
th { font-weight: 600; color: var(--color-text-secondary); background: var(--color-bg); }
tr:hover { background: var(--color-bg); }
.status-icon { display: inline-flex; align-items: center; gap: 0.25rem; }
.status-icon.pass { color: var(--color-pass); }
.status-icon.fail { color: var(--color-fail); }
.status-icon.warn { color: var(--color-warn); }
.gauge-container { display: flex; justify-content: center; margin: 1rem 0; }
.gauge { width: 200px; height: 100px; position: relative; }
.gauge svg { width: 100%; height: 100%; }
.gauge-value { position: absolute; bottom: 0; left: 50%; transform: translateX(-50%); font-size: 2rem; font-weight: 700; }
.grade { font-size: 1rem; color: var(--color-text-secondary); }
.failed-rows { margin-top: 0.5rem; padding: 0.75rem; background: #fef2f2; border-radius: 0.375rem; font-size: 0.8rem; }
.failed-rows-title { font-weight: 600; color: #991b1b; margin-bottom: 0.25rem; }
.failed-rows code { background: #fee2e2; padding: 0.125rem 0.375rem; border-radius: 0.25rem; font-family: monospace; }
.footer { margin-top: 2rem; padding-top: 1rem; border-top: 1px solid var(--color-border); text-align: center; color: var(--color-text-secondary); font-size: 0.75rem; }
.footer a { color: inherit; text-decoration: none; }
@media print {
body { padding: 0; }
.section { break-inside: avoid; }
}
{{ custom_css|safe }}
</style>
</head>
<body>
<div class="container">
<div class="header">
<div>
<h1>{{ title }}</h1>
<div class="meta">
Source: <strong>{{ source }}</strong> |
Generated: {{ generated_at }}
</div>
</div>
<div class="status-badge {{ 'status-pass' if passed else 'status-fail' }}">
{{ '✓ PASSED' if passed else '✗ FAILED' }}
</div>
</div>

<div class="cards">
<div class="card">
<div class="card-label">Quality Score</div>
<div class="card-value {{ 'pass' if quality_score >= 80 else 'warn' if quality_score >= 60 else 'fail' }}">
{{ "%.1f"|format(quality_score) }}%
</div>
<div class="grade">Grade: {{ grade }}</div>
</div>
<div class="card">
<div class="card-label">Checks Passed</div>
<div class="card-value pass">{{ passed_count }}</div>
<div class="grade">of {{ total_checks }} total</div>
</div>
<div class="card">
<div class="card-label">Failures</div>
<div class="card-value {{ 'fail' if failed_count > 0 else 'pass' }}">{{ failed_count }}</div>
</div>
<div class="card">
<div class="card-label">Warnings</div>
<div class="card-value {{ 'warn' if warning_count > 0 else 'pass' }}">{{ warning_count }}</div>
</div>
</div>

{% if include_charts %}
<div class="section">
<div class="section-title">
<svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/></svg>
Quality Score
</div>
<div class="gauge-container">
<div class="gauge">
<svg viewBox="0 0 200 100">
<path d="M 20 90 A 80 80 0 0 1 180 90" fill="none" stroke="#e5e7eb" stroke-width="12" stroke-linecap="round"/>
<path d="M 20 90 A 80 80 0 0 1 180 90" fill="none"
stroke="{{ '#10b981' if quality_score >= 80 else '#f59e0b' if quality_score >= 60 else '#ef4444' }}"
stroke-width="12" stroke-linecap="round"
stroke-dasharray="{{ quality_score * 2.51 }} 251"/>
</svg>
<div class="gauge-value">{{ "%.0f"|format(quality_score) }}</div>
</div>
</div>
</div>
{% endif %}

{% if failures %}
<div class="section">
<div class="section-title" style="color: var(--color-fail);">
<svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/></svg>
Failures ({{ failures|length }})
</div>
<table>
<thead>
<tr>
<th>Check</th>
<th>Column</th>
<th>Message</th>
<th>Actual</th>
<th>Expected</th>
</tr>
</thead>
<tbody>
{% for f in failures %}
<tr>
<td><span class="status-icon fail">✗</span> {{ f.check.type.value }}</td>
<td>{{ f.column or '-' }}</td>
<td>{{ f.message }}</td>
<td><code>{{ f.actual_value }}</code></td>
<td><code>{{ f.expected_value }}</code></td>
</tr>
{% if include_failed_rows and f.details and f.details.get('failed_rows') %}
<tr>
<td colspan="5">
<div class="failed-rows">
<div class="failed-rows-title">Sample Failed Rows ({{ f.details.get('failed_rows')[:max_failed_rows]|length }} shown)</div>
{% for row in f.details.get('failed_rows')[:max_failed_rows] %}
<code>{{ row }}</code>{% if not loop.last %}, {% endif %}
{% endfor %}
</div>
</td>
</tr>
{% endif %}
{% endfor %}
</tbody>
</table>
</div>
{% endif %}

{% if warnings %}
<div class="section">
<div class="section-title" style="color: var(--color-warn);">
<svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"/></svg>
Warnings ({{ warnings|length }})
</div>
<table>
<thead>
<tr>
<th>Check</th>
<th>Column</th>
<th>Message</th>
<th>Actual</th>
</tr>
</thead>
<tbody>
{% for w in warnings %}
<tr>
<td><span class="status-icon warn">⚠</span> {{ w.check.type.value }}</td>
<td>{{ w.column or '-' }}</td>
<td>{{ w.message }}</td>
<td><code>{{ w.actual_value }}</code></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}

{% if include_passed and passed_results %}
<div class="section">
<div class="section-title" style="color: var(--color-pass);">
<svg class="icon" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"/></svg>
Passed Checks ({{ passed_results|length }})
</div>
<table>
<thead>
<tr>
<th>Check</th>
<th>Column</th>
<th>Message</th>
</tr>
</thead>
<tbody>
{% for p in passed_results %}
<tr>
<td><span class="status-icon pass">✓</span> {{ p.check.type.value }}</td>
<td>{{ p.column or '-' }}</td>
<td>{{ p.message }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}

<div class="footer">
Generated by <a href="https://github.com/XDataHubAI/duckguard">DuckGuard</a> |
Data quality that just works
</div>
</div>
</body>
</html>
"""
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class HTMLReporter:
    """Generates HTML reports from DuckGuard validation results.

    Creates standalone HTML reports that can be shared or viewed in any
    browser. The full report is rendered with Jinja2; when Jinja2 cannot be
    imported a reduced report is produced with plain string formatting.

    Usage:
        from duckguard.reports import HTMLReporter
        from duckguard import connect, load_rules, execute_rules

        result = execute_rules(load_rules("rules.yaml"), connect("data.csv"))

        reporter = HTMLReporter()
        reporter.generate(result, "report.html")

    Attributes:
        config: Report configuration
    """

    def __init__(self, config: ReportConfig | None = None):
        """Initialize the reporter.

        Args:
            config: Report configuration (uses defaults if None)
        """
        self.config = config or ReportConfig()

    def generate(
        self,
        result: ExecutionResult,
        output_path: str | Path,
        *,
        history: list[StoredRun] | None = None,
    ) -> Path:
        """Generate an HTML report.

        Args:
            result: ExecutionResult to report on
            output_path: Path to write HTML file
            history: Optional historical results for trends

        Returns:
            Path to generated report

        Note:
            No ImportError is raised when jinja2 is missing. The report is
            instead produced by ``_generate_basic``, which ignores ``history``
            and uses only ``config.title`` from the configuration.
        """
        try:
            from jinja2 import BaseLoader, Environment
        except ImportError:
            # Fall back to basic string formatting if jinja2 not available
            return self._generate_basic(result, output_path)

        output_path = Path(output_path)

        # autoescape=True makes every {{ ... }} expression HTML-escaped
        # unless the template explicitly marks it safe.
        env = Environment(loader=BaseLoader(), autoescape=True)
        template = env.from_string(HTML_TEMPLATE)

        context = self._build_context(result, history)

        html = template.render(**context)
        output_path.write_text(html, encoding="utf-8")

        return output_path

    def _generate_basic(
        self,
        result: ExecutionResult,
        output_path: str | Path,
    ) -> Path:
        """Generate a basic HTML report without Jinja2.

        All values taken from the result or the configuration are
        HTML-escaped before being interpolated, matching the autoescaping
        the Jinja2 path applies.

        Args:
            result: ExecutionResult to report on
            output_path: Path to write HTML file

        Returns:
            Path to generated report
        """
        # Imported locally under a distinct name: `html` is used below as a
        # local variable holding the rendered document.
        from html import escape

        output_path = Path(output_path)

        status = "PASSED" if result.passed else "FAILED"
        status_class = "status-pass" if result.passed else "status-fail"
        grade = self._score_to_grade(result.quality_score)

        title = escape(str(self.config.title))
        source = escape(str(result.source))
        generated_at = datetime.now().strftime('%Y-%m-%d %H:%M')

        # One table row per failed check, joined once instead of repeated +=.
        failures_html = "".join(
            f"""
<tr>
<td>✗ {escape(str(f.check.type.value))}</td>
<td>{escape(str(f.column or '-'))}</td>
<td>{escape(str(f.message))}</td>
</tr>
"""
            for f in result.get_failures()
        )

        # The failures table is omitted entirely when nothing failed.
        failures_section = ""
        if failures_html:
            failures_section = (
                "<h2>Failures</h2><table><tr><th>Check</th><th>Column</th>"
                f"<th>Message</th></tr>{failures_html}</table>"
            )

        html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>{title}</title>
<style>
body {{ font-family: sans-serif; padding: 2rem; max-width: 1000px; margin: 0 auto; }}
.header {{ display: flex; justify-content: space-between; border-bottom: 2px solid #eee; padding-bottom: 1rem; }}
.{status_class} {{ padding: 0.5rem 1rem; border-radius: 9999px; font-weight: bold; }}
.status-pass {{ background: #d1fae5; color: #065f46; }}
.status-fail {{ background: #fee2e2; color: #991b1b; }}
.cards {{ display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin: 2rem 0; }}
.card {{ background: #f9fafb; padding: 1rem; border-radius: 0.5rem; }}
.card-value {{ font-size: 2rem; font-weight: bold; }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{ padding: 0.75rem; text-align: left; border-bottom: 1px solid #eee; }}
th {{ background: #f9fafb; }}
</style>
</head>
<body>
<div class="header">
<div>
<h1>{title}</h1>
<p>Source: {source} | Generated: {generated_at}</p>
</div>
<span class="{status_class}">{status}</span>
</div>
<div class="cards">
<div class="card">
<div>Quality Score</div>
<div class="card-value">{result.quality_score:.1f}%</div>
<div>Grade: {grade}</div>
</div>
<div class="card">
<div>Checks Passed</div>
<div class="card-value">{result.passed_count}</div>
<div>of {result.total_checks}</div>
</div>
<div class="card">
<div>Failures</div>
<div class="card-value">{result.failed_count}</div>
</div>
<div class="card">
<div>Warnings</div>
<div class="card-value">{result.warning_count}</div>
</div>
</div>
{failures_section}
<footer style="margin-top: 2rem; text-align: center; color: #888;">Generated by DuckGuard</footer>
</body>
</html>"""

        output_path.write_text(html, encoding="utf-8")
        return output_path

    def _build_context(
        self,
        result: ExecutionResult,
        history: list[StoredRun] | None = None,
    ) -> dict[str, Any]:
        """Build the template context from a result and the configuration.

        Args:
            result: ExecutionResult to report on
            history: Optional historical results for trends

        Returns:
            Mapping of template variable names to values
        """
        passed_results = (
            [r for r in result.results if r.passed]
            if self.config.include_passed
            else []
        )
        return {
            "title": self.config.title,
            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "source": result.source,
            "quality_score": result.quality_score,
            "grade": self._score_to_grade(result.quality_score),
            "passed": result.passed,
            "total_checks": result.total_checks,
            "passed_count": result.passed_count,
            "failed_count": result.failed_count,
            "warning_count": result.warning_count,
            "failures": result.get_failures(),
            "warnings": result.get_warnings(),
            "passed_results": passed_results,
            "include_passed": self.config.include_passed,
            "include_charts": self.config.include_charts,
            "include_failed_rows": self.config.include_failed_rows,
            "max_failed_rows": self.config.max_failed_rows,
            # Trends need both the option and a non-empty history; coerce so
            # the flag is a real bool rather than the history list or None.
            "include_trends": bool(self.config.include_trends and history),
            "history": history,
            "custom_css": self.config.custom_css or "",
        }

    def _score_to_grade(self, score: float) -> str:
        """Convert score to letter grade.

        Args:
            score: Quality score; thresholds are 90, 80, 70 and 60

        Returns:
            "A", "B", "C" or "D" for the highest threshold reached, else "F"
        """
        if score >= 90:
            return "A"
        elif score >= 80:
            return "B"
        elif score >= 70:
            return "C"
        elif score >= 60:
            return "D"
        return "F"
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def generate_html_report(
    result: ExecutionResult,
    output_path: str | Path,
    **kwargs: Any,
) -> Path:
    """Convenience function to generate HTML report.

    Args:
        result: ExecutionResult to report on
        output_path: Path to write HTML file
        **kwargs: ReportConfig options. A ``history`` keyword, if given, is
            forwarded to ``HTMLReporter.generate`` instead.

    Returns:
        Path to generated report
    """
    # `history` is an argument of generate(), not a ReportConfig field, so it
    # is split off before the remaining options build the configuration.
    history = kwargs.pop("history", None)
    config = ReportConfig(**kwargs) if kwargs else None
    reporter = HTMLReporter(config=config)
    return reporter.generate(result, output_path, history=history)
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""PDF report generation for DuckGuard.
|
|
2
|
+
|
|
3
|
+
Uses WeasyPrint to convert HTML reports to PDF format.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import tempfile
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
from duckguard.reports.html_reporter import HTMLReporter, ReportConfig
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from duckguard.history.storage import StoredRun
|
|
16
|
+
from duckguard.rules.executor import ExecutionResult
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PDFReporter(HTMLReporter):
    """Generates PDF reports from DuckGuard validation results.

    Renders the HTML report through the parent class, then converts that
    HTML file to PDF with WeasyPrint.

    Usage:
        from duckguard.reports import PDFReporter
        from duckguard import connect, load_rules, execute_rules

        result = execute_rules(load_rules("rules.yaml"), connect("data.csv"))

        reporter = PDFReporter()
        reporter.generate(result, "report.pdf")

    Note:
        Requires weasyprint to be installed:
        pip install duckguard[reports]

    Attributes:
        config: Report configuration (inherited from HTMLReporter)
    """

    def generate(
        self,
        result: ExecutionResult,
        output_path: str | Path,
        *,
        history: list[StoredRun] | None = None,
    ) -> Path:
        """Generate a PDF report.

        Args:
            result: ExecutionResult to report on
            output_path: Path to write PDF file
            history: Optional historical results for trends

        Returns:
            Path to generated PDF report

        Raises:
            ImportError: If weasyprint is not installed
        """
        try:
            from weasyprint import HTML
        except ImportError as exc:
            # Chain the original error so the underlying import failure
            # stays visible in the traceback.
            raise ImportError(
                "PDF reports require weasyprint. "
                "Install with: pip install duckguard[reports]"
            ) from exc

        output_path = Path(output_path)

        # Reserve a temporary .html path. The handle is closed as soon as the
        # with-block exits; only the path is used afterwards. delete=False
        # keeps the file on disk until the explicit unlink below.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".html",
            delete=False,
            encoding="utf-8",
        ) as f:
            html_path = Path(f.name)

        try:
            # Generate HTML report
            super().generate(result, html_path, history=history)

            # Convert to PDF
            HTML(filename=str(html_path)).write_pdf(str(output_path))
        finally:
            # Best-effort cleanup: a failure to delete the temporary file
            # must not mask the result (or the error) of the conversion.
            try:
                html_path.unlink()
            except OSError:
                pass

        return output_path
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def generate_pdf_report(
    result: ExecutionResult,
    output_path: str | Path,
    **kwargs: Any,
) -> Path:
    """Convenience function to generate PDF report.

    Args:
        result: ExecutionResult to report on
        output_path: Path to write PDF file
        **kwargs: ReportConfig options. A ``history`` keyword, if given, is
            forwarded to ``PDFReporter.generate`` instead.

    Returns:
        Path to generated PDF report
    """
    # `history` is an argument of generate(), not a ReportConfig field, so it
    # is split off before the remaining options build the configuration.
    history = kwargs.pop("history", None)
    config = ReportConfig(**kwargs) if kwargs else None
    reporter = PDFReporter(config=config)
    return reporter.generate(result, output_path, history=history)
|
duckguard/rules/__init__.py
CHANGED
|
@@ -10,10 +10,10 @@ Example:
|
|
|
10
10
|
results = execute_rules(rules, "data.csv")
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
from duckguard.rules.
|
|
14
|
-
from duckguard.rules.executor import execute_rules, RuleExecutor
|
|
15
|
-
from duckguard.rules.schema import RuleSet, ColumnRules, Check, SimpleCheck
|
|
13
|
+
from duckguard.rules.executor import RuleExecutor, execute_rules
|
|
16
14
|
from duckguard.rules.generator import generate_rules
|
|
15
|
+
from duckguard.rules.loader import load_rules, load_rules_from_string
|
|
16
|
+
from duckguard.rules.schema import Check, ColumnRules, RuleSet, SimpleCheck
|
|
17
17
|
|
|
18
18
|
__all__ = [
|
|
19
19
|
"load_rules",
|
duckguard/rules/executor.py
CHANGED
|
@@ -9,15 +9,14 @@ from dataclasses import dataclass, field
|
|
|
9
9
|
from datetime import datetime
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
-
from duckguard.core.dataset import Dataset
|
|
13
|
-
from duckguard.core.result import ValidationResult
|
|
14
12
|
from duckguard.connectors import connect
|
|
13
|
+
from duckguard.core.dataset import Dataset
|
|
15
14
|
from duckguard.rules.schema import (
|
|
16
|
-
|
|
15
|
+
BUILTIN_PATTERNS,
|
|
17
16
|
Check,
|
|
18
17
|
CheckType,
|
|
18
|
+
RuleSet,
|
|
19
19
|
Severity,
|
|
20
|
-
BUILTIN_PATTERNS,
|
|
21
20
|
)
|
|
22
21
|
|
|
23
22
|
|