duckguard 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +1 -1
  3. duckguard/anomaly/detector.py +1 -5
  4. duckguard/anomaly/methods.py +1 -3
  5. duckguard/cli/main.py +304 -54
  6. duckguard/connectors/__init__.py +2 -2
  7. duckguard/connectors/bigquery.py +1 -1
  8. duckguard/connectors/databricks.py +1 -1
  9. duckguard/connectors/factory.py +2 -3
  10. duckguard/connectors/files.py +1 -1
  11. duckguard/connectors/kafka.py +2 -2
  12. duckguard/connectors/mongodb.py +1 -1
  13. duckguard/connectors/mysql.py +1 -1
  14. duckguard/connectors/oracle.py +1 -1
  15. duckguard/connectors/postgres.py +1 -2
  16. duckguard/connectors/redshift.py +1 -1
  17. duckguard/connectors/snowflake.py +1 -2
  18. duckguard/connectors/sqlite.py +1 -1
  19. duckguard/connectors/sqlserver.py +10 -13
  20. duckguard/contracts/__init__.py +6 -6
  21. duckguard/contracts/diff.py +1 -1
  22. duckguard/contracts/generator.py +5 -6
  23. duckguard/contracts/loader.py +4 -4
  24. duckguard/contracts/validator.py +3 -4
  25. duckguard/core/__init__.py +3 -3
  26. duckguard/core/column.py +110 -5
  27. duckguard/core/dataset.py +3 -3
  28. duckguard/core/result.py +92 -1
  29. duckguard/core/scoring.py +1 -2
  30. duckguard/errors.py +362 -0
  31. duckguard/history/__init__.py +44 -0
  32. duckguard/history/schema.py +183 -0
  33. duckguard/history/storage.py +479 -0
  34. duckguard/history/trends.py +348 -0
  35. duckguard/integrations/__init__.py +31 -0
  36. duckguard/integrations/airflow.py +387 -0
  37. duckguard/integrations/dbt.py +458 -0
  38. duckguard/notifications/__init__.py +43 -0
  39. duckguard/notifications/formatter.py +118 -0
  40. duckguard/notifications/notifiers.py +357 -0
  41. duckguard/profiler/auto_profile.py +3 -3
  42. duckguard/pytest_plugin/__init__.py +1 -1
  43. duckguard/pytest_plugin/plugin.py +1 -1
  44. duckguard/reporting/console.py +2 -2
  45. duckguard/reports/__init__.py +42 -0
  46. duckguard/reports/html_reporter.py +515 -0
  47. duckguard/reports/pdf_reporter.py +114 -0
  48. duckguard/rules/__init__.py +3 -3
  49. duckguard/rules/executor.py +3 -4
  50. duckguard/rules/generator.py +4 -4
  51. duckguard/rules/loader.py +5 -5
  52. duckguard/semantic/__init__.py +1 -1
  53. duckguard/semantic/analyzer.py +0 -2
  54. duckguard/semantic/validators.py +2 -1
  55. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/METADATA +135 -5
  56. duckguard-2.2.0.dist-info/RECORD +69 -0
  57. duckguard-2.0.0.dist-info/RECORD +0 -55
  58. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/WHEEL +0 -0
  59. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/entry_points.txt +0 -0
  60. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/licenses/LICENSE +0 -0
duckguard/errors.py ADDED
@@ -0,0 +1,362 @@
1
+ """Enhanced error classes for DuckGuard with helpful suggestions.
2
+
3
+ Provides user-friendly error messages with:
4
+ - Clear descriptions of what went wrong
5
+ - Suggestions for how to fix the issue
6
+ - Links to relevant documentation
7
+ - Context about the data being validated
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Any
13
+
14
+ # Documentation base URL
15
+ DOCS_BASE_URL = "https://github.com/XDataHubAI/duckguard"
16
+
17
+
18
class DuckGuardError(Exception):
    """Root exception for all DuckGuard errors.

    Carries a human-readable message plus optional remediation hints;
    all of them are folded into the exception's string representation.

    Attributes:
        message: Human-readable error description
        suggestion: Helpful suggestion for fixing the issue
        docs_url: Link to relevant documentation
        context: Additional context about the error
    """

    def __init__(
        self,
        message: str,
        suggestion: str | None = None,
        docs_url: str | None = None,
        context: dict[str, Any] | None = None,
    ):
        self.message = message
        self.suggestion = suggestion
        self.docs_url = docs_url
        self.context = context if context is not None else {}
        # Exception.args gets the fully formatted text so str(exc) is helpful.
        super().__init__(self._format_message())

    def _format_message(self) -> str:
        """Build the display string: message + optional suggestion/docs/context."""
        text = self.message
        if self.suggestion:
            text += f"\n\nSuggestion: {self.suggestion}"
        if self.docs_url:
            text += f"\n\nDocs: {self.docs_url}"
        if self.context:
            rendered = "\n".join(f"  {key}: {val}" for key, val in self.context.items())
            text += f"\n\nContext:\n{rendered}"
        return text
56
+
57
+
58
class ConnectionError(DuckGuardError):
    """Raised when a data source cannot be reached or opened.

    NOTE(review): this name shadows the builtin ``ConnectionError``;
    callers must be careful which one is in scope when catching it.
    """

    def __init__(
        self,
        source: str,
        original_error: Exception | None = None,
        **context: Any,
    ):
        super().__init__(
            message=f"Failed to connect to data source: {source}",
            suggestion=self._get_suggestion(source, original_error),
            docs_url=f"{DOCS_BASE_URL}#connectors",
            context={"source": source, **context},
        )
        self.source = source
        self.original_error = original_error

    def _get_suggestion(self, source: str, error: Exception | None) -> str:
        """Build troubleshooting hints tailored to the source type."""
        hints: list[str] = []

        if source.endswith(".csv"):
            hints += [
                "Verify the CSV file exists and is readable",
                "Check file permissions",
            ]
        elif source.endswith(".parquet"):
            hints += [
                "Verify the Parquet file exists and is not corrupted",
                "Try: pip install pyarrow",
            ]
        elif "postgres" in source or "postgresql" in source:
            hints += [
                "Verify PostgreSQL connection string format: postgresql://user:pass@host:port/db",
                "Check if the database server is running",
            ]
        elif "mysql" in source:
            hints.append("Verify MySQL connection string format: mysql://user:pass@host:port/db")
        elif "s3://" in source:
            hints += [
                "Verify AWS credentials are configured",
                "Check S3 bucket permissions",
            ]
        else:
            hints.append("Verify the data source path or connection string")

        # Surface the underlying driver error last, when available.
        if error:
            hints.append(f"Original error: {error}")

        return "\n - ".join([""] + hints).strip()
101
+
102
+
103
class FileNotFoundError(DuckGuardError):
    """File not found, reported with the current working directory for context.

    NOTE(review): shadows ``builtins.FileNotFoundError`` and does NOT inherit
    from OSError, so ``except OSError`` will not catch it — confirm intended.
    """

    def __init__(self, path: str, **context: Any):
        import os

        here = os.getcwd()
        super().__init__(
            message=f"File not found: {path}",
            suggestion=f"Check if the file exists. Current directory: {here}",
            docs_url=f"{DOCS_BASE_URL}#file-connectors",
            context={"path": path, "cwd": here, **context},
        )
116
+
117
+
118
class ColumnNotFoundError(DuckGuardError):
    """Requested column is absent from the dataset; suggests close matches."""

    def __init__(self, column: str, available_columns: list[str], **context: Any):
        similar = self._find_similar(column, available_columns)

        # List up to ten available columns, then summarize the rest.
        hint = "Available columns: " + ", ".join(available_columns[:10])
        remainder = len(available_columns) - 10
        if remainder > 0:
            hint += f" (and {remainder} more)"
        if similar:
            hint = f"Did you mean: {similar}?\n\n{hint}"

        super().__init__(
            message=f"Column '{column}' not found in dataset",
            suggestion=hint,
            docs_url=f"{DOCS_BASE_URL}#working-with-columns",
            context={"column": column, "similar": similar, **context},
        )

    def _find_similar(self, target: str, candidates: list[str]) -> str | None:
        """Return the best fuzzy match for *target*.

        Tiers, in order: case-insensitive exact, prefix (either direction),
        then substring (either direction). First hit wins.
        """
        needle = target.lower()
        tiers = (
            lambda cand: cand == needle,
            lambda cand: cand.startswith(needle) or needle.startswith(cand),
            lambda cand: needle in cand or cand in needle,
        )
        for matches in tiers:
            for name in candidates:
                if matches(name.lower()):
                    return name
        return None
159
+
160
+
161
class ValidationError(DuckGuardError):
    """A validation check failed; carries expected/actual values and samples."""

    def __init__(
        self,
        check_name: str,
        column: str | None = None,
        actual_value: Any = None,
        expected_value: Any = None,
        failed_rows: list | None = None,
        **context: Any,
    ):
        where = f" for column '{column}'" if column else ""

        hints: list[str] = []
        # Only show the comparison when both sides are known.
        if actual_value is not None and expected_value is not None:
            hints.append(f"Expected: {expected_value}, Got: {actual_value}")

        if failed_rows:
            hints.append(f"Sample failing values: {failed_rows[:3]}")
            if len(failed_rows) > 3:
                hints.append(f"({len(failed_rows)} total failures)")

        super().__init__(
            message=f"Validation check '{check_name}' failed{where}",
            suggestion="\n".join(hints) if hints else None,
            docs_url=f"{DOCS_BASE_URL}#validation-methods",
            context={
                "check_name": check_name,
                "column": column,
                "actual_value": actual_value,
                "expected_value": expected_value,
                **context,
            },
        )
200
+
201
+
202
class RuleParseError(DuckGuardError):
    """Error parsing validation rules.

    Raised when a YAML ruleset cannot be parsed; the suggestion embeds a
    minimal valid example so users can compare it against their own file.
    """

    def __init__(
        self,
        message: str,
        file_path: str | None = None,
        line_number: int | None = None,
        **context: Any,
    ):
        # Build an optional " in <file> at line <n>" suffix for the message.
        location = ""
        if file_path:
            location = f" in {file_path}"
        if line_number:
            location += f" at line {line_number}"

        suggestion = "Check your YAML syntax and rule format.\n"
        suggestion += "Example valid rule:\n"
        # Literal example ruleset shown to the user; indentation is meaningful
        # because it demonstrates valid YAML nesting.
        suggestion += """
  columns:
    order_id:
      checks:
        - type: not_null
        - type: unique
    amount:
      checks:
        - type: between
          value: [0, 10000]
"""

        super().__init__(
            message=f"Failed to parse rules{location}: {message}",
            suggestion=suggestion,
            docs_url=f"{DOCS_BASE_URL}#yaml-rules",
            context={"file_path": file_path, "line_number": line_number, **context},
        )
238
+
239
+
240
class ContractViolationError(DuckGuardError):
    """Raised when validated data breaks its data contract."""

    def __init__(
        self,
        violations: list[str],
        contract_path: str | None = None,
        **context: Any,
    ):
        headline = f"Data contract violated with {len(violations)} issue(s)"
        if contract_path:
            headline += f" (contract: {contract_path})"

        # Show at most five violations, then a count of the remainder.
        advice = "Violations:\n - " + "\n - ".join(violations[:5])
        hidden = len(violations) - 5
        if hidden > 0:
            advice += f"\n ... and {hidden} more"
        advice += "\n\nConsider updating the contract or fixing the data issues."

        super().__init__(
            message=headline,
            suggestion=advice,
            docs_url=f"{DOCS_BASE_URL}#data-contracts",
            context={"violations": violations, "contract_path": contract_path, **context},
        )
265
+
266
+
267
class UnsupportedConnectorError(DuckGuardError):
    """No connector matches the given source string."""

    def __init__(self, source: str, **context: Any):
        # Human-readable list of everything the connector factory handles.
        formats = (
            "CSV (.csv)",
            "Parquet (.parquet, .pq)",
            "JSON (.json, .jsonl, .ndjson)",
            "PostgreSQL (postgres://, postgresql://)",
            "MySQL (mysql://)",
            "SQLite (sqlite://)",
            "S3 (s3://)",
            "Snowflake (snowflake://)",
            "BigQuery (bigquery://)",
        )

        super().__init__(
            message=f"No connector found for: {source}",
            suggestion="Supported formats:\n - " + "\n - ".join(formats),
            docs_url=f"{DOCS_BASE_URL}#supported-connectors",
            context={"source": source, **context},
        )
291
+
292
+
293
+ # Error formatting utilities
294
+
295
def format_validation_failure(
    check_name: str,
    column: str | None,
    actual: Any,
    expected: Any,
    failed_rows: list | None = None,
) -> str:
    """Format a validation failure message with context.

    Args:
        check_name: Name of the failed check
        column: Column name (if column-level)
        actual: Actual value found
        expected: Expected value
        failed_rows: Sample of failing rows; items may be plain values or
            objects exposing ``value`` and ``row_index`` attributes

    Returns:
        Formatted error message
    """
    parts = []

    if column:
        parts.append(f"Check '{check_name}' failed for column '{column}'")
    else:
        parts.append(f"Check '{check_name}' failed")

    parts.append(f"  Expected: {expected}")
    parts.append(f"  Actual: {actual}")

    if failed_rows:
        parts.append("")
        parts.append("  Sample failing rows:")
        for row in failed_rows[:5]:
            # Bug fix: the original only checked hasattr(row, "value") but then
            # also read row.row_index, raising AttributeError for rows that
            # carry a value without an index. Guard both attributes.
            if hasattr(row, "value") and hasattr(row, "row_index"):
                parts.append(f"    Row {row.row_index}: {row.value}")
            else:
                parts.append(f"    {row}")

        if len(failed_rows) > 5:
            parts.append(f"    ... and {len(failed_rows) - 5} more")

    return "\n".join(parts)
337
+
338
+
339
def format_multiple_failures(failures: list) -> str:
    """Summarize a list of validation failures into one report string.

    Args:
        failures: List of failure objects (anything str()-able; objects may
            expose ``column`` and ``message`` attributes)

    Returns:
        Formatted summary string
    """
    if not failures:
        return "All checks passed!"

    lines = [f"{len(failures)} validation check(s) failed:", ""]

    # Number the first ten failures; tag each with its column (or "[table]").
    for idx, item in enumerate(failures[:10], start=1):
        tag = f"[{item.column}]" if hasattr(item, "column") and item.column else "[table]"
        text = item.message if hasattr(item, "message") else str(item)
        lines.append(f"  {idx}. {tag} {text}")

    overflow = len(failures) - 10
    if overflow > 0:
        lines.append(f"  ... and {overflow} more failures")

    return "\n".join(lines)
@@ -0,0 +1,44 @@
1
+ """Historical result storage and trend analysis for DuckGuard.
2
+
3
+ This module provides persistent storage for validation results,
4
+ enabling trend analysis and historical comparison.
5
+
6
+ Usage:
7
+ from duckguard.history import HistoryStorage, TrendAnalyzer
8
+
9
+ # Store validation results
10
+ storage = HistoryStorage()
11
+ storage.store(result)
12
+
13
+ # Query history
14
+ runs = storage.get_runs("data.csv", limit=10)
15
+
16
+ # Analyze trends
17
+ analyzer = TrendAnalyzer(storage)
18
+ analysis = analyzer.analyze("data.csv", days=30)
19
+ print(analysis.summary())
20
+ """
21
+
22
+ from duckguard.history.storage import (
23
+ HistoryStorage,
24
+ StoredCheckResult,
25
+ StoredRun,
26
+ TrendDataPoint,
27
+ )
28
+ from duckguard.history.trends import (
29
+ TrendAnalysis,
30
+ TrendAnalyzer,
31
+ analyze_trends,
32
+ )
33
+
34
+ __all__ = [
35
+ # Storage
36
+ "HistoryStorage",
37
+ "StoredRun",
38
+ "StoredCheckResult",
39
+ "TrendDataPoint",
40
+ # Trends
41
+ "TrendAnalyzer",
42
+ "TrendAnalysis",
43
+ "analyze_trends",
44
+ ]
@@ -0,0 +1,183 @@
1
+ """Database schema for historical result storage.
2
+
3
+ Defines the SQLite schema for storing validation results over time,
4
+ enabling trend analysis and historical comparison.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ # Schema version for migrations
10
+ SCHEMA_VERSION = 1
11
+
12
+ # SQL to create all tables
13
+ CREATE_TABLES_SQL = """
14
+ -- Validation runs table: stores metadata for each validation execution
15
+ CREATE TABLE IF NOT EXISTS runs (
16
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
17
+ run_id TEXT UNIQUE NOT NULL,
18
+ source TEXT NOT NULL,
19
+ ruleset_name TEXT,
20
+ started_at TEXT NOT NULL,
21
+ finished_at TEXT,
22
+ quality_score REAL NOT NULL,
23
+ total_checks INTEGER NOT NULL,
24
+ passed_count INTEGER NOT NULL,
25
+ failed_count INTEGER NOT NULL,
26
+ warning_count INTEGER NOT NULL,
27
+ passed INTEGER NOT NULL,
28
+ metadata TEXT,
29
+ created_at TEXT DEFAULT (datetime('now'))
30
+ );
31
+
32
+ -- Individual check results table
33
+ CREATE TABLE IF NOT EXISTS check_results (
34
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
35
+ run_id TEXT NOT NULL,
36
+ check_type TEXT NOT NULL,
37
+ column_name TEXT,
38
+ passed INTEGER NOT NULL,
39
+ severity TEXT NOT NULL,
40
+ actual_value TEXT,
41
+ expected_value TEXT,
42
+ message TEXT,
43
+ details TEXT,
44
+ created_at TEXT DEFAULT (datetime('now')),
45
+ FOREIGN KEY (run_id) REFERENCES runs(run_id)
46
+ );
47
+
48
+ -- Sample of failed rows (limited to avoid large storage)
49
+ CREATE TABLE IF NOT EXISTS failed_rows_sample (
50
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
51
+ run_id TEXT NOT NULL,
52
+ check_id INTEGER,
53
+ row_index INTEGER NOT NULL,
54
+ column_name TEXT NOT NULL,
55
+ value TEXT,
56
+ expected TEXT,
57
+ reason TEXT,
58
+ context TEXT,
59
+ created_at TEXT DEFAULT (datetime('now')),
60
+ FOREIGN KEY (run_id) REFERENCES runs(run_id),
61
+ FOREIGN KEY (check_id) REFERENCES check_results(id)
62
+ );
63
+
64
+ -- Quality score trends (aggregated daily for efficient queries)
65
+ CREATE TABLE IF NOT EXISTS quality_trends (
66
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
67
+ source TEXT NOT NULL,
68
+ date TEXT NOT NULL,
69
+ avg_quality_score REAL NOT NULL,
70
+ min_quality_score REAL NOT NULL,
71
+ max_quality_score REAL NOT NULL,
72
+ run_count INTEGER NOT NULL,
73
+ passed_count INTEGER NOT NULL,
74
+ failed_count INTEGER NOT NULL,
75
+ UNIQUE(source, date)
76
+ );
77
+
78
+ -- Schema metadata table
79
+ CREATE TABLE IF NOT EXISTS schema_info (
80
+ key TEXT PRIMARY KEY,
81
+ value TEXT NOT NULL
82
+ );
83
+
84
+ -- Indexes for common query patterns
85
+ CREATE INDEX IF NOT EXISTS idx_runs_source ON runs(source);
86
+ CREATE INDEX IF NOT EXISTS idx_runs_started_at ON runs(started_at);
87
+ CREATE INDEX IF NOT EXISTS idx_runs_source_started ON runs(source, started_at);
88
+ CREATE INDEX IF NOT EXISTS idx_check_results_run_id ON check_results(run_id);
89
+ CREATE INDEX IF NOT EXISTS idx_failed_rows_run_id ON failed_rows_sample(run_id);
90
+ CREATE INDEX IF NOT EXISTS idx_quality_trends_source_date ON quality_trends(source, date);
91
+ """
92
+
93
+ # Pre-built queries for common operations
94
+ QUERIES = {
95
+ "insert_run": """
96
+ INSERT INTO runs (
97
+ run_id, source, ruleset_name, started_at, finished_at,
98
+ quality_score, total_checks, passed_count, failed_count,
99
+ warning_count, passed, metadata
100
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
101
+ """,
102
+ "insert_check_result": """
103
+ INSERT INTO check_results (
104
+ run_id, check_type, column_name, passed, severity,
105
+ actual_value, expected_value, message, details
106
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
107
+ """,
108
+ "insert_failed_row": """
109
+ INSERT INTO failed_rows_sample (
110
+ run_id, check_id, row_index, column_name, value, expected, reason, context
111
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
112
+ """,
113
+ "get_runs_for_source": """
114
+ SELECT * FROM runs
115
+ WHERE source = ?
116
+ ORDER BY started_at DESC
117
+ LIMIT ?
118
+ """,
119
+ "get_runs_in_period": """
120
+ SELECT * FROM runs
121
+ WHERE source = ?
122
+ AND started_at >= ?
123
+ AND started_at <= ?
124
+ ORDER BY started_at DESC
125
+ """,
126
+ "get_all_runs": """
127
+ SELECT * FROM runs
128
+ ORDER BY started_at DESC
129
+ LIMIT ?
130
+ """,
131
+ "get_quality_trend": """
132
+ SELECT date, avg_quality_score, min_quality_score, max_quality_score,
133
+ run_count, passed_count, failed_count
134
+ FROM quality_trends
135
+ WHERE source = ?
136
+ AND date >= ?
137
+ ORDER BY date
138
+ """,
139
+ "get_latest_run": """
140
+ SELECT * FROM runs
141
+ WHERE source = ?
142
+ ORDER BY started_at DESC
143
+ LIMIT 1
144
+ """,
145
+ "get_check_results_for_run": """
146
+ SELECT * FROM check_results
147
+ WHERE run_id = ?
148
+ ORDER BY id
149
+ """,
150
+ "get_failed_rows_for_run": """
151
+ SELECT * FROM failed_rows_sample
152
+ WHERE run_id = ?
153
+ ORDER BY id
154
+ """,
155
+ "upsert_trend": """
156
+ INSERT INTO quality_trends (
157
+ source, date, avg_quality_score, min_quality_score,
158
+ max_quality_score, run_count, passed_count, failed_count
159
+ ) VALUES (?, ?, ?, ?, ?, 1, ?, ?)
160
+ ON CONFLICT(source, date) DO UPDATE SET
161
+ avg_quality_score = (
162
+ (avg_quality_score * run_count + excluded.avg_quality_score)
163
+ / (run_count + 1)
164
+ ),
165
+ min_quality_score = MIN(min_quality_score, excluded.min_quality_score),
166
+ max_quality_score = MAX(max_quality_score, excluded.max_quality_score),
167
+ run_count = run_count + 1,
168
+ passed_count = passed_count + excluded.passed_count,
169
+ failed_count = failed_count + excluded.failed_count
170
+ """,
171
+ "get_unique_sources": """
172
+ SELECT DISTINCT source FROM runs
173
+ ORDER BY source
174
+ """,
175
+ "delete_old_runs": """
176
+ DELETE FROM runs
177
+ WHERE started_at < ?
178
+ """,
179
+ "get_run_by_id": """
180
+ SELECT * FROM runs
181
+ WHERE run_id = ?
182
+ """,
183
+ }