duckguard-2.0.0-py3-none-any.whl → duckguard-2.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +1 -1
  3. duckguard/anomaly/detector.py +1 -5
  4. duckguard/anomaly/methods.py +1 -3
  5. duckguard/cli/main.py +304 -54
  6. duckguard/connectors/__init__.py +2 -2
  7. duckguard/connectors/bigquery.py +1 -1
  8. duckguard/connectors/databricks.py +1 -1
  9. duckguard/connectors/factory.py +2 -3
  10. duckguard/connectors/files.py +1 -1
  11. duckguard/connectors/kafka.py +2 -2
  12. duckguard/connectors/mongodb.py +1 -1
  13. duckguard/connectors/mysql.py +1 -1
  14. duckguard/connectors/oracle.py +1 -1
  15. duckguard/connectors/postgres.py +1 -2
  16. duckguard/connectors/redshift.py +1 -1
  17. duckguard/connectors/snowflake.py +1 -2
  18. duckguard/connectors/sqlite.py +1 -1
  19. duckguard/connectors/sqlserver.py +10 -13
  20. duckguard/contracts/__init__.py +6 -6
  21. duckguard/contracts/diff.py +1 -1
  22. duckguard/contracts/generator.py +5 -6
  23. duckguard/contracts/loader.py +4 -4
  24. duckguard/contracts/validator.py +3 -4
  25. duckguard/core/__init__.py +3 -3
  26. duckguard/core/column.py +110 -5
  27. duckguard/core/dataset.py +3 -3
  28. duckguard/core/result.py +92 -1
  29. duckguard/core/scoring.py +1 -2
  30. duckguard/errors.py +362 -0
  31. duckguard/history/__init__.py +44 -0
  32. duckguard/history/schema.py +183 -0
  33. duckguard/history/storage.py +479 -0
  34. duckguard/history/trends.py +348 -0
  35. duckguard/integrations/__init__.py +31 -0
  36. duckguard/integrations/airflow.py +387 -0
  37. duckguard/integrations/dbt.py +458 -0
  38. duckguard/notifications/__init__.py +43 -0
  39. duckguard/notifications/formatter.py +118 -0
  40. duckguard/notifications/notifiers.py +357 -0
  41. duckguard/profiler/auto_profile.py +3 -3
  42. duckguard/pytest_plugin/__init__.py +1 -1
  43. duckguard/pytest_plugin/plugin.py +1 -1
  44. duckguard/reporting/console.py +2 -2
  45. duckguard/reports/__init__.py +42 -0
  46. duckguard/reports/html_reporter.py +515 -0
  47. duckguard/reports/pdf_reporter.py +114 -0
  48. duckguard/rules/__init__.py +3 -3
  49. duckguard/rules/executor.py +3 -4
  50. duckguard/rules/generator.py +4 -4
  51. duckguard/rules/loader.py +5 -5
  52. duckguard/semantic/__init__.py +1 -1
  53. duckguard/semantic/analyzer.py +0 -2
  54. duckguard/semantic/validators.py +2 -1
  55. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/METADATA +135 -5
  56. duckguard-2.2.0.dist-info/RECORD +69 -0
  57. duckguard-2.0.0.dist-info/RECORD +0 -55
  58. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/WHEEL +0 -0
  59. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/entry_points.txt +0 -0
  60. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -2,10 +2,9 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import re
6
5
  from urllib.parse import urlparse
7
6
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
8
  from duckguard.core.dataset import Dataset
10
9
  from duckguard.core.engine import DuckGuardEngine
11
10
 
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from urllib.parse import urlparse
6
6
 
7
- from duckguard.connectors.base import Connector, ConnectionConfig
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
8
8
  from duckguard.core.dataset import Dataset
9
9
  from duckguard.core.engine import DuckGuardEngine
10
10
 
@@ -2,11 +2,10 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import re
6
5
  from typing import Any
7
6
  from urllib.parse import parse_qs, urlparse
8
7
 
9
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
10
9
  from duckguard.core.dataset import Dataset
11
10
  from duckguard.core.engine import DuckGuardEngine
12
11
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  import os
6
6
  from pathlib import Path
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import parse_qs, urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -55,20 +55,17 @@ class SQLServerConnector(Connector):
55
55
  Dataset object
56
56
  """
57
57
  # Try pyodbc first, then pymssql
58
- try:
59
- import pyodbc
58
+ import importlib.util
60
59
 
60
+ if importlib.util.find_spec("pyodbc") is not None:
61
61
  driver_module = "pyodbc"
62
- except ImportError:
63
- try:
64
- import pymssql
65
-
66
- driver_module = "pymssql"
67
- except ImportError:
68
- raise ImportError(
69
- "SQL Server support requires pyodbc or pymssql. "
70
- "Install with: pip install duckguard[sqlserver]"
71
- )
62
+ elif importlib.util.find_spec("pymssql") is not None:
63
+ driver_module = "pymssql"
64
+ else:
65
+ raise ImportError(
66
+ "SQL Server support requires pyodbc or pymssql. "
67
+ "Install with: pip install duckguard[sqlserver]"
68
+ )
72
69
 
73
70
  if not config.table:
74
71
  raise ValueError("Table name is required for SQL Server connections")
@@ -14,17 +14,17 @@ Example:
14
14
  print(f"Contract violations: {result.violations}")
15
15
  """
16
16
 
17
+ from duckguard.contracts.diff import SchemaDiff, diff_contracts
18
+ from duckguard.contracts.generator import generate_contract
19
+ from duckguard.contracts.loader import contract_to_yaml, load_contract, load_contract_from_string
17
20
  from duckguard.contracts.schema import (
21
+ ContractMetadata,
18
22
  DataContract,
19
- SchemaField,
20
23
  FieldType,
21
24
  QualitySLA,
22
- ContractMetadata,
25
+ SchemaField,
23
26
  )
24
- from duckguard.contracts.loader import load_contract, load_contract_from_string, contract_to_yaml
25
- from duckguard.contracts.validator import validate_contract, ContractValidationResult
26
- from duckguard.contracts.generator import generate_contract
27
- from duckguard.contracts.diff import diff_contracts, SchemaDiff
27
+ from duckguard.contracts.validator import ContractValidationResult, validate_contract
28
28
 
29
29
  __all__ = [
30
30
  # Schema
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
9
9
  from enum import Enum
10
10
  from typing import Any
11
11
 
12
- from duckguard.contracts.schema import DataContract, SchemaField, FieldType
12
+ from duckguard.contracts.schema import DataContract, FieldType, SchemaField
13
13
 
14
14
 
15
15
  class ChangeType(Enum):
@@ -7,19 +7,18 @@ from __future__ import annotations
7
7
 
8
8
  from datetime import datetime
9
9
  from pathlib import Path
10
- from typing import Any
11
10
 
12
- from duckguard.core.dataset import Dataset
13
11
  from duckguard.connectors import connect
12
+ from duckguard.contracts.loader import contract_to_yaml
14
13
  from duckguard.contracts.schema import (
14
+ ContractMetadata,
15
15
  DataContract,
16
- SchemaField,
17
- FieldType,
18
16
  FieldConstraint,
17
+ FieldType,
19
18
  QualitySLA,
20
- ContractMetadata,
19
+ SchemaField,
21
20
  )
22
- from duckguard.contracts.loader import contract_to_yaml
21
+ from duckguard.core.dataset import Dataset
23
22
  from duckguard.semantic import SemanticAnalyzer, SemanticType
24
23
 
25
24
 
@@ -47,12 +47,12 @@ from typing import Any
47
47
  import yaml
48
48
 
49
49
  from duckguard.contracts.schema import (
50
+ ContractMetadata,
50
51
  DataContract,
51
- SchemaField,
52
- FieldType,
53
52
  FieldConstraint,
53
+ FieldType,
54
54
  QualitySLA,
55
- ContractMetadata,
55
+ SchemaField,
56
56
  )
57
57
 
58
58
 
@@ -82,7 +82,7 @@ def load_contract(path: str | Path) -> DataContract:
82
82
  if not path.exists():
83
83
  raise FileNotFoundError(f"Contract file not found: {path}")
84
84
 
85
- with open(path, "r", encoding="utf-8") as f:
85
+ with open(path, encoding="utf-8") as f:
86
86
  content = f.read()
87
87
 
88
88
  return load_contract_from_string(content, source_file=str(path))
@@ -6,14 +6,13 @@ Validates datasets against data contracts to ensure compliance.
6
6
  from __future__ import annotations
7
7
 
8
8
  from dataclasses import dataclass, field
9
- from datetime import datetime, timedelta
9
+ from datetime import datetime
10
10
  from enum import Enum
11
11
  from typing import Any
12
- import re
13
12
 
14
- from duckguard.core.dataset import Dataset
15
13
  from duckguard.connectors import connect
16
- from duckguard.contracts.schema import DataContract, SchemaField, FieldType
14
+ from duckguard.contracts.schema import DataContract, SchemaField
15
+ from duckguard.core.dataset import Dataset
17
16
 
18
17
 
19
18
  class ViolationType(Enum):
@@ -1,8 +1,8 @@
1
1
  """Core module containing the engine, dataset, and column classes."""
2
2
 
3
- from duckguard.core.engine import DuckGuardEngine
4
- from duckguard.core.dataset import Dataset
5
3
  from duckguard.core.column import Column
6
- from duckguard.core.result import ValidationResult, CheckResult
4
+ from duckguard.core.dataset import Dataset
5
+ from duckguard.core.engine import DuckGuardEngine
6
+ from duckguard.core.result import CheckResult, ValidationResult
7
7
 
8
8
  __all__ = ["DuckGuardEngine", "Dataset", "Column", "ValidationResult", "CheckResult"]
duckguard/core/column.py CHANGED
@@ -2,14 +2,16 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import re
6
5
  from typing import TYPE_CHECKING, Any
7
6
 
8
- from duckguard.core.result import ValidationResult
7
+ from duckguard.core.result import FailedRow, ValidationResult
9
8
 
10
9
  if TYPE_CHECKING:
11
10
  from duckguard.core.dataset import Dataset
12
11
 
12
+ # Default number of failed rows to capture for debugging
13
+ DEFAULT_SAMPLE_SIZE = 10
14
+
13
15
 
14
16
  class Column:
15
17
  """
@@ -164,13 +166,14 @@ class Column:
164
166
  message=f"Column '{self._name}' unique_percent is {actual:.2f}% (threshold: {threshold}%)",
165
167
  )
166
168
 
167
- def between(self, min_val: Any, max_val: Any) -> ValidationResult:
169
+ def between(self, min_val: Any, max_val: Any, capture_failures: bool = True) -> ValidationResult:
168
170
  """
169
171
  Check that all values are between min and max (inclusive).
170
172
 
171
173
  Args:
172
174
  min_val: Minimum allowed value
173
175
  max_val: Maximum allowed value
176
+ capture_failures: Whether to capture sample failing rows (default: True)
174
177
 
175
178
  Returns:
176
179
  ValidationResult indicating if all non-null values are in range
@@ -188,20 +191,53 @@ class Column:
188
191
  out_of_range = self._dataset.engine.fetch_value(sql) or 0
189
192
  passed = out_of_range == 0
190
193
 
194
+ # Capture sample of failing rows for debugging
195
+ failed_rows = []
196
+ if not passed and capture_failures:
197
+ failed_rows = self._get_failed_rows_between(min_val, max_val)
198
+
191
199
  return ValidationResult(
192
200
  passed=passed,
193
201
  actual_value=out_of_range,
194
202
  expected_value=0,
195
203
  message=f"Column '{self._name}' has {out_of_range} values outside [{min_val}, {max_val}]",
196
204
  details={"min": min_val, "max": max_val, "out_of_range_count": out_of_range},
205
+ failed_rows=failed_rows,
206
+ total_failures=out_of_range,
197
207
  )
198
208
 
199
- def matches(self, pattern: str) -> ValidationResult:
209
+ def _get_failed_rows_between(self, min_val: Any, max_val: Any, limit: int = DEFAULT_SAMPLE_SIZE) -> list[FailedRow]:
210
+ """Get sample of rows that failed between check."""
211
+ ref = self._dataset.engine.get_source_reference(self._dataset.source)
212
+ col = f'"{self._name}"'
213
+
214
+ sql = f"""
215
+ SELECT row_number() OVER () as row_idx, {col} as val
216
+ FROM {ref}
217
+ WHERE {col} IS NOT NULL
218
+ AND ({col} < {min_val} OR {col} > {max_val})
219
+ LIMIT {limit}
220
+ """
221
+
222
+ rows = self._dataset.engine.fetch_all(sql)
223
+ return [
224
+ FailedRow(
225
+ row_index=row[0],
226
+ column=self._name,
227
+ value=row[1],
228
+ expected=f"between {min_val} and {max_val}",
229
+ reason=f"Value {row[1]} is outside range [{min_val}, {max_val}]",
230
+ )
231
+ for row in rows
232
+ ]
233
+
234
+ def matches(self, pattern: str, capture_failures: bool = True) -> ValidationResult:
200
235
  """
201
236
  Check that all non-null values match a regex pattern.
202
237
 
203
238
  Args:
204
239
  pattern: Regular expression pattern
240
+ capture_failures: Whether to capture sample failing rows (default: True)
205
241
 
206
242
  Returns:
207
243
  ValidationResult
@@ -220,20 +256,53 @@ class Column:
220
256
  non_matching = self._dataset.engine.fetch_value(sql) or 0
221
257
  passed = non_matching == 0
222
258
 
259
+ # Capture sample of failing rows
260
+ failed_rows = []
261
+ if not passed and capture_failures:
262
+ failed_rows = self._get_failed_rows_pattern(pattern)
263
+
223
264
  return ValidationResult(
224
265
  passed=passed,
225
266
  actual_value=non_matching,
226
267
  expected_value=0,
227
268
  message=f"Column '{self._name}' has {non_matching} values not matching pattern '{pattern}'",
228
269
  details={"pattern": pattern, "non_matching_count": non_matching},
270
+ failed_rows=failed_rows,
271
+ total_failures=non_matching,
229
272
  )
230
273
 
231
- def isin(self, values: list[Any]) -> ValidationResult:
274
+ def _get_failed_rows_pattern(self, pattern: str, limit: int = DEFAULT_SAMPLE_SIZE) -> list[FailedRow]:
275
+ """Get sample of rows that failed pattern match."""
276
+ ref = self._dataset.engine.get_source_reference(self._dataset.source)
277
+ col = f'"{self._name}"'
278
+
279
+ sql = f"""
280
+ SELECT row_number() OVER () as row_idx, {col} as val
281
+ FROM {ref}
282
+ WHERE {col} IS NOT NULL
283
+ AND NOT regexp_matches({col}::VARCHAR, '{pattern}')
284
+ LIMIT {limit}
285
+ """
286
+
287
+ rows = self._dataset.engine.fetch_all(sql)
288
+ return [
289
+ FailedRow(
290
+ row_index=row[0],
291
+ column=self._name,
292
+ value=row[1],
293
+ expected=f"matches pattern '{pattern}'",
294
+ reason=f"Value '{row[1]}' does not match pattern",
295
+ )
296
+ for row in rows
297
+ ]
298
+
299
+ def isin(self, values: list[Any], capture_failures: bool = True) -> ValidationResult:
232
300
  """
233
301
  Check that all non-null values are in the allowed set.
234
302
 
235
303
  Args:
236
304
  values: List of allowed values
305
+ capture_failures: Whether to capture sample failing rows (default: True)
237
306
 
238
307
  Returns:
239
308
  ValidationResult
@@ -256,14 +325,50 @@ class Column:
256
325
  invalid_count = self._dataset.engine.fetch_value(sql) or 0
257
326
  passed = invalid_count == 0
258
327
 
328
+ # Capture sample of failing rows
329
+ failed_rows = []
330
+ if not passed and capture_failures:
331
+ failed_rows = self._get_failed_rows_isin(values)
332
+
259
333
  return ValidationResult(
260
334
  passed=passed,
261
335
  actual_value=invalid_count,
262
336
  expected_value=0,
263
337
  message=f"Column '{self._name}' has {invalid_count} values not in allowed set",
264
338
  details={"allowed_values": values, "invalid_count": invalid_count},
339
+ failed_rows=failed_rows,
340
+ total_failures=invalid_count,
265
341
  )
266
342
 
343
+ def _get_failed_rows_isin(self, values: list[Any], limit: int = DEFAULT_SAMPLE_SIZE) -> list[FailedRow]:
344
+ """Get sample of rows that failed isin check."""
345
+ ref = self._dataset.engine.get_source_reference(self._dataset.source)
346
+ col = f'"{self._name}"'
347
+
348
+ formatted_values = ", ".join(
349
+ f"'{v}'" if isinstance(v, str) else str(v) for v in values
350
+ )
351
+
352
+ sql = f"""
353
+ SELECT row_number() OVER () as row_idx, {col} as val
354
+ FROM {ref}
355
+ WHERE {col} IS NOT NULL
356
+ AND {col} NOT IN ({formatted_values})
357
+ LIMIT {limit}
358
+ """
359
+
360
+ rows = self._dataset.engine.fetch_all(sql)
361
+ return [
362
+ FailedRow(
363
+ row_index=row[0],
364
+ column=self._name,
365
+ value=row[1],
366
+ expected=f"in {values}",
367
+ reason=f"Value '{row[1]}' is not in allowed set",
368
+ )
369
+ for row in rows
370
+ ]
371
+
267
372
  def has_no_duplicates(self) -> ValidationResult:
268
373
  """
269
374
  Check that all values are unique (no duplicates).
duckguard/core/dataset.py CHANGED
@@ -4,8 +4,8 @@ from __future__ import annotations
4
4
 
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
7
- from duckguard.core.engine import DuckGuardEngine
8
7
  from duckguard.core.column import Column
8
+ from duckguard.core.engine import DuckGuardEngine
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from duckguard.core.scoring import QualityScore
@@ -230,7 +230,7 @@ class Dataset:
230
230
  def score(
231
231
  self,
232
232
  weights: dict | None = None,
233
- ) -> "QualityScore":
233
+ ) -> QualityScore:
234
234
  """
235
235
  Calculate data quality score for this dataset.
236
236
 
@@ -262,7 +262,7 @@ class Dataset:
262
262
  'consistency': 0.1,
263
263
  })
264
264
  """
265
- from duckguard.core.scoring import QualityScorer, QualityDimension
265
+ from duckguard.core.scoring import QualityDimension, QualityScorer
266
266
 
267
267
  # Convert string keys to QualityDimension enums if needed
268
268
  scorer_weights = None
duckguard/core/result.py CHANGED
@@ -17,6 +17,30 @@ class CheckStatus(Enum):
17
17
  ERROR = "error"
18
18
 
19
19
 
20
+ @dataclass
21
+ class FailedRow:
22
+ """Represents a single row that failed validation.
23
+
24
+ Attributes:
25
+ row_index: The 1-based row number in the source data
26
+ column: The column name that failed validation
27
+ value: The actual value that failed
28
+ expected: What was expected (e.g., "not null", "between 1-100")
29
+ reason: Human-readable explanation of why validation failed
30
+ context: Additional row data for context (optional)
31
+ """
32
+
33
+ row_index: int
34
+ column: str
35
+ value: Any
36
+ expected: str
37
+ reason: str = ""
38
+ context: dict[str, Any] = field(default_factory=dict)
39
+
40
+ def __repr__(self) -> str:
41
+ return f"FailedRow(row={self.row_index}, column='{self.column}', value={self.value!r})"
42
+
43
+
20
44
  @dataclass
21
45
  class CheckResult:
22
46
  """Result of a single validation check."""
@@ -46,13 +70,27 @@ class CheckResult:
46
70
 
47
71
  @dataclass
48
72
  class ValidationResult:
49
- """Result of a validation operation that can be used in assertions."""
73
+ """Result of a validation operation that can be used in assertions.
74
+
75
+ Enhanced with row-level error capture for debugging failed checks.
76
+
77
+ Attributes:
78
+ passed: Whether the validation passed
79
+ actual_value: The actual value found (e.g., count of failures)
80
+ expected_value: What was expected
81
+ message: Human-readable summary
82
+ details: Additional metadata
83
+ failed_rows: List of individual rows that failed validation
84
+ sample_size: How many failed rows to capture (default: 10)
85
+ """
50
86
 
51
87
  passed: bool
52
88
  actual_value: Any
53
89
  expected_value: Any | None = None
54
90
  message: str = ""
55
91
  details: dict[str, Any] = field(default_factory=dict)
92
+ failed_rows: list[FailedRow] = field(default_factory=list)
93
+ total_failures: int = 0
56
94
 
57
95
  def __bool__(self) -> bool:
58
96
  """Allow using ValidationResult in boolean context for assertions."""
@@ -60,8 +98,61 @@ class ValidationResult:
60
98
 
61
99
  def __repr__(self) -> str:
62
100
  status = "PASSED" if self.passed else "FAILED"
101
+ if self.failed_rows:
102
+ return f"ValidationResult({status}, actual={self.actual_value}, failed_rows={len(self.failed_rows)})"
63
103
  return f"ValidationResult({status}, actual={self.actual_value})"
64
104
 
105
+ def get_failed_values(self) -> list[Any]:
106
+ """Get list of values that failed validation."""
107
+ return [row.value for row in self.failed_rows]
108
+
109
+ def get_failed_row_indices(self) -> list[int]:
110
+ """Get list of row indices that failed validation."""
111
+ return [row.row_index for row in self.failed_rows]
112
+
113
+ def to_dataframe(self):
114
+ """Convert failed rows to a pandas DataFrame (if pandas available).
115
+
116
+ Returns:
117
+ pandas.DataFrame with failed row details
118
+
119
+ Raises:
120
+ ImportError: If pandas is not installed
121
+ """
122
+ try:
123
+ import pandas as pd
124
+
125
+ if not self.failed_rows:
126
+ return pd.DataFrame(columns=["row_index", "column", "value", "expected", "reason"])
127
+
128
+ return pd.DataFrame([
129
+ {
130
+ "row_index": row.row_index,
131
+ "column": row.column,
132
+ "value": row.value,
133
+ "expected": row.expected,
134
+ "reason": row.reason,
135
+ **row.context,
136
+ }
137
+ for row in self.failed_rows
138
+ ])
139
+ except ImportError:
140
+ raise ImportError("pandas is required for to_dataframe(). Install with: pip install pandas")
141
+
142
+ def summary(self) -> str:
143
+ """Get a summary of the validation result with sample failures."""
144
+ lines = [self.message]
145
+
146
+ if self.failed_rows:
147
+ lines.append(f"\nSample of {len(self.failed_rows)} failing rows (total: {self.total_failures}):")
148
+ for row in self.failed_rows[:5]:
149
+ lines.append(f" Row {row.row_index}: {row.column}={row.value!r} - {row.reason or row.expected}")
150
+
151
+ if self.total_failures > 5:
152
+ lines.append(f" ... and {self.total_failures - 5} more failures")
153
+
154
+ return "\n".join(lines)
155
+
65
156
 
66
157
  @dataclass
67
158
  class ProfileResult:
duckguard/core/scoring.py CHANGED
@@ -14,7 +14,7 @@ from __future__ import annotations
14
14
  from dataclasses import dataclass, field
15
15
  from datetime import datetime
16
16
  from enum import Enum
17
- from typing import Any, TYPE_CHECKING
17
+ from typing import TYPE_CHECKING
18
18
 
19
19
  if TYPE_CHECKING:
20
20
  from duckguard.core.dataset import Dataset
@@ -302,7 +302,6 @@ class QualityScorer:
302
302
  # Check for reasonable ranges on numeric columns
303
303
  if numeric_stats.get("mean") is not None:
304
304
  min_val = stats.get("min_value")
305
- max_val = stats.get("max_value")
306
305
 
307
306
  # Check for negative values in likely positive-only columns
308
307
  is_likely_positive = any(