duckguard 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +1 -1
  3. duckguard/anomaly/detector.py +1 -5
  4. duckguard/anomaly/methods.py +1 -3
  5. duckguard/cli/main.py +304 -54
  6. duckguard/connectors/__init__.py +2 -2
  7. duckguard/connectors/bigquery.py +1 -1
  8. duckguard/connectors/databricks.py +1 -1
  9. duckguard/connectors/factory.py +2 -3
  10. duckguard/connectors/files.py +1 -1
  11. duckguard/connectors/kafka.py +2 -2
  12. duckguard/connectors/mongodb.py +1 -1
  13. duckguard/connectors/mysql.py +1 -1
  14. duckguard/connectors/oracle.py +1 -1
  15. duckguard/connectors/postgres.py +1 -2
  16. duckguard/connectors/redshift.py +1 -1
  17. duckguard/connectors/snowflake.py +1 -2
  18. duckguard/connectors/sqlite.py +1 -1
  19. duckguard/connectors/sqlserver.py +10 -13
  20. duckguard/contracts/__init__.py +6 -6
  21. duckguard/contracts/diff.py +1 -1
  22. duckguard/contracts/generator.py +5 -6
  23. duckguard/contracts/loader.py +4 -4
  24. duckguard/contracts/validator.py +3 -4
  25. duckguard/core/__init__.py +3 -3
  26. duckguard/core/column.py +110 -5
  27. duckguard/core/dataset.py +3 -3
  28. duckguard/core/result.py +92 -1
  29. duckguard/core/scoring.py +1 -2
  30. duckguard/errors.py +362 -0
  31. duckguard/history/__init__.py +44 -0
  32. duckguard/history/schema.py +183 -0
  33. duckguard/history/storage.py +479 -0
  34. duckguard/history/trends.py +348 -0
  35. duckguard/integrations/__init__.py +31 -0
  36. duckguard/integrations/airflow.py +387 -0
  37. duckguard/integrations/dbt.py +458 -0
  38. duckguard/notifications/__init__.py +43 -0
  39. duckguard/notifications/formatter.py +118 -0
  40. duckguard/notifications/notifiers.py +357 -0
  41. duckguard/profiler/auto_profile.py +3 -3
  42. duckguard/pytest_plugin/__init__.py +1 -1
  43. duckguard/pytest_plugin/plugin.py +1 -1
  44. duckguard/reporting/console.py +2 -2
  45. duckguard/reports/__init__.py +42 -0
  46. duckguard/reports/html_reporter.py +515 -0
  47. duckguard/reports/pdf_reporter.py +114 -0
  48. duckguard/rules/__init__.py +3 -3
  49. duckguard/rules/executor.py +3 -4
  50. duckguard/rules/generator.py +4 -4
  51. duckguard/rules/loader.py +5 -5
  52. duckguard/semantic/__init__.py +1 -1
  53. duckguard/semantic/analyzer.py +0 -2
  54. duckguard/semantic/validators.py +2 -1
  55. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/METADATA +135 -5
  56. duckguard-2.2.0.dist-info/RECORD +69 -0
  57. duckguard-2.0.0.dist-info/RECORD +0 -55
  58. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/WHEEL +0 -0
  59. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/entry_points.txt +0 -0
  60. {duckguard-2.0.0.dist-info → duckguard-2.2.0.dist-info}/licenses/LICENSE +0 -0
duckguard/__init__.py CHANGED
@@ -3,7 +3,7 @@ DuckGuard - Data quality that just works.
3
3
 
4
4
  A Python-native data quality tool built on DuckDB for speed.
5
5
  Features YAML-based rules, semantic type detection, data contracts,
6
- and anomaly detection.
6
+ anomaly detection, notifications, and dbt integration.
7
7
 
8
8
  Quick Start:
9
9
  # Python API
@@ -12,61 +12,80 @@ Quick Start:
12
12
  assert orders.row_count > 0
13
13
  assert orders.customer_id.null_percent == 0
14
14
 
15
+ # With row-level error capture
16
+ result = orders.quantity.between(1, 100)
17
+ if not result:
18
+ print(result.summary()) # See which rows failed
19
+
20
+ # Notifications
21
+ from duckguard.notifications import SlackNotifier
22
+ slack = SlackNotifier(webhook_url="...")
23
+ slack.send_failure_alert(result)
24
+
15
25
  # CLI
16
26
  $ duckguard check data.csv
17
27
  $ duckguard discover data.csv --output duckguard.yaml
18
28
  $ duckguard contract generate data.csv
19
29
 
20
- Documentation: https://github.com/duckguard/duckguard
30
+ Documentation: https://github.com/XDataHubAI/duckguard
21
31
  """
22
32
 
23
33
  # Core classes
24
- from duckguard.core.dataset import Dataset
34
+ # Anomaly detection
35
+ from duckguard.anomaly import (
36
+ AnomalyDetector,
37
+ AnomalyResult,
38
+ detect_anomalies,
39
+ )
40
+
41
+ # Connectors
42
+ from duckguard.connectors import connect
43
+
44
+ # Data contracts
45
+ from duckguard.contracts import (
46
+ DataContract,
47
+ diff_contracts,
48
+ generate_contract,
49
+ load_contract,
50
+ validate_contract,
51
+ )
25
52
  from duckguard.core.column import Column
53
+ from duckguard.core.dataset import Dataset
26
54
  from duckguard.core.engine import DuckGuardEngine
27
- from duckguard.core.result import ValidationResult, CheckResult
55
+ from duckguard.core.result import CheckResult, FailedRow, ValidationResult
28
56
  from duckguard.core.scoring import QualityScore, QualityScorer, score
29
57
 
30
- # Connectors
31
- from duckguard.connectors import connect
58
+ # Error classes
59
+ from duckguard.errors import (
60
+ ColumnNotFoundError,
61
+ ContractViolationError,
62
+ DuckGuardError,
63
+ RuleParseError,
64
+ UnsupportedConnectorError,
65
+ ValidationError,
66
+ )
32
67
 
33
68
  # Profiling
34
- from duckguard.profiler import profile, AutoProfiler
69
+ from duckguard.profiler import AutoProfiler, profile
35
70
 
36
71
  # Rules (YAML-based)
37
72
  from duckguard.rules import (
38
- load_rules,
39
- load_rules_from_string,
73
+ RuleSet,
40
74
  execute_rules,
41
75
  generate_rules,
42
- RuleSet,
76
+ load_rules,
77
+ load_rules_from_string,
43
78
  )
44
79
 
45
80
  # Semantic type detection
46
81
  from duckguard.semantic import (
47
- SemanticType,
48
82
  SemanticAnalyzer,
83
+ SemanticType,
49
84
  detect_type,
50
85
  detect_types_for_dataset,
51
86
  )
52
87
 
53
- # Data contracts
54
- from duckguard.contracts import (
55
- DataContract,
56
- load_contract,
57
- validate_contract,
58
- generate_contract,
59
- diff_contracts,
60
- )
61
-
62
- # Anomaly detection
63
- from duckguard.anomaly import (
64
- AnomalyDetector,
65
- AnomalyResult,
66
- detect_anomalies,
67
- )
68
-
69
- __version__ = "2.0.0"
88
+ __version__ = "2.2.0"
70
89
 
71
90
  __all__ = [
72
91
  # Core classes
@@ -75,6 +94,7 @@ __all__ = [
75
94
  "DuckGuardEngine",
76
95
  "ValidationResult",
77
96
  "CheckResult",
97
+ "FailedRow",
78
98
  # Scoring
79
99
  "QualityScore",
80
100
  "QualityScorer",
@@ -105,6 +125,13 @@ __all__ = [
105
125
  "AnomalyDetector",
106
126
  "AnomalyResult",
107
127
  "detect_anomalies",
128
+ # Errors
129
+ "DuckGuardError",
130
+ "ColumnNotFoundError",
131
+ "ContractViolationError",
132
+ "RuleParseError",
133
+ "UnsupportedConnectorError",
134
+ "ValidationError",
108
135
  # Version
109
136
  "__version__",
110
137
  ]
duckguard/anomaly/__init__.py CHANGED
@@ -17,9 +17,9 @@ from duckguard.anomaly.detector import (
17
17
  detect_column_anomalies,
18
18
  )
19
19
  from duckguard.anomaly.methods import (
20
- ZScoreMethod,
21
20
  IQRMethod,
22
21
  PercentChangeMethod,
22
+ ZScoreMethod,
23
23
  )
24
24
 
25
25
  __all__ = [
duckguard/anomaly/detector.py CHANGED
@@ -10,15 +10,11 @@ from datetime import datetime
10
10
  from enum import Enum
11
11
  from typing import Any
12
12
 
13
- from duckguard.core.dataset import Dataset
14
13
  from duckguard.anomaly.methods import (
15
- AnomalyMethod,
16
14
  AnomalyScore,
17
- ZScoreMethod,
18
- IQRMethod,
19
- PercentChangeMethod,
20
15
  create_method,
21
16
  )
17
+ from duckguard.core.dataset import Dataset
22
18
 
23
19
 
24
20
  class AnomalyType(Enum):
duckguard/anomaly/methods.py CHANGED
@@ -5,10 +5,10 @@ Implements various statistical methods for detecting anomalies in data.
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
+ import math
8
9
  from abc import ABC, abstractmethod
9
10
  from dataclasses import dataclass, field
10
11
  from typing import Any
11
- import math
12
12
 
13
13
 
14
14
  @dataclass
@@ -177,8 +177,6 @@ class IQRMethod(AnomalyMethod):
177
177
  if not clean_values:
178
178
  return
179
179
 
180
- n = len(clean_values)
181
-
182
180
  # Calculate Q1 and Q3
183
181
  self._q1 = self._percentile(clean_values, 25)
184
182
  self._q3 = self._percentile(clean_values, 75)