duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +29 -1
  3. duckguard/anomaly/baselines.py +294 -0
  4. duckguard/anomaly/detector.py +1 -5
  5. duckguard/anomaly/methods.py +17 -5
  6. duckguard/anomaly/ml_methods.py +724 -0
  7. duckguard/cli/main.py +561 -56
  8. duckguard/connectors/__init__.py +2 -2
  9. duckguard/connectors/bigquery.py +1 -1
  10. duckguard/connectors/databricks.py +1 -1
  11. duckguard/connectors/factory.py +2 -3
  12. duckguard/connectors/files.py +1 -1
  13. duckguard/connectors/kafka.py +2 -2
  14. duckguard/connectors/mongodb.py +1 -1
  15. duckguard/connectors/mysql.py +1 -1
  16. duckguard/connectors/oracle.py +1 -1
  17. duckguard/connectors/postgres.py +1 -2
  18. duckguard/connectors/redshift.py +1 -1
  19. duckguard/connectors/snowflake.py +1 -2
  20. duckguard/connectors/sqlite.py +1 -1
  21. duckguard/connectors/sqlserver.py +10 -13
  22. duckguard/contracts/__init__.py +6 -6
  23. duckguard/contracts/diff.py +1 -1
  24. duckguard/contracts/generator.py +5 -6
  25. duckguard/contracts/loader.py +4 -4
  26. duckguard/contracts/validator.py +3 -4
  27. duckguard/core/__init__.py +3 -3
  28. duckguard/core/column.py +588 -5
  29. duckguard/core/dataset.py +708 -3
  30. duckguard/core/result.py +328 -1
  31. duckguard/core/scoring.py +1 -2
  32. duckguard/errors.py +362 -0
  33. duckguard/freshness/__init__.py +33 -0
  34. duckguard/freshness/monitor.py +429 -0
  35. duckguard/history/__init__.py +44 -0
  36. duckguard/history/schema.py +301 -0
  37. duckguard/history/storage.py +479 -0
  38. duckguard/history/trends.py +348 -0
  39. duckguard/integrations/__init__.py +31 -0
  40. duckguard/integrations/airflow.py +387 -0
  41. duckguard/integrations/dbt.py +458 -0
  42. duckguard/notifications/__init__.py +61 -0
  43. duckguard/notifications/email.py +508 -0
  44. duckguard/notifications/formatter.py +118 -0
  45. duckguard/notifications/notifiers.py +357 -0
  46. duckguard/profiler/auto_profile.py +3 -3
  47. duckguard/pytest_plugin/__init__.py +1 -1
  48. duckguard/pytest_plugin/plugin.py +1 -1
  49. duckguard/reporting/console.py +2 -2
  50. duckguard/reports/__init__.py +42 -0
  51. duckguard/reports/html_reporter.py +514 -0
  52. duckguard/reports/pdf_reporter.py +114 -0
  53. duckguard/rules/__init__.py +3 -3
  54. duckguard/rules/executor.py +3 -4
  55. duckguard/rules/generator.py +8 -5
  56. duckguard/rules/loader.py +5 -5
  57. duckguard/rules/schema.py +23 -0
  58. duckguard/schema_history/__init__.py +40 -0
  59. duckguard/schema_history/analyzer.py +414 -0
  60. duckguard/schema_history/tracker.py +288 -0
  61. duckguard/semantic/__init__.py +1 -1
  62. duckguard/semantic/analyzer.py +0 -2
  63. duckguard/semantic/detector.py +17 -1
  64. duckguard/semantic/validators.py +2 -1
  65. duckguard-2.3.0.dist-info/METADATA +953 -0
  66. duckguard-2.3.0.dist-info/RECORD +77 -0
  67. duckguard-2.0.0.dist-info/METADATA +0 -221
  68. duckguard-2.0.0.dist-info/RECORD +0 -55
  69. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
  70. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
  71. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,8 @@
1
1
  """Connectors for various data sources."""
2
2
 
3
- from duckguard.connectors.base import Connector, ConnectionConfig
4
- from duckguard.connectors.files import FileConnector, S3Connector, GCSConnector, AzureConnector
3
+ from duckguard.connectors.base import ConnectionConfig, Connector
5
4
  from duckguard.connectors.factory import connect, register_connector
5
+ from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
6
6
 
7
7
  # Database connectors (imported lazily to avoid import errors)
8
8
  __all__ = [
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import parse_qs, urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -4,12 +4,11 @@ from __future__ import annotations
4
4
 
5
5
  from typing import Any
6
6
 
7
- from duckguard.connectors.base import Connector, ConnectionConfig
8
- from duckguard.connectors.files import FileConnector, S3Connector, GCSConnector, AzureConnector
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
8
+ from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
12
-
13
12
  # Registry of available connectors
14
13
  _CONNECTORS: list[type[Connector]] = [
15
14
  S3Connector,
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  import os
6
6
  from pathlib import Path
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -6,7 +6,7 @@ import json
6
6
  from typing import Any
7
7
  from urllib.parse import parse_qs, urlparse
8
8
 
9
- from duckguard.connectors.base import Connector, ConnectionConfig
9
+ from duckguard.connectors.base import ConnectionConfig, Connector
10
10
  from duckguard.core.dataset import Dataset
11
11
  from duckguard.core.engine import DuckGuardEngine
12
12
 
@@ -320,7 +320,7 @@ class KafkaStreamValidator:
320
320
  "messages_failed": 0,
321
321
  }
322
322
 
323
- def add_rule(self, rule: callable) -> "KafkaStreamValidator":
323
+ def add_rule(self, rule: callable) -> KafkaStreamValidator:
324
324
  """Add a validation rule."""
325
325
  self.rules.append(rule)
326
326
  return self
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from urllib.parse import urlparse
6
6
 
7
- from duckguard.connectors.base import Connector, ConnectionConfig
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
8
8
  from duckguard.core.dataset import Dataset
9
9
  from duckguard.core.engine import DuckGuardEngine
10
10
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -2,10 +2,9 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import re
6
5
  from urllib.parse import urlparse
7
6
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
8
  from duckguard.core.dataset import Dataset
10
9
  from duckguard.core.engine import DuckGuardEngine
11
10
 
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from urllib.parse import urlparse
6
6
 
7
- from duckguard.connectors.base import Connector, ConnectionConfig
7
+ from duckguard.connectors.base import ConnectionConfig, Connector
8
8
  from duckguard.core.dataset import Dataset
9
9
  from duckguard.core.engine import DuckGuardEngine
10
10
 
@@ -2,11 +2,10 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import re
6
5
  from typing import Any
7
6
  from urllib.parse import parse_qs, urlparse
8
7
 
9
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
10
9
  from duckguard.core.dataset import Dataset
11
10
  from duckguard.core.engine import DuckGuardEngine
12
11
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  import os
6
6
  from pathlib import Path
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  from typing import Any
6
6
  from urllib.parse import parse_qs, urlparse
7
7
 
8
- from duckguard.connectors.base import Connector, ConnectionConfig
8
+ from duckguard.connectors.base import ConnectionConfig, Connector
9
9
  from duckguard.core.dataset import Dataset
10
10
  from duckguard.core.engine import DuckGuardEngine
11
11
 
@@ -55,20 +55,17 @@ class SQLServerConnector(Connector):
55
55
  Dataset object
56
56
  """
57
57
  # Try pyodbc first, then pymssql
58
- try:
59
- import pyodbc
58
+ import importlib.util
60
59
 
60
+ if importlib.util.find_spec("pyodbc") is not None:
61
61
  driver_module = "pyodbc"
62
- except ImportError:
63
- try:
64
- import pymssql
65
-
66
- driver_module = "pymssql"
67
- except ImportError:
68
- raise ImportError(
69
- "SQL Server support requires pyodbc or pymssql. "
70
- "Install with: pip install duckguard[sqlserver]"
71
- )
62
+ elif importlib.util.find_spec("pymssql") is not None:
63
+ driver_module = "pymssql"
64
+ else:
65
+ raise ImportError(
66
+ "SQL Server support requires pyodbc or pymssql. "
67
+ "Install with: pip install duckguard[sqlserver]"
68
+ )
72
69
 
73
70
  if not config.table:
74
71
  raise ValueError("Table name is required for SQL Server connections")
@@ -14,17 +14,17 @@ Example:
14
14
  print(f"Contract violations: {result.violations}")
15
15
  """
16
16
 
17
+ from duckguard.contracts.diff import SchemaDiff, diff_contracts
18
+ from duckguard.contracts.generator import generate_contract
19
+ from duckguard.contracts.loader import contract_to_yaml, load_contract, load_contract_from_string
17
20
  from duckguard.contracts.schema import (
21
+ ContractMetadata,
18
22
  DataContract,
19
- SchemaField,
20
23
  FieldType,
21
24
  QualitySLA,
22
- ContractMetadata,
25
+ SchemaField,
23
26
  )
24
- from duckguard.contracts.loader import load_contract, load_contract_from_string, contract_to_yaml
25
- from duckguard.contracts.validator import validate_contract, ContractValidationResult
26
- from duckguard.contracts.generator import generate_contract
27
- from duckguard.contracts.diff import diff_contracts, SchemaDiff
27
+ from duckguard.contracts.validator import ContractValidationResult, validate_contract
28
28
 
29
29
  __all__ = [
30
30
  # Schema
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
9
9
  from enum import Enum
10
10
  from typing import Any
11
11
 
12
- from duckguard.contracts.schema import DataContract, SchemaField, FieldType
12
+ from duckguard.contracts.schema import DataContract, FieldType, SchemaField
13
13
 
14
14
 
15
15
  class ChangeType(Enum):
@@ -7,19 +7,18 @@ from __future__ import annotations
7
7
 
8
8
  from datetime import datetime
9
9
  from pathlib import Path
10
- from typing import Any
11
10
 
12
- from duckguard.core.dataset import Dataset
13
11
  from duckguard.connectors import connect
12
+ from duckguard.contracts.loader import contract_to_yaml
14
13
  from duckguard.contracts.schema import (
14
+ ContractMetadata,
15
15
  DataContract,
16
- SchemaField,
17
- FieldType,
18
16
  FieldConstraint,
17
+ FieldType,
19
18
  QualitySLA,
20
- ContractMetadata,
19
+ SchemaField,
21
20
  )
22
- from duckguard.contracts.loader import contract_to_yaml
21
+ from duckguard.core.dataset import Dataset
23
22
  from duckguard.semantic import SemanticAnalyzer, SemanticType
24
23
 
25
24
 
@@ -47,12 +47,12 @@ from typing import Any
47
47
  import yaml
48
48
 
49
49
  from duckguard.contracts.schema import (
50
+ ContractMetadata,
50
51
  DataContract,
51
- SchemaField,
52
- FieldType,
53
52
  FieldConstraint,
53
+ FieldType,
54
54
  QualitySLA,
55
- ContractMetadata,
55
+ SchemaField,
56
56
  )
57
57
 
58
58
 
@@ -82,7 +82,7 @@ def load_contract(path: str | Path) -> DataContract:
82
82
  if not path.exists():
83
83
  raise FileNotFoundError(f"Contract file not found: {path}")
84
84
 
85
- with open(path, "r", encoding="utf-8") as f:
85
+ with open(path, encoding="utf-8") as f:
86
86
  content = f.read()
87
87
 
88
88
  return load_contract_from_string(content, source_file=str(path))
@@ -6,14 +6,13 @@ Validates datasets against data contracts to ensure compliance.
6
6
  from __future__ import annotations
7
7
 
8
8
  from dataclasses import dataclass, field
9
- from datetime import datetime, timedelta
9
+ from datetime import datetime
10
10
  from enum import Enum
11
11
  from typing import Any
12
- import re
13
12
 
14
- from duckguard.core.dataset import Dataset
15
13
  from duckguard.connectors import connect
16
- from duckguard.contracts.schema import DataContract, SchemaField, FieldType
14
+ from duckguard.contracts.schema import DataContract, SchemaField
15
+ from duckguard.core.dataset import Dataset
17
16
 
18
17
 
19
18
  class ViolationType(Enum):
@@ -1,8 +1,8 @@
1
1
  """Core module containing the engine, dataset, and column classes."""
2
2
 
3
- from duckguard.core.engine import DuckGuardEngine
4
- from duckguard.core.dataset import Dataset
5
3
  from duckguard.core.column import Column
6
- from duckguard.core.result import ValidationResult, CheckResult
4
+ from duckguard.core.dataset import Dataset
5
+ from duckguard.core.engine import DuckGuardEngine
6
+ from duckguard.core.result import CheckResult, ValidationResult
7
7
 
8
8
  __all__ = ["DuckGuardEngine", "Dataset", "Column", "ValidationResult", "CheckResult"]