duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckguard/__init__.py +55 -28
- duckguard/anomaly/__init__.py +29 -1
- duckguard/anomaly/baselines.py +294 -0
- duckguard/anomaly/detector.py +1 -5
- duckguard/anomaly/methods.py +17 -5
- duckguard/anomaly/ml_methods.py +724 -0
- duckguard/cli/main.py +561 -56
- duckguard/connectors/__init__.py +2 -2
- duckguard/connectors/bigquery.py +1 -1
- duckguard/connectors/databricks.py +1 -1
- duckguard/connectors/factory.py +2 -3
- duckguard/connectors/files.py +1 -1
- duckguard/connectors/kafka.py +2 -2
- duckguard/connectors/mongodb.py +1 -1
- duckguard/connectors/mysql.py +1 -1
- duckguard/connectors/oracle.py +1 -1
- duckguard/connectors/postgres.py +1 -2
- duckguard/connectors/redshift.py +1 -1
- duckguard/connectors/snowflake.py +1 -2
- duckguard/connectors/sqlite.py +1 -1
- duckguard/connectors/sqlserver.py +10 -13
- duckguard/contracts/__init__.py +6 -6
- duckguard/contracts/diff.py +1 -1
- duckguard/contracts/generator.py +5 -6
- duckguard/contracts/loader.py +4 -4
- duckguard/contracts/validator.py +3 -4
- duckguard/core/__init__.py +3 -3
- duckguard/core/column.py +588 -5
- duckguard/core/dataset.py +708 -3
- duckguard/core/result.py +328 -1
- duckguard/core/scoring.py +1 -2
- duckguard/errors.py +362 -0
- duckguard/freshness/__init__.py +33 -0
- duckguard/freshness/monitor.py +429 -0
- duckguard/history/__init__.py +44 -0
- duckguard/history/schema.py +301 -0
- duckguard/history/storage.py +479 -0
- duckguard/history/trends.py +348 -0
- duckguard/integrations/__init__.py +31 -0
- duckguard/integrations/airflow.py +387 -0
- duckguard/integrations/dbt.py +458 -0
- duckguard/notifications/__init__.py +61 -0
- duckguard/notifications/email.py +508 -0
- duckguard/notifications/formatter.py +118 -0
- duckguard/notifications/notifiers.py +357 -0
- duckguard/profiler/auto_profile.py +3 -3
- duckguard/pytest_plugin/__init__.py +1 -1
- duckguard/pytest_plugin/plugin.py +1 -1
- duckguard/reporting/console.py +2 -2
- duckguard/reports/__init__.py +42 -0
- duckguard/reports/html_reporter.py +514 -0
- duckguard/reports/pdf_reporter.py +114 -0
- duckguard/rules/__init__.py +3 -3
- duckguard/rules/executor.py +3 -4
- duckguard/rules/generator.py +8 -5
- duckguard/rules/loader.py +5 -5
- duckguard/rules/schema.py +23 -0
- duckguard/schema_history/__init__.py +40 -0
- duckguard/schema_history/analyzer.py +414 -0
- duckguard/schema_history/tracker.py +288 -0
- duckguard/semantic/__init__.py +1 -1
- duckguard/semantic/analyzer.py +0 -2
- duckguard/semantic/detector.py +17 -1
- duckguard/semantic/validators.py +2 -1
- duckguard-2.3.0.dist-info/METADATA +953 -0
- duckguard-2.3.0.dist-info/RECORD +77 -0
- duckguard-2.0.0.dist-info/METADATA +0 -221
- duckguard-2.0.0.dist-info/RECORD +0 -55
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
- {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
duckguard/connectors/__init__.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"""Connectors for various data sources."""
|
|
2
2
|
|
|
3
|
-
from duckguard.connectors.base import
|
|
4
|
-
from duckguard.connectors.files import FileConnector, S3Connector, GCSConnector, AzureConnector
|
|
3
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
5
4
|
from duckguard.connectors.factory import connect, register_connector
|
|
5
|
+
from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
|
|
6
6
|
|
|
7
7
|
# Database connectors (imported lazily to avoid import errors)
|
|
8
8
|
__all__ = [
|
duckguard/connectors/bigquery.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
from urllib.parse import urlparse
|
|
7
7
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
from urllib.parse import parse_qs, urlparse
|
|
7
7
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
duckguard/connectors/factory.py
CHANGED
|
@@ -4,12 +4,11 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
|
-
from duckguard.connectors.base import
|
|
8
|
-
from duckguard.connectors.files import
|
|
7
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
8
|
+
from duckguard.connectors.files import AzureConnector, FileConnector, GCSConnector, S3Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
|
12
|
-
|
|
13
12
|
# Registry of available connectors
|
|
14
13
|
_CONNECTORS: list[type[Connector]] = [
|
|
15
14
|
S3Connector,
|
duckguard/connectors/files.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import os
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
duckguard/connectors/kafka.py
CHANGED
|
@@ -6,7 +6,7 @@ import json
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
from urllib.parse import parse_qs, urlparse
|
|
8
8
|
|
|
9
|
-
from duckguard.connectors.base import
|
|
9
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
10
10
|
from duckguard.core.dataset import Dataset
|
|
11
11
|
from duckguard.core.engine import DuckGuardEngine
|
|
12
12
|
|
|
@@ -320,7 +320,7 @@ class KafkaStreamValidator:
|
|
|
320
320
|
"messages_failed": 0,
|
|
321
321
|
}
|
|
322
322
|
|
|
323
|
-
def add_rule(self, rule: callable) ->
|
|
323
|
+
def add_rule(self, rule: callable) -> KafkaStreamValidator:
|
|
324
324
|
"""Add a validation rule."""
|
|
325
325
|
self.rules.append(rule)
|
|
326
326
|
return self
|
duckguard/connectors/mongodb.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
from urllib.parse import urlparse
|
|
7
7
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
duckguard/connectors/mysql.py
CHANGED
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
from urllib.parse import urlparse
|
|
6
6
|
|
|
7
|
-
from duckguard.connectors.base import
|
|
7
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
8
8
|
from duckguard.core.dataset import Dataset
|
|
9
9
|
from duckguard.core.engine import DuckGuardEngine
|
|
10
10
|
|
duckguard/connectors/oracle.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
from urllib.parse import urlparse
|
|
7
7
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
duckguard/connectors/postgres.py
CHANGED
|
@@ -2,10 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import re
|
|
6
5
|
from urllib.parse import urlparse
|
|
7
6
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
7
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
8
|
from duckguard.core.dataset import Dataset
|
|
10
9
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
10
|
|
duckguard/connectors/redshift.py
CHANGED
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
from urllib.parse import urlparse
|
|
6
6
|
|
|
7
|
-
from duckguard.connectors.base import
|
|
7
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
8
8
|
from duckguard.core.dataset import Dataset
|
|
9
9
|
from duckguard.core.engine import DuckGuardEngine
|
|
10
10
|
|
|
@@ -2,11 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
import re
|
|
6
5
|
from typing import Any
|
|
7
6
|
from urllib.parse import parse_qs, urlparse
|
|
8
7
|
|
|
9
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
10
9
|
from duckguard.core.dataset import Dataset
|
|
11
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
12
11
|
|
duckguard/connectors/sqlite.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import os
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
from urllib.parse import parse_qs, urlparse
|
|
7
7
|
|
|
8
|
-
from duckguard.connectors.base import
|
|
8
|
+
from duckguard.connectors.base import ConnectionConfig, Connector
|
|
9
9
|
from duckguard.core.dataset import Dataset
|
|
10
10
|
from duckguard.core.engine import DuckGuardEngine
|
|
11
11
|
|
|
@@ -55,20 +55,17 @@ class SQLServerConnector(Connector):
|
|
|
55
55
|
Dataset object
|
|
56
56
|
"""
|
|
57
57
|
# Try pyodbc first, then pymssql
|
|
58
|
-
|
|
59
|
-
import pyodbc
|
|
58
|
+
import importlib.util
|
|
60
59
|
|
|
60
|
+
if importlib.util.find_spec("pyodbc") is not None:
|
|
61
61
|
driver_module = "pyodbc"
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
"SQL Server support requires pyodbc or pymssql. "
|
|
70
|
-
"Install with: pip install duckguard[sqlserver]"
|
|
71
|
-
)
|
|
62
|
+
elif importlib.util.find_spec("pymssql") is not None:
|
|
63
|
+
driver_module = "pymssql"
|
|
64
|
+
else:
|
|
65
|
+
raise ImportError(
|
|
66
|
+
"SQL Server support requires pyodbc or pymssql. "
|
|
67
|
+
"Install with: pip install duckguard[sqlserver]"
|
|
68
|
+
)
|
|
72
69
|
|
|
73
70
|
if not config.table:
|
|
74
71
|
raise ValueError("Table name is required for SQL Server connections")
|
duckguard/contracts/__init__.py
CHANGED
|
@@ -14,17 +14,17 @@ Example:
|
|
|
14
14
|
print(f"Contract violations: {result.violations}")
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
+
from duckguard.contracts.diff import SchemaDiff, diff_contracts
|
|
18
|
+
from duckguard.contracts.generator import generate_contract
|
|
19
|
+
from duckguard.contracts.loader import contract_to_yaml, load_contract, load_contract_from_string
|
|
17
20
|
from duckguard.contracts.schema import (
|
|
21
|
+
ContractMetadata,
|
|
18
22
|
DataContract,
|
|
19
|
-
SchemaField,
|
|
20
23
|
FieldType,
|
|
21
24
|
QualitySLA,
|
|
22
|
-
|
|
25
|
+
SchemaField,
|
|
23
26
|
)
|
|
24
|
-
from duckguard.contracts.
|
|
25
|
-
from duckguard.contracts.validator import validate_contract, ContractValidationResult
|
|
26
|
-
from duckguard.contracts.generator import generate_contract
|
|
27
|
-
from duckguard.contracts.diff import diff_contracts, SchemaDiff
|
|
27
|
+
from duckguard.contracts.validator import ContractValidationResult, validate_contract
|
|
28
28
|
|
|
29
29
|
__all__ = [
|
|
30
30
|
# Schema
|
duckguard/contracts/diff.py
CHANGED
|
@@ -9,7 +9,7 @@ from dataclasses import dataclass, field
|
|
|
9
9
|
from enum import Enum
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
-
from duckguard.contracts.schema import DataContract,
|
|
12
|
+
from duckguard.contracts.schema import DataContract, FieldType, SchemaField
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class ChangeType(Enum):
|
duckguard/contracts/generator.py
CHANGED
|
@@ -7,19 +7,18 @@ from __future__ import annotations
|
|
|
7
7
|
|
|
8
8
|
from datetime import datetime
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Any
|
|
11
10
|
|
|
12
|
-
from duckguard.core.dataset import Dataset
|
|
13
11
|
from duckguard.connectors import connect
|
|
12
|
+
from duckguard.contracts.loader import contract_to_yaml
|
|
14
13
|
from duckguard.contracts.schema import (
|
|
14
|
+
ContractMetadata,
|
|
15
15
|
DataContract,
|
|
16
|
-
SchemaField,
|
|
17
|
-
FieldType,
|
|
18
16
|
FieldConstraint,
|
|
17
|
+
FieldType,
|
|
19
18
|
QualitySLA,
|
|
20
|
-
|
|
19
|
+
SchemaField,
|
|
21
20
|
)
|
|
22
|
-
from duckguard.
|
|
21
|
+
from duckguard.core.dataset import Dataset
|
|
23
22
|
from duckguard.semantic import SemanticAnalyzer, SemanticType
|
|
24
23
|
|
|
25
24
|
|
duckguard/contracts/loader.py
CHANGED
|
@@ -47,12 +47,12 @@ from typing import Any
|
|
|
47
47
|
import yaml
|
|
48
48
|
|
|
49
49
|
from duckguard.contracts.schema import (
|
|
50
|
+
ContractMetadata,
|
|
50
51
|
DataContract,
|
|
51
|
-
SchemaField,
|
|
52
|
-
FieldType,
|
|
53
52
|
FieldConstraint,
|
|
53
|
+
FieldType,
|
|
54
54
|
QualitySLA,
|
|
55
|
-
|
|
55
|
+
SchemaField,
|
|
56
56
|
)
|
|
57
57
|
|
|
58
58
|
|
|
@@ -82,7 +82,7 @@ def load_contract(path: str | Path) -> DataContract:
|
|
|
82
82
|
if not path.exists():
|
|
83
83
|
raise FileNotFoundError(f"Contract file not found: {path}")
|
|
84
84
|
|
|
85
|
-
with open(path,
|
|
85
|
+
with open(path, encoding="utf-8") as f:
|
|
86
86
|
content = f.read()
|
|
87
87
|
|
|
88
88
|
return load_contract_from_string(content, source_file=str(path))
|
duckguard/contracts/validator.py
CHANGED
|
@@ -6,14 +6,13 @@ Validates datasets against data contracts to ensure compliance.
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
8
|
from dataclasses import dataclass, field
|
|
9
|
-
from datetime import datetime
|
|
9
|
+
from datetime import datetime
|
|
10
10
|
from enum import Enum
|
|
11
11
|
from typing import Any
|
|
12
|
-
import re
|
|
13
12
|
|
|
14
|
-
from duckguard.core.dataset import Dataset
|
|
15
13
|
from duckguard.connectors import connect
|
|
16
|
-
from duckguard.contracts.schema import DataContract, SchemaField
|
|
14
|
+
from duckguard.contracts.schema import DataContract, SchemaField
|
|
15
|
+
from duckguard.core.dataset import Dataset
|
|
17
16
|
|
|
18
17
|
|
|
19
18
|
class ViolationType(Enum):
|
duckguard/core/__init__.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"""Core module containing the engine, dataset, and column classes."""
|
|
2
2
|
|
|
3
|
-
from duckguard.core.engine import DuckGuardEngine
|
|
4
|
-
from duckguard.core.dataset import Dataset
|
|
5
3
|
from duckguard.core.column import Column
|
|
6
|
-
from duckguard.core.
|
|
4
|
+
from duckguard.core.dataset import Dataset
|
|
5
|
+
from duckguard.core.engine import DuckGuardEngine
|
|
6
|
+
from duckguard.core.result import CheckResult, ValidationResult
|
|
7
7
|
|
|
8
8
|
__all__ = ["DuckGuardEngine", "Dataset", "Column", "ValidationResult", "CheckResult"]
|