phlo-pandera 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phlo_pandera-0.1.0/PKG-INFO +20 -0
- phlo_pandera-0.1.0/README.md +82 -0
- phlo_pandera-0.1.0/pyproject.toml +56 -0
- phlo_pandera-0.1.0/setup.cfg +4 -0
- phlo_pandera-0.1.0/src/phlo_pandera/__init__.py +83 -0
- phlo_pandera-0.1.0/src/phlo_pandera/checks.py +467 -0
- phlo_pandera-0.1.0/src/phlo_pandera/checks_extra.py +319 -0
- phlo_pandera-0.1.0/src/phlo_pandera/cli_plugin.py +34 -0
- phlo_pandera-0.1.0/src/phlo_pandera/cli_schema.py +426 -0
- phlo_pandera-0.1.0/src/phlo_pandera/cli_schema_codegen.py +453 -0
- phlo_pandera-0.1.0/src/phlo_pandera/cli_schema_utils.py +157 -0
- phlo_pandera-0.1.0/src/phlo_pandera/cli_validate.py +625 -0
- phlo_pandera-0.1.0/src/phlo_pandera/contract.py +129 -0
- phlo_pandera-0.1.0/src/phlo_pandera/decorator.py +542 -0
- phlo_pandera-0.1.0/src/phlo_pandera/decorator_helpers.py +247 -0
- phlo_pandera-0.1.0/src/phlo_pandera/examples.py +224 -0
- phlo_pandera-0.1.0/src/phlo_pandera/observatory_assets/README.txt +1 -0
- phlo_pandera-0.1.0/src/phlo_pandera/observatory_plugin.py +57 -0
- phlo_pandera-0.1.0/src/phlo_pandera/pandera_asset_checks.py +178 -0
- phlo_pandera-0.1.0/src/phlo_pandera/partitioning.py +97 -0
- phlo_pandera-0.1.0/src/phlo_pandera/plugin.py +76 -0
- phlo_pandera-0.1.0/src/phlo_pandera/reconciliation.py +1151 -0
- phlo_pandera-0.1.0/src/phlo_pandera/schema_extractor.py +91 -0
- phlo_pandera-0.1.0/src/phlo_pandera/schemas/__init__.py +19 -0
- phlo_pandera-0.1.0/src/phlo_pandera/schemas/asset_outputs.py +56 -0
- phlo_pandera-0.1.0/src/phlo_pandera/schemas/base.py +38 -0
- phlo_pandera-0.1.0/src/phlo_pandera/severity.py +83 -0
- phlo_pandera-0.1.0/src/phlo_pandera.egg-info/PKG-INFO +20 -0
- phlo_pandera-0.1.0/src/phlo_pandera.egg-info/SOURCES.txt +42 -0
- phlo_pandera-0.1.0/src/phlo_pandera.egg-info/dependency_links.txt +1 -0
- phlo_pandera-0.1.0/src/phlo_pandera.egg-info/entry_points.txt +8 -0
- phlo_pandera-0.1.0/src/phlo_pandera.egg-info/requires.txt +12 -0
- phlo_pandera-0.1.0/src/phlo_pandera.egg-info/top_level.txt +1 -0
- phlo_pandera-0.1.0/tests/test_cli_004_schema_catalog.py +290 -0
- phlo_pandera-0.1.0/tests/test_cli_004_schema_generate.py +253 -0
- phlo_pandera-0.1.0/tests/test_cli_schema_codegen.py +9 -0
- phlo_pandera-0.1.0/tests/test_cli_validate_workflow.py +553 -0
- phlo_pandera-0.1.0/tests/test_decorator_helpers.py +42 -0
- phlo_pandera-0.1.0/tests/test_integration_quality.py +348 -0
- phlo_pandera-0.1.0/tests/test_quality_decorator.py +591 -0
- phlo_pandera-0.1.0/tests/test_quality_partitioning.py +81 -0
- phlo_pandera-0.1.0/tests/test_quality_reconciliation.py +703 -0
- phlo_pandera-0.1.0/tests/test_quality_severity_policy.py +37 -0
- phlo_pandera-0.1.0/tests/test_schema_extractor.py +82 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phlo-pandera
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Quality checks and schema utilities for Phlo
|
|
5
|
+
Author-email: Phlo Team <team@phlo.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Description-Content-Type: text/plain
|
|
9
|
+
Requires-Dist: phlo>=0.1.0
|
|
10
|
+
Requires-Dist: pandas>=2.3.3
|
|
11
|
+
Requires-Dist: pandera>=0.26.1
|
|
12
|
+
Requires-Dist: click>=8.3.0
|
|
13
|
+
Requires-Dist: rich>=14.2.0
|
|
14
|
+
Provides-Extra: observatory
|
|
15
|
+
Requires-Dist: phlo-observatory>=0.1.0; extra == "observatory"
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
18
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
19
|
+
|
|
20
|
+
Quality checks and Pandera utilities for Phlo.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# phlo-pandera
|
|
2
|
+
|
|
3
|
+
Data quality checks and validation for Phlo.
|
|
4
|
+
|
|
5
|
+
## Description
|
|
6
|
+
|
|
7
|
+
Define and execute data quality checks using the `@phlo_quality` decorator. Checks emit capability specs that adapters translate into orchestrator-native checks.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install phlo-pandera
|
|
13
|
+
# or
|
|
14
|
+
phlo plugin install pandera
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Configuration
|
|
18
|
+
|
|
19
|
+
| Variable | Default | Description |
|
|
20
|
+
| ------------------------ | ------- | ------------------------------ |
|
|
21
|
+
| `PANDERA_CRITICAL_LEVEL` | `error` | Severity that blocks promotion |
|
|
22
|
+
|
|
23
|
+
## Auto-Configuration
|
|
24
|
+
|
|
25
|
+
This package is **fully auto-configured**:
|
|
26
|
+
|
|
27
|
+
| Feature | How It Works |
|
|
28
|
+
| ----------------------- | --------------------------------------------------------- |
|
|
29
|
+
| **Check Discovery** | Quality workflows auto-discovered in `workflows/quality/` |
|
|
30
|
+
| **Event Emission** | Emits `quality.result` events to HookBus |
|
|
31
|
+
| **Adapter Integration** | Checks translate into orchestrator-native checks via adapters |
|
|
32
|
+
| **Alerting** | Failed checks auto-routed to alerting destinations |
|
|
33
|
+
|
|
34
|
+
### Event Flow
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
@phlo_quality → QualityEventEmitter → quality.result → [Alerting, Metrics, OpenMetadata]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
### Defining Checks
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from phlo.quality import phlo_quality
|
|
46
|
+
from phlo_pandera.checks import null_check, uniqueness_check
|
|
47
|
+
|
|
48
|
+
@phlo_quality(
|
|
49
|
+
asset="bronze.users",
|
|
50
|
+
checks=[
|
|
51
|
+
null_check(column="id"),
|
|
52
|
+
uniqueness_check(column="email"),
|
|
53
|
+
]
|
|
54
|
+
)
|
|
55
|
+
def validate_users():
|
|
56
|
+
pass
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### CLI Commands
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
# Run quality checks
|
|
63
|
+
phlo quality run --asset bronze.users
|
|
64
|
+
|
|
65
|
+
# List available checks
|
|
66
|
+
phlo quality list
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Built-in Checks
|
|
70
|
+
|
|
71
|
+
| Check | Description |
|
|
72
|
+
| ------------------ | ----------------------------------- |
|
|
73
|
+
| `null_check` | Validates column has no NULL values |
|
|
74
|
+
| `uniqueness_check` | Validates column values are unique |
|
|
75
|
+
| `range_check` | Validates values are within range |
|
|
76
|
+
| `regex_check` | Validates values match pattern |
|
|
77
|
+
| `freshness_check` | Validates data is recent |
|
|
78
|
+
|
|
79
|
+
## Entry Points
|
|
80
|
+
|
|
81
|
+
- `phlo.plugins.cli` - Provides `pandera` CLI commands
|
|
82
|
+
- `phlo.plugins.quality_providers` - Provides the Pandera quality provider
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]  # the [project] table (PEP 621) is only understood by setuptools 61+
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "phlo-pandera"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Quality checks and schema utilities for Phlo"
|
|
9
|
+
readme = {text = "Quality checks and Pandera utilities for Phlo.", content-type = "text/plain"}
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Phlo Team", email = "team@phlo.dev"},
|
|
13
|
+
]
|
|
14
|
+
license = {text = "MIT"}
|
|
15
|
+
dependencies = [
|
|
16
|
+
"phlo>=0.1.0",
|
|
17
|
+
"pandas>=2.3.3",
|
|
18
|
+
"pandera>=0.26.1",
|
|
19
|
+
"click>=8.3.0",
|
|
20
|
+
"rich>=14.2.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
observatory = [
|
|
25
|
+
"phlo-observatory>=0.1.0",
|
|
26
|
+
]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=7.0",
|
|
29
|
+
"ruff>=0.1.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.entry-points."phlo.plugins.cli"]
|
|
33
|
+
pandera = "phlo_pandera.cli_plugin:PanderaCliPlugin"
|
|
34
|
+
|
|
35
|
+
[project.entry-points."phlo.plugins.observatory"]
|
|
36
|
+
pandera = "phlo_pandera.observatory_plugin:PanderaObservatoryExtension"
|
|
37
|
+
|
|
38
|
+
[project.entry-points."phlo.plugins.quality_providers"]
|
|
39
|
+
pandera = "phlo_pandera.plugin:PanderaQualityProvider"
|
|
40
|
+
|
|
41
|
+
[tool.setuptools]
|
|
42
|
+
package-dir = {"" = "src"}
|
|
43
|
+
include-package-data = true
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.packages.find]
|
|
46
|
+
where = ["src"]
|
|
47
|
+
|
|
48
|
+
[tool.setuptools.package-data]
|
|
49
|
+
phlo_pandera = [
|
|
50
|
+
"schemas/**/*.py",
|
|
51
|
+
"observatory_assets/*",
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
[tool.ruff]
|
|
55
|
+
line-length = 100
|
|
56
|
+
target-version = "py311"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Phlo Quality Framework.
|
|
3
|
+
|
|
4
|
+
Declarative quality checks that reduce boilerplate by 70%.
|
|
5
|
+
|
|
6
|
+
Usage::
|
|
7
|
+
|
|
8
|
+
from phlo_pandera import NullCheck, RangeCheck, phlo_pandera
|
|
9
|
+
|
|
10
|
+
@phlo_pandera(
|
|
11
|
+
table="bronze.weather_observations",
|
|
12
|
+
checks=[
|
|
13
|
+
NullCheck(columns=["station_id", "temperature"]),
|
|
14
|
+
RangeCheck(column="temperature", min_value=-50, max_value=60),
|
|
15
|
+
],
|
|
16
|
+
)
|
|
17
|
+
def weather_quality():
|
|
18
|
+
pass
|
|
19
|
+
|
|
20
|
+
Available Checks:
|
|
21
|
+
- NullCheck: Verify no null values in specified columns
|
|
22
|
+
- RangeCheck: Verify numeric values within range
|
|
23
|
+
- FreshnessCheck: Verify data recency
|
|
24
|
+
- UniqueCheck: Verify uniqueness constraints
|
|
25
|
+
- CountCheck: Verify row count bounds
|
|
26
|
+
- SchemaCheck: Verify Pandera schema compliance
|
|
27
|
+
- CustomSQLCheck: Execute arbitrary SQL assertions
|
|
28
|
+
- PatternCheck: Verify string values match regex patterns
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from phlo_pandera.checks import (
|
|
32
|
+
CountCheck,
|
|
33
|
+
FreshnessCheck,
|
|
34
|
+
NullCheck,
|
|
35
|
+
QualityCheck,
|
|
36
|
+
RangeCheck,
|
|
37
|
+
UniqueCheck,
|
|
38
|
+
)
|
|
39
|
+
from phlo_pandera.checks_extra import CustomSQLCheck, PatternCheck, SchemaCheck
|
|
40
|
+
from phlo_pandera.contract import PANDERA_CONTRACT_CHECK_NAME, QualityCheckContract, dbt_check_name
|
|
41
|
+
from phlo_pandera.decorator import clear_quality_checks, get_quality_checks, phlo_pandera
|
|
42
|
+
from phlo_pandera.schema_extractor import PanderaSchemaExtractor
|
|
43
|
+
from phlo_pandera.reconciliation import (
|
|
44
|
+
AggregateConsistencyCheck,
|
|
45
|
+
AggregateSpec,
|
|
46
|
+
ChecksumReconciliationCheck,
|
|
47
|
+
KeyParityCheck,
|
|
48
|
+
MultiAggregateConsistencyCheck,
|
|
49
|
+
ReconciliationCheck,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
__all__ = [
|
|
53
|
+
# Decorator (use as @phlo_pandera(...))
|
|
54
|
+
"phlo_pandera",
|
|
55
|
+
"get_quality_checks",
|
|
56
|
+
"clear_quality_checks",
|
|
57
|
+
# Base class
|
|
58
|
+
"QualityCheck",
|
|
59
|
+
# Quality checks
|
|
60
|
+
"NullCheck",
|
|
61
|
+
"RangeCheck",
|
|
62
|
+
"FreshnessCheck",
|
|
63
|
+
"UniqueCheck",
|
|
64
|
+
"CountCheck",
|
|
65
|
+
"SchemaCheck",
|
|
66
|
+
"CustomSQLCheck",
|
|
67
|
+
"PatternCheck",
|
|
68
|
+
# Reconciliation checks
|
|
69
|
+
"ReconciliationCheck",
|
|
70
|
+
"AggregateConsistencyCheck",
|
|
71
|
+
"AggregateSpec",
|
|
72
|
+
"KeyParityCheck",
|
|
73
|
+
"MultiAggregateConsistencyCheck",
|
|
74
|
+
"ChecksumReconciliationCheck",
|
|
75
|
+
# Schema extraction
|
|
76
|
+
"PanderaSchemaExtractor",
|
|
77
|
+
# Contract helpers
|
|
78
|
+
"PANDERA_CONTRACT_CHECK_NAME",
|
|
79
|
+
"QualityCheckContract",
|
|
80
|
+
"dbt_check_name",
|
|
81
|
+
]
|
|
82
|
+
|
|
83
|
+
__version__ = "0.1.0"  # must match the version declared in pyproject.toml / PKG-INFO
|