dataenginex 0.3.4__tar.gz → 0.4.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. dataenginex-0.4.11/.gitignore +81 -0
  2. dataenginex-0.4.11/CHANGELOG.md +138 -0
  3. dataenginex-0.4.11/PKG-INFO +71 -0
  4. {dataenginex-0.3.4 → dataenginex-0.4.11}/pyproject.toml +19 -9
  5. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/README.md +1 -1
  6. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/RELEASE_NOTES.md +10 -0
  7. dataenginex-0.4.11/src/dataenginex/__init__.py +34 -0
  8. dataenginex-0.4.11/src/dataenginex/api/__init__.py +54 -0
  9. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/api/auth.py +0 -0
  10. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/api/errors.py +0 -0
  11. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/api/health.py +10 -1
  12. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/api/pagination.py +0 -0
  13. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/api/rate_limit.py +0 -0
  14. dataenginex-0.4.11/src/dataenginex/api/routers/__init__.py +3 -0
  15. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/api/routers/v1.py +35 -19
  16. dataenginex-0.4.11/src/dataenginex/core/__init__.py +81 -0
  17. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/core/medallion_architecture.py +97 -72
  18. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/core/pipeline_config.py +11 -7
  19. dataenginex-0.4.11/src/dataenginex/core/quality.py +305 -0
  20. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/core/schemas.py +27 -40
  21. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/core/validators.py +101 -106
  22. dataenginex-0.4.11/src/dataenginex/data/__init__.py +33 -0
  23. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/data/connectors.py +24 -8
  24. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/data/profiler.py +11 -4
  25. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/data/registry.py +13 -7
  26. dataenginex-0.4.11/src/dataenginex/lakehouse/__init__.py +38 -0
  27. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/lakehouse/catalog.py +20 -2
  28. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/lakehouse/partitioning.py +0 -0
  29. dataenginex-0.4.11/src/dataenginex/lakehouse/storage.py +381 -0
  30. dataenginex-0.4.11/src/dataenginex/middleware/__init__.py +35 -0
  31. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/middleware/logging_config.py +13 -6
  32. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/middleware/metrics.py +20 -4
  33. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/middleware/metrics_middleware.py +31 -9
  34. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/middleware/request_logging.py +8 -1
  35. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/middleware/tracing.py +12 -1
  36. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/ml/__init__.py +20 -12
  37. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/ml/drift.py +14 -1
  38. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/ml/registry.py +17 -2
  39. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/ml/serving.py +19 -4
  40. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/ml/training.py +62 -21
  41. dataenginex-0.4.11/src/dataenginex/warehouse/__init__.py +40 -0
  42. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/warehouse/lineage.py +18 -1
  43. {dataenginex-0.3.4 → dataenginex-0.4.11}/src/dataenginex/warehouse/transforms.py +27 -9
  44. dataenginex-0.3.4/PKG-INFO +0 -66
  45. dataenginex-0.3.4/src/dataenginex/__init__.py +0 -16
  46. dataenginex-0.3.4/src/dataenginex/api/__init__.py +0 -11
  47. dataenginex-0.3.4/src/dataenginex/api/routers/__init__.py +0 -1
  48. dataenginex-0.3.4/src/dataenginex/core/__init__.py +0 -36
  49. dataenginex-0.3.4/src/dataenginex/data/__init__.py +0 -22
  50. dataenginex-0.3.4/src/dataenginex/lakehouse/__init__.py +0 -22
  51. dataenginex-0.3.4/src/dataenginex/lakehouse/storage.py +0 -177
  52. dataenginex-0.3.4/src/dataenginex/middleware/__init__.py +0 -19
  53. dataenginex-0.3.4/src/dataenginex/warehouse/__init__.py +0 -19
@@ -0,0 +1,81 @@
1
+ # Byte-compiled / cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ .eggs/
11
+
12
+ # Virtual environments
13
+ .env
14
+ .venv/
15
+ venv/
16
+ ENV/
17
+ env/
18
+
19
+ # TODO
20
+ TODO.md
21
+
22
+ # Installer logs
23
+ pip-log.txt
24
+ pip-delete-this-directory.txt
25
+
26
+ # Unit test / coverage
27
+ .coverage
28
+ coverage.xml
29
+ htmlcov/
30
+ .tox/
31
+ .nox/
32
+ .pytest_cache/
33
+
34
+ # PyInstaller
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Type checkers
39
+ .mypy_cache/
40
+ .pyre/
41
+ .pytype/
42
+
43
+ # Linter cache
44
+ .ruff_cache/
45
+
46
+ # Tool / build cache
47
+ .cache/
48
+
49
+ # IDEs and editors
50
+ .vscode/
51
+ .idea/
52
+ *.sublime-project
53
+ *.sublime-workspace
54
+
55
+ # Track VS Code MCP config only
56
+ !.vscode/
57
+ .vscode/*
58
+ !.vscode/mcp.json
59
+
60
+ # OS files
61
+ .DS_Store
62
+ Thumbs.db
63
+ desktop.ini
64
+
65
+ # Logs and databases
66
+ *.log
67
+ *.sqlite3
68
+ logs/
69
+
70
+ # Wheel metadata
71
+ pip-wheel-metadata/
72
+
73
+ # Python egg
74
+ *.egg
75
+ *.egg-info/
76
+
77
+ # Temporary
78
+ *.tmp
79
+
80
+ # Generated site (Zensical / MkDocs)
81
+ site/
@@ -0,0 +1,138 @@
1
+ # Changelog
2
+
3
+ All notable changes to `dataenginex` will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.4.11] - 2026-02-27
11
+
12
+ ### Changed
13
+
14
+ - Added `environment` label support across HTTP metrics counters/histograms/gauges and middleware emission.
15
+ - Aligned alert rule histogram quantile expressions with explicit bucket aggregation by `le` and `environment`.
16
+ - Standardized docs and release prep metadata for CSV-canonical roadmap and setup workflow updates.
17
+
18
+ ## [0.4.10] - 2026-02-21
19
+
20
+ ### Added
21
+
22
+ - `examples/` directory with 4 runnable quickstart scripts
23
+ - `01_hello_pipeline.py` — profiler, schema validation, medallion config
24
+ - `02_api_quickstart.py` — FastAPI app with health, v1 router, metrics
25
+ - `03_quality_gate.py` — QualityGate evaluations against layer thresholds
26
+ - `04_ml_training.py` — SklearnTrainer, ModelRegistry, DriftDetector demo
27
+ - `examples/README.md` with table of examples and run instructions
28
+
29
+ ## [0.4.8] - 2026-02-21
30
+
31
+ ### Added
32
+
33
+ - PySpark local-mode test fixtures in `tests/conftest.py` (session-scoped `spark` session)
34
+ - Sample DataFrame fixtures: `spark_df_jobs`, `spark_df_weather`, `spark_df_empty`
35
+ - `requires_pyspark` skip marker — tests auto-skip when PySpark is not installed
36
+ - `tests/fixtures/sample_data.py` — factory helpers for job, user, and weather records
37
+ - `tests/unit/test_spark_fixtures.py` — validates PySpark fixture behaviour
38
+
39
+ ## [0.4.6] - 2026-02-21
40
+
41
+ ### Added
42
+
43
+ - `QualityGate` — orchestrates quality checks at medallion layer transitions
44
+ - `QualityStore` — in-memory store accumulating per-layer quality metrics
45
+ - `QualityResult` — immutable dataclass capturing evaluation outcomes
46
+ - `QualityDimension` — StrEnum for named quality dimensions
47
+ - `/api/v1/data/quality/{layer}` endpoint for per-layer quality history
48
+ - `set_quality_store()` / `get_quality_store()` for wiring quality at app startup
49
+ - New exports in `dataenginex.core` and `dataenginex.api`
50
+
51
+ ### Changed
52
+
53
+ - `/api/v1/data/quality` now returns live metrics from `QualityStore` (was placeholder zeros)
54
+ - Wired `DataProfiler`, `DataQualityChecks`, and `QualityScorer` into `QualityGate` pipeline
55
+
56
+ ## [0.4.5] - 2026-02-21
57
+
58
+ ### Added
59
+
60
+ - `StorageBackend` ABC with proper `@abstractmethod` contracts
61
+ - `S3Storage` backend for AWS S3 (requires `boto3`)
62
+ - `GCSStorage` backend for Google Cloud Storage (requires `google-cloud-storage`)
63
+ - Re-exported `StorageBackend` from `dataenginex.lakehouse`
64
+
65
+ ### Changed
66
+
67
+ - Refactored `StorageBackend` from plain class to proper `ABC` subclass
68
+ - Updated `lakehouse.__init__` to export all 4 storage backends + ABC
69
+
70
+ ## [0.4.3] - 2026-02-21
71
+
72
+ ### Added
73
+
74
+ - Comprehensive attribute-level docstrings on all public dataclasses
75
+ - `from __future__ import annotations` in all source modules
76
+ - Module-level class/function inventory docstrings
77
+ - mkdocs API reference configuration with `mkdocstrings` plugin
78
+ - API reference pages for all 7 subpackages under `docs/api-reference/`
79
+
80
+ ### Changed
81
+
82
+ - Upgraded mkdocs theme from `mkdocs` to `material`
83
+ - Enhanced module docstrings in middleware, core, and validators
84
+
85
+ ## [0.4.1] - 2026-02-21
86
+
87
+ ### Added
88
+
89
+ - CHANGELOG.md with Keep a Changelog format
90
+ - Release workflow extracts changelog notes for GitHub Releases automatically
91
+
92
+ ### Changed
93
+
94
+ - `release.yml` now reads `packages/dataenginex/CHANGELOG.md` for release notes
95
+
96
+ ## [0.4.0] - 2026-02-21
97
+
98
+ ### Added
99
+
100
+ - Stable `__all__` exports in every subpackage `__init__.py`
101
+ - `from __future__ import annotations` in all public modules
102
+ - Comprehensive module-level docstrings with usage examples
103
+ - New public API exports: `ComponentHealth`, `AuthMiddleware`, `AuthUser`,
104
+ `create_token`, `decode_token`, `BadRequestError`, `NotFoundError`,
105
+ `PaginationMeta`, `RateLimiter`, `RateLimitMiddleware`,
106
+ `ConnectorStatus`, `FetchResult`, `ColumnProfile`, `get_logger`, `get_tracer`
107
+
108
+ ### Changed
109
+
110
+ - Reorganized `__all__` in all subpackages for logical grouping
111
+ - Updated package version to 0.4.0
112
+
113
+ ## [0.3.5] - 2026-02-20
114
+
115
+ ### Added
116
+
117
+ - Production hardening: structured logging, Prometheus/OTel, health probes
118
+ - Data connectors: `RestConnector`, `FileConnector` with async interface
119
+ - Schema registry with versioned schema management
120
+ - Data profiler with automated dataset statistics
121
+ - Lakehouse catalog, partitioning, and storage backends
122
+ - ML framework: trainer, model registry, drift detection, serving
123
+ - Warehouse transforms and persistent lineage tracking
124
+ - JWT authentication middleware
125
+ - Rate limiting middleware
126
+ - Cursor-based pagination utilities
127
+ - Versioned API router (`/api/v1/`)
128
+
129
+ [Unreleased]: https://github.com/TheDataEngineX/DEX/compare/v0.4.11...HEAD
130
+ [0.4.11]: https://github.com/TheDataEngineX/DEX/compare/v0.4.10...v0.4.11
131
+ [0.4.10]: https://github.com/TheDataEngineX/DEX/compare/v0.4.8...v0.4.10
132
+ [0.4.8]: https://github.com/TheDataEngineX/DEX/compare/v0.4.6...v0.4.8
133
+ [0.4.6]: https://github.com/TheDataEngineX/DEX/compare/v0.4.5...v0.4.6
134
+ [0.4.5]: https://github.com/TheDataEngineX/DEX/compare/v0.4.3...v0.4.5
135
+ [0.4.3]: https://github.com/TheDataEngineX/DEX/compare/v0.4.1...v0.4.3
136
+ [0.4.1]: https://github.com/TheDataEngineX/DEX/compare/v0.4.0...v0.4.1
137
+ [0.4.0]: https://github.com/TheDataEngineX/DEX/compare/v0.3.5...v0.4.0
138
+ [0.3.5]: https://github.com/TheDataEngineX/DEX/releases/tag/v0.3.5
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataenginex
3
+ Version: 0.4.11
4
+ Summary: DataEngineX - Core framework for data engineering projects
5
+ Author-email: Jay <jayapal.myaka99@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: email-validator>=2.0.0
9
+ Requires-Dist: fastapi>=0.128.4
10
+ Requires-Dist: httpx>=0.28.0
11
+ Requires-Dist: loguru>=0.7.3
12
+ Requires-Dist: opentelemetry-api>=1.39.0
13
+ Requires-Dist: opentelemetry-exporter-otlp>=1.39.0
14
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.60b1
15
+ Requires-Dist: opentelemetry-sdk>=1.39.0
16
+ Requires-Dist: prometheus-client>=0.24.0
17
+ Requires-Dist: python-dotenv>=1.2.0
18
+ Requires-Dist: python-json-logger>=4.0.0
19
+ Requires-Dist: pyyaml>=6.0.2
20
+ Requires-Dist: structlog>=25.5.0
21
+ Requires-Dist: uvicorn>=0.40.0
22
+ Provides-Extra: all
23
+ Requires-Dist: boto3>=1.35.0; extra == 'all'
24
+ Requires-Dist: google-cloud-storage>=2.18.0; extra == 'all'
25
+ Requires-Dist: pyarrow>=18.0.0; extra == 'all'
26
+ Provides-Extra: cloud
27
+ Requires-Dist: boto3>=1.35.0; extra == 'cloud'
28
+ Requires-Dist: google-cloud-storage>=2.18.0; extra == 'cloud'
29
+ Provides-Extra: gcs
30
+ Requires-Dist: google-cloud-storage>=2.18.0; extra == 'gcs'
31
+ Provides-Extra: parquet
32
+ Requires-Dist: pyarrow>=18.0.0; extra == 'parquet'
33
+ Provides-Extra: s3
34
+ Requires-Dist: boto3>=1.35.0; extra == 's3'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # dataenginex
38
+
39
+ `dataenginex` is the core DataEngineX framework package for building observable, production-ready data and API services.
40
+
41
+ It provides:
42
+ - FastAPI application primitives and API extensions
43
+ - Middleware for structured logging, metrics, and tracing
44
+ - Data quality and validation utilities
45
+ - Lakehouse and warehouse building blocks
46
+ - Reusable ML support modules for model-serving workflows
47
+
48
+ ## Install
49
+
50
+ ```bash
51
+ pip install dataenginex
52
+ ```
53
+
54
+ ## Package Scope
55
+
56
+ This package is the core library from the DEX monorepo.
57
+ `careerdex` and `weatherdex` are maintained in the same repository but are not part of this package release flow.
58
+
59
+ ## Quick Usage
60
+
61
+ ```python
62
+ from dataenginex import __version__
63
+
64
+ print(__version__)
65
+ ```
66
+
67
+ ## Source and Docs
68
+
69
+ - Repository: https://github.com/TheDataEngineX/DEX
70
+ - CI/CD guide: `docs/CI_CD.md`
71
+ - Release notes: `packages/dataenginex/src/dataenginex/RELEASE_NOTES.md`
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dataenginex"
3
- version = "0.3.4"
3
+ version = "0.4.11"
4
4
  description = "DataEngineX - Core framework for data engineering projects"
5
5
  authors = [
6
6
  {name = "Jay", email = "jayapal.myaka99@gmail.com"}
@@ -25,13 +25,23 @@ dependencies = [
25
25
  "httpx>=0.28.0",
26
26
  ]
27
27
 
28
- [build-system]
29
- requires = ["poetry-core>=2.0.0,<3.0.0"]
30
- build-backend = "poetry.core.masonry.api"
28
+ [project.optional-dependencies]
29
+ s3 = ["boto3>=1.35.0"]
30
+ gcs = ["google-cloud-storage>=2.18.0"]
31
+ cloud = [
32
+ "boto3>=1.35.0",
33
+ "google-cloud-storage>=2.18.0",
34
+ ]
35
+ parquet = ["pyarrow>=18.0.0"]
36
+ all = [
37
+ "boto3>=1.35.0",
38
+ "google-cloud-storage>=2.18.0",
39
+ "pyarrow>=18.0.0",
40
+ ]
31
41
 
32
- [tool.poetry]
33
- package-mode = true
42
+ [build-system]
43
+ requires = ["hatchling>=1.25.0"]
44
+ build-backend = "hatchling.build"
34
45
 
35
- [[tool.poetry.packages]]
36
- include = "dataenginex"
37
- from = "src"
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/dataenginex"]
@@ -30,6 +30,6 @@ print(__version__)
30
30
 
31
31
  ## Source and Docs
32
32
 
33
- - Repository: https://github.com/data-literate/DEX
33
+ - Repository: https://github.com/TheDataEngineX/DEX
34
34
  - CI/CD guide: `docs/CI_CD.md`
35
35
  - Release notes: `packages/dataenginex/src/dataenginex/RELEASE_NOTES.md`
@@ -3,6 +3,16 @@
3
3
  This document tracks published package releases for `dataenginex` only.
4
4
  Only include changes that modify files under `packages/dataenginex/src/dataenginex/**`.
5
5
 
6
+ ## v0.3.5 - 2026-02-20
7
+
8
+ - Released package version `0.3.5`.
9
+ - Tag: `v0.3.5`
10
+ - Release title: `Release v0.3.5`
11
+ - Changes in this release:
12
+ - Hardened `PyPI Publish` workflow for trusted publishing.
13
+ - Added release-only publish gating and build-only behavior for manual dispatch.
14
+ - Switched publish job environment names to repo vars for stricter workflow validation compatibility.
15
+
6
16
  ## v0.3.4 - 2026-02-20
7
17
 
8
18
  - Released package version `0.3.4`.
@@ -0,0 +1,34 @@
1
+ """
2
+ DataEngineX (DEX) — Core framework for data engineering projects.
3
+
4
+ Public API surface. Import from top-level or from subpackages:
5
+
6
+ from dataenginex import __version__
7
+ from dataenginex.api import HealthChecker, HealthStatus
8
+ from dataenginex.core import MedallionArchitecture, DataLayer
9
+ from dataenginex.data import DataConnector, DataProfiler, SchemaRegistry
10
+ from dataenginex.lakehouse import DataCatalog, ParquetStorage
11
+ from dataenginex.middleware import configure_logging, configure_tracing
12
+ from dataenginex.ml import ModelRegistry, SklearnTrainer, DriftDetector
13
+ from dataenginex.warehouse import PersistentLineage, TransformPipeline
14
+
15
+ Submodules:
16
+ api – FastAPI application, health checks, error handling, pagination
17
+ core – Schemas, validators, medallion architecture, pipeline config
18
+ data – Data connectors, profiler, schema registry
19
+ lakehouse – Storage backends, data catalog, partitioning
20
+ middleware – Logging, metrics, tracing, request middleware
21
+ ml – ML training, model registry, drift detection, serving
22
+ warehouse – Transforms, persistent lineage tracking
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from importlib.metadata import PackageNotFoundError, version
28
+
29
+ try:
30
+ __version__ = version("dataenginex")
31
+ except PackageNotFoundError:
32
+ __version__ = "0.4.11"
33
+
34
+ __all__ = ["__version__"]
@@ -0,0 +1,54 @@
1
+ """Reusable API components — auth, health, errors, pagination, rate limiting, quality.
2
+
3
+ Public API::
4
+
5
+ from dataenginex.api import (
6
+ HealthChecker, HealthStatus, ComponentHealth,
7
+ APIHTTPException, BadRequestError, NotFoundError, ServiceUnavailableError,
8
+ PaginatedResponse, PaginationMeta, paginate,
9
+ AuthMiddleware, AuthUser, create_token, decode_token,
10
+ RateLimiter, RateLimitMiddleware,
11
+ get_quality_store, set_quality_store,
12
+ )
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from .auth import AuthMiddleware, AuthUser, create_token, decode_token
18
+ from .errors import (
19
+ APIHTTPException,
20
+ BadRequestError,
21
+ NotFoundError,
22
+ ServiceUnavailableError,
23
+ )
24
+ from .health import ComponentHealth, HealthChecker, HealthStatus
25
+ from .pagination import PaginatedResponse, PaginationMeta, paginate
26
+ from .rate_limit import RateLimiter, RateLimitMiddleware
27
+ from .routers.v1 import get_quality_store, set_quality_store
28
+
29
+ __all__ = [
30
+ # Auth
31
+ "AuthMiddleware",
32
+ "AuthUser",
33
+ "create_token",
34
+ "decode_token",
35
+ # Errors
36
+ "APIHTTPException",
37
+ "BadRequestError",
38
+ "NotFoundError",
39
+ "ServiceUnavailableError",
40
+ # Health
41
+ "ComponentHealth",
42
+ "HealthChecker",
43
+ "HealthStatus",
44
+ # Pagination
45
+ "PaginatedResponse",
46
+ "PaginationMeta",
47
+ "paginate",
48
+ # Quality store
49
+ "get_quality_store",
50
+ "set_quality_store",
51
+ # Rate limiting
52
+ "RateLimiter",
53
+ "RateLimitMiddleware",
54
+ ]
@@ -22,6 +22,15 @@ class HealthStatus(StrEnum):
22
22
 
23
23
  @dataclass(frozen=True)
24
24
  class ComponentHealth:
25
+ """Health status of a single dependency component.
26
+
27
+ Attributes:
28
+ name: Component identifier (e.g. ``"database"``, ``"cache"``).
29
+ status: Current health status.
30
+ message: Optional human-readable message.
31
+ duration_ms: Time taken for the health check in milliseconds.
32
+ """
33
+
25
34
  name: str
26
35
  status: HealthStatus
27
36
  message: str | None = None
@@ -129,5 +138,5 @@ class HealthChecker:
129
138
  asyncio.open_connection(host, port), timeout=self.timeout_seconds
130
139
  )
131
140
  return True, "reachable"
132
- except (TimeoutError, OSError) as exc:
141
+ except (TimeoutError, ConnectionRefusedError, OSError) as exc:
133
142
  return False, f"error={exc.__class__.__name__}"
@@ -0,0 +1,3 @@
1
+ """API routers package."""
2
+
3
+ from __future__ import annotations
@@ -12,9 +12,25 @@ from typing import Any
12
12
  from fastapi import APIRouter
13
13
 
14
14
  from dataenginex.api.pagination import PaginatedResponse, paginate
15
+ from dataenginex.core.quality import QualityStore
15
16
 
16
17
  router = APIRouter(prefix="/api/v1", tags=["v1"])
17
18
 
19
+ # Module-level quality store — shared across requests.
20
+ # Populate via ``set_quality_store()`` from application startup.
21
+ _quality_store: QualityStore = QualityStore()
22
+
23
+
24
+ def set_quality_store(store: QualityStore) -> None:
25
+ """Replace the module-level quality store (call at app startup)."""
26
+ global _quality_store # noqa: PLW0603
27
+ _quality_store = store
28
+
29
+
30
+ def get_quality_store() -> QualityStore:
31
+ """Return the active quality store."""
32
+ return _quality_store
33
+
18
34
 
19
35
  # ---------------------------------------------------------------------------
20
36
  # Data pipeline endpoints
@@ -35,28 +51,28 @@ def list_data_sources(cursor: str | None = None, limit: int = 20) -> PaginatedRe
35
51
 
36
52
  @router.get("/data/quality")
37
53
  def data_quality_summary() -> dict[str, Any]:
38
- """Return a summary of data quality metrics.
54
+ """Return a summary of data quality metrics from the quality store.
55
+
56
+ Returns live metrics when a ``QualityStore`` has been populated via
57
+ ``QualityGate.evaluate()``. Falls back to zeros when no evaluations
58
+ have been recorded yet.
59
+ """
60
+ return _quality_store.summary()
61
+
62
+
63
+ @router.get("/data/quality/{layer}")
64
+ def data_quality_layer(layer: str, limit: int = 10) -> dict[str, Any]:
65
+ """Return quality history for a specific medallion layer.
39
66
 
40
- .. note::
41
- Scores are **placeholder values** until the quality-tracking
42
- subsystem is wired in. See Issue backlog for live metrics epic.
67
+ Args:
68
+ layer: One of ``bronze``, ``silver``, ``gold``.
69
+ limit: Maximum number of history entries to return.
43
70
  """
44
- # TODO(#future): Replace with live metrics from DataProfiler / quality store
71
+ latest = _quality_store.latest(layer)
45
72
  return {
46
- "overall_score": 0.0,
47
- "dimensions": {
48
- "completeness": 0.0,
49
- "accuracy": 0.0,
50
- "consistency": 0.0,
51
- "timeliness": 0.0,
52
- "uniqueness": 0.0,
53
- },
54
- "layer_scores": {
55
- "bronze": 0.0,
56
- "silver": 0.0,
57
- "gold": 0.0,
58
- },
59
- "_note": "Placeholder — connect quality store for live data",
73
+ "layer": layer,
74
+ "latest": latest.to_dict() if latest else None,
75
+ "history": _quality_store.history(layer, limit=limit),
60
76
  }
61
77
 
62
78
 
@@ -0,0 +1,81 @@
1
+ """Core framework — schemas, validators, medallion architecture, pipeline config, quality.
2
+
3
+ Public API::
4
+
5
+ from dataenginex.core import (
6
+ # Medallion
7
+ MedallionArchitecture, DataLayer, StorageFormat, LayerConfiguration,
8
+ # Pipeline
9
+ PipelineConfig, PipelineMetrics,
10
+ # Quality
11
+ QualityGate, QualityStore, QualityResult, QualityDimension,
12
+ # Schemas
13
+ JobPosting, JobSourceEnum, UserProfile,
14
+ ErrorDetail, ErrorResponse, RootResponse, HealthResponse,
15
+ DataQualityReport, PipelineExecutionMetadata,
16
+ # Validators
17
+ SchemaValidator, DataQualityChecks, DataHash,
18
+ QualityScorer, ValidationReport,
19
+ )
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from .medallion_architecture import (
25
+ DataLayer,
26
+ LayerConfiguration,
27
+ MedallionArchitecture,
28
+ StorageFormat,
29
+ )
30
+ from .pipeline_config import PipelineConfig, PipelineMetrics
31
+ from .quality import QualityDimension, QualityGate, QualityResult, QualityStore
32
+ from .schemas import (
33
+ DataQualityReport,
34
+ ErrorDetail,
35
+ ErrorResponse,
36
+ HealthResponse,
37
+ JobPosting,
38
+ JobSourceEnum,
39
+ PipelineExecutionMetadata,
40
+ RootResponse,
41
+ UserProfile,
42
+ )
43
+ from .validators import (
44
+ DataHash,
45
+ DataQualityChecks,
46
+ QualityScorer,
47
+ SchemaValidator,
48
+ ValidationReport,
49
+ )
50
+
51
+ __all__ = [
52
+ # Medallion architecture
53
+ "DataLayer",
54
+ "LayerConfiguration",
55
+ "MedallionArchitecture",
56
+ "StorageFormat",
57
+ # Pipeline
58
+ "PipelineConfig",
59
+ "PipelineMetrics",
60
+ # Quality gate
61
+ "QualityDimension",
62
+ "QualityGate",
63
+ "QualityResult",
64
+ "QualityStore",
65
+ # Schemas
66
+ "DataQualityReport",
67
+ "ErrorDetail",
68
+ "ErrorResponse",
69
+ "HealthResponse",
70
+ "JobPosting",
71
+ "JobSourceEnum",
72
+ "PipelineExecutionMetadata",
73
+ "RootResponse",
74
+ "UserProfile",
75
+ # Validators
76
+ "DataHash",
77
+ "DataQualityChecks",
78
+ "QualityScorer",
79
+ "SchemaValidator",
80
+ "ValidationReport",
81
+ ]