pycharter 0.0.25__py3-none-any.whl → 0.0.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycharter/__init__.py +6 -0
- pycharter/api/README.md +1 -1
- pycharter/api/dependencies/auth.py +158 -0
- pycharter/api/main.py +30 -2
- pycharter/api/models/etl.py +66 -0
- pycharter/api/routes/v1/__init__.py +4 -0
- pycharter/api/routes/v1/auth.py +97 -0
- pycharter/api/routes/v1/contracts.py +10 -8
- pycharter/api/routes/v1/etl.py +131 -0
- pycharter/cli.py +1 -1
- pycharter/config.py +69 -0
- pycharter/contract_builder/builder.py +32 -37
- pycharter/data/seed/compliance_frameworks.yaml +22 -0
- pycharter/data/seed/contracts.yaml +130 -0
- pycharter/data/seed/data_feeds.yaml +22 -0
- pycharter/data/seed/domains.yaml +13 -0
- pycharter/data/seed/environments.yaml +19 -0
- pycharter/data/seed/owners.yaml +21 -0
- pycharter/data/seed/systems.yaml +13 -0
- pycharter/data/seed/tags.yaml +25 -0
- pycharter/data/templates/contract/README.md +31 -14
- pycharter/data/templates/contract/template_contract.yaml +37 -0
- pycharter/data/templates/etl/README.md +1 -1
- pycharter/data/templates/etl/extract_with_validation.yaml +86 -0
- pycharter/data/templates/etl/load_with_validation.yaml +111 -0
- pycharter/data/templates/etl/settings.yaml +55 -0
- pycharter/db/cli.py +126 -4
- pycharter/db/migrations/versions/20260122000000_change_artifact_unique_constraints_to_title_version.py +2 -2
- pycharter/etl_generator/INTERFACES.md +6 -7
- pycharter/etl_generator/__init__.py +47 -11
- pycharter/etl_generator/config_models.py +673 -0
- pycharter/etl_generator/config_validator.py +133 -157
- pycharter/etl_generator/context.py +3 -0
- pycharter/etl_generator/database.py +5 -1
- pycharter/etl_generator/extractors/__init__.py +4 -2
- pycharter/etl_generator/extractors/cloud_storage.py +9 -9
- pycharter/etl_generator/extractors/database.py +2 -2
- pycharter/etl_generator/extractors/factory.py +15 -33
- pycharter/etl_generator/extractors/file.py +2 -2
- pycharter/etl_generator/extractors/http.py +2 -2
- pycharter/etl_generator/extractors/mongodb.py +393 -0
- pycharter/etl_generator/extractors/streaming.py +2 -2
- pycharter/etl_generator/loaders/__init__.py +15 -9
- pycharter/etl_generator/loaders/{cloud_storage_loader.py → cloud_storage.py} +95 -2
- pycharter/etl_generator/loaders/factory.py +16 -29
- pycharter/etl_generator/loaders/file.py +135 -1
- pycharter/etl_generator/loaders/mongodb.py +416 -0
- pycharter/etl_generator/pipeline.py +283 -164
- pycharter/etl_generator/result.py +16 -0
- pycharter/etl_generator/schemas/__init__.py +71 -42
- pycharter/etl_generator/transformers/config.py +3 -2
- pycharter/etl_generator/transformers/simple_operations.py +57 -4
- pycharter/etl_generator/validation.py +551 -0
- pycharter/runtime_validator/__init__.py +7 -0
- pycharter/runtime_validator/utils.py +33 -0
- pycharter/runtime_validator/validator.py +13 -10
- pycharter/ui/package-lock.json +50 -41
- pycharter/ui/package.json +2 -1
- pycharter/ui/static/404/index.html +1 -1
- pycharter/ui/static/404.html +1 -1
- pycharter/ui/static/__next.__PAGE__.txt +2 -2
- pycharter/ui/static/__next._full.txt +7 -7
- pycharter/ui/static/__next._head.txt +1 -1
- pycharter/ui/static/__next._index.txt +6 -6
- pycharter/ui/static/__next._tree.txt +2 -2
- pycharter/ui/static/_next/static/chunks/0fc1f70b787b8845.js +1 -0
- pycharter/ui/static/_next/static/chunks/17bb8075d7b75663.css +1 -0
- pycharter/ui/static/_next/static/chunks/381932864dcbfdb8.js +1 -0
- pycharter/ui/static/_next/static/chunks/4c951b8e4507e2b3.js +1 -0
- pycharter/ui/static/_next/static/chunks/68b87a6f65abd3ed.js +1 -0
- pycharter/ui/static/_next/static/chunks/78572617b8fae189.js +1 -0
- pycharter/ui/static/_next/static/chunks/8b7be2803e3fe184.js +1 -0
- pycharter/ui/static/_next/static/chunks/a8e529fd1e67f121.js +1 -0
- pycharter/ui/static/_next/static/chunks/c35d998f80be3ff5.js +1 -0
- pycharter/ui/static/_next/static/chunks/e453aa5d01c32c17.js +1 -0
- pycharter/ui/static/_next/static/chunks/f2d240eb057f898a.js +970 -0
- pycharter/ui/static/_next/static/chunks/f7722448f6040846.js +1 -0
- pycharter/ui/static/_not-found/__next._full.txt +12 -12
- pycharter/ui/static/_not-found/__next._head.txt +3 -3
- pycharter/ui/static/_not-found/__next._index.txt +8 -8
- pycharter/ui/static/_not-found/__next._not-found.__PAGE__.txt +2 -2
- pycharter/ui/static/_not-found/__next._not-found.txt +3 -3
- pycharter/ui/static/_not-found/__next._tree.txt +2 -2
- pycharter/ui/static/_not-found/index.html +1 -1
- pycharter/ui/static/_not-found/index.txt +12 -12
- pycharter/ui/static/contracts/__next._full.txt +7 -7
- pycharter/ui/static/contracts/__next._head.txt +1 -1
- pycharter/ui/static/contracts/__next._index.txt +6 -6
- pycharter/ui/static/contracts/__next._tree.txt +2 -2
- pycharter/ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- pycharter/ui/static/contracts/__next.contracts.txt +1 -1
- pycharter/ui/static/contracts/index.html +1 -1
- pycharter/ui/static/contracts/index.txt +7 -7
- pycharter/ui/static/documentation/__next._full.txt +7 -7
- pycharter/ui/static/documentation/__next._head.txt +1 -1
- pycharter/ui/static/documentation/__next._index.txt +6 -6
- pycharter/ui/static/documentation/__next._tree.txt +2 -2
- pycharter/ui/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- pycharter/ui/static/documentation/__next.documentation.txt +1 -1
- pycharter/ui/static/documentation/index.html +3 -3
- pycharter/ui/static/documentation/index.txt +7 -7
- pycharter/ui/static/etl/__next._full.txt +21 -0
- pycharter/ui/static/etl/__next._head.txt +7 -0
- pycharter/ui/static/etl/__next._index.txt +9 -0
- pycharter/ui/static/etl/__next._tree.txt +2 -0
- pycharter/ui/static/etl/__next.etl.__PAGE__.txt +9 -0
- pycharter/ui/static/etl/__next.etl.txt +4 -0
- pycharter/ui/static/etl/index.html +2 -0
- pycharter/ui/static/etl/index.txt +21 -0
- pycharter/ui/static/index.html +1 -1
- pycharter/ui/static/index.txt +7 -7
- pycharter/ui/static/metadata/__next._full.txt +7 -7
- pycharter/ui/static/metadata/__next._head.txt +1 -1
- pycharter/ui/static/metadata/__next._index.txt +6 -6
- pycharter/ui/static/metadata/__next._tree.txt +2 -2
- pycharter/ui/static/metadata/__next.metadata.__PAGE__.txt +2 -2
- pycharter/ui/static/metadata/__next.metadata.txt +1 -1
- pycharter/ui/static/metadata/index.html +1 -1
- pycharter/ui/static/metadata/index.txt +7 -7
- pycharter/ui/static/quality/__next._full.txt +7 -7
- pycharter/ui/static/quality/__next._head.txt +1 -1
- pycharter/ui/static/quality/__next._index.txt +6 -6
- pycharter/ui/static/quality/__next._tree.txt +2 -2
- pycharter/ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- pycharter/ui/static/quality/__next.quality.txt +1 -1
- pycharter/ui/static/quality/index.html +2 -2
- pycharter/ui/static/quality/index.txt +7 -7
- pycharter/ui/static/rules/__next._full.txt +7 -7
- pycharter/ui/static/rules/__next._head.txt +1 -1
- pycharter/ui/static/rules/__next._index.txt +6 -6
- pycharter/ui/static/rules/__next._tree.txt +2 -2
- pycharter/ui/static/rules/__next.rules.__PAGE__.txt +2 -2
- pycharter/ui/static/rules/__next.rules.txt +1 -1
- pycharter/ui/static/rules/index.html +1 -1
- pycharter/ui/static/rules/index.txt +7 -7
- pycharter/ui/static/schemas/__next._full.txt +7 -7
- pycharter/ui/static/schemas/__next._head.txt +1 -1
- pycharter/ui/static/schemas/__next._index.txt +6 -6
- pycharter/ui/static/schemas/__next._tree.txt +2 -2
- pycharter/ui/static/schemas/__next.schemas.__PAGE__.txt +2 -2
- pycharter/ui/static/schemas/__next.schemas.txt +1 -1
- pycharter/ui/static/schemas/index.html +1 -1
- pycharter/ui/static/schemas/index.txt +7 -7
- pycharter/ui/static/settings/__next._full.txt +7 -7
- pycharter/ui/static/settings/__next._head.txt +1 -1
- pycharter/ui/static/settings/__next._index.txt +6 -6
- pycharter/ui/static/settings/__next._tree.txt +2 -2
- pycharter/ui/static/settings/__next.settings.__PAGE__.txt +2 -2
- pycharter/ui/static/settings/__next.settings.txt +1 -1
- pycharter/ui/static/settings/index.html +1 -1
- pycharter/ui/static/settings/index.txt +7 -7
- pycharter/ui/static/static/404/index.html +1 -1
- pycharter/ui/static/static/404.html +1 -1
- pycharter/ui/static/static/__next.__PAGE__.txt +1 -1
- pycharter/ui/static/static/__next._full.txt +1 -1
- pycharter/ui/static/static/__next._head.txt +1 -1
- pycharter/ui/static/static/__next._index.txt +1 -1
- pycharter/ui/static/static/__next._tree.txt +1 -1
- pycharter/ui/static/static/_not-found/__next._full.txt +1 -1
- pycharter/ui/static/static/_not-found/__next._head.txt +1 -1
- pycharter/ui/static/static/_not-found/__next._index.txt +1 -1
- pycharter/ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- pycharter/ui/static/static/_not-found/__next._not-found.txt +1 -1
- pycharter/ui/static/static/_not-found/__next._tree.txt +1 -1
- pycharter/ui/static/static/_not-found/index.html +1 -1
- pycharter/ui/static/static/_not-found/index.txt +1 -1
- pycharter/ui/static/static/contracts/__next._full.txt +2 -2
- pycharter/ui/static/static/contracts/__next._head.txt +1 -1
- pycharter/ui/static/static/contracts/__next._index.txt +1 -1
- pycharter/ui/static/static/contracts/__next._tree.txt +1 -1
- pycharter/ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- pycharter/ui/static/static/contracts/__next.contracts.txt +1 -1
- pycharter/ui/static/static/contracts/index.html +1 -1
- pycharter/ui/static/static/contracts/index.txt +2 -2
- pycharter/ui/static/static/documentation/__next._full.txt +1 -1
- pycharter/ui/static/static/documentation/__next._head.txt +1 -1
- pycharter/ui/static/static/documentation/__next._index.txt +1 -1
- pycharter/ui/static/static/documentation/__next._tree.txt +1 -1
- pycharter/ui/static/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- pycharter/ui/static/static/documentation/__next.documentation.txt +1 -1
- pycharter/ui/static/static/documentation/index.html +2 -2
- pycharter/ui/static/static/documentation/index.txt +1 -1
- pycharter/ui/static/static/index.html +1 -1
- pycharter/ui/static/static/index.txt +1 -1
- pycharter/ui/static/static/metadata/__next._full.txt +1 -1
- pycharter/ui/static/static/metadata/__next._head.txt +1 -1
- pycharter/ui/static/static/metadata/__next._index.txt +1 -1
- pycharter/ui/static/static/metadata/__next._tree.txt +1 -1
- pycharter/ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- pycharter/ui/static/static/metadata/__next.metadata.txt +1 -1
- pycharter/ui/static/static/metadata/index.html +1 -1
- pycharter/ui/static/static/metadata/index.txt +1 -1
- pycharter/ui/static/static/quality/__next._full.txt +2 -2
- pycharter/ui/static/static/quality/__next._head.txt +1 -1
- pycharter/ui/static/static/quality/__next._index.txt +1 -1
- pycharter/ui/static/static/quality/__next._tree.txt +1 -1
- pycharter/ui/static/static/quality/__next.quality.__PAGE__.txt +2 -2
- pycharter/ui/static/static/quality/__next.quality.txt +1 -1
- pycharter/ui/static/static/quality/index.html +2 -2
- pycharter/ui/static/static/quality/index.txt +2 -2
- pycharter/ui/static/static/rules/__next._full.txt +1 -1
- pycharter/ui/static/static/rules/__next._head.txt +1 -1
- pycharter/ui/static/static/rules/__next._index.txt +1 -1
- pycharter/ui/static/static/rules/__next._tree.txt +1 -1
- pycharter/ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- pycharter/ui/static/static/rules/__next.rules.txt +1 -1
- pycharter/ui/static/static/rules/index.html +1 -1
- pycharter/ui/static/static/rules/index.txt +1 -1
- pycharter/ui/static/static/schemas/__next._full.txt +1 -1
- pycharter/ui/static/static/schemas/__next._head.txt +1 -1
- pycharter/ui/static/static/schemas/__next._index.txt +1 -1
- pycharter/ui/static/static/schemas/__next._tree.txt +1 -1
- pycharter/ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- pycharter/ui/static/static/schemas/__next.schemas.txt +1 -1
- pycharter/ui/static/static/schemas/index.html +1 -1
- pycharter/ui/static/static/schemas/index.txt +1 -1
- pycharter/ui/static/static/settings/__next._full.txt +1 -1
- pycharter/ui/static/static/settings/__next._head.txt +1 -1
- pycharter/ui/static/static/settings/__next._index.txt +1 -1
- pycharter/ui/static/static/settings/__next._tree.txt +1 -1
- pycharter/ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- pycharter/ui/static/static/settings/__next.settings.txt +1 -1
- pycharter/ui/static/static/settings/index.html +1 -1
- pycharter/ui/static/static/settings/index.txt +1 -1
- pycharter/ui/static/static/static/404/index.html +1 -1
- pycharter/ui/static/static/static/404.html +1 -1
- pycharter/ui/static/static/static/__next.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/__next._full.txt +2 -2
- pycharter/ui/static/static/static/__next._head.txt +1 -1
- pycharter/ui/static/static/static/__next._index.txt +2 -2
- pycharter/ui/static/static/static/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- pycharter/ui/static/static/static/_not-found/__next._full.txt +2 -2
- pycharter/ui/static/static/static/_not-found/__next._head.txt +1 -1
- pycharter/ui/static/static/static/_not-found/__next._index.txt +2 -2
- pycharter/ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/_not-found/__next._not-found.txt +1 -1
- pycharter/ui/static/static/static/_not-found/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/_not-found/index.html +1 -1
- pycharter/ui/static/static/static/_not-found/index.txt +2 -2
- pycharter/ui/static/static/static/contracts/__next._full.txt +3 -3
- pycharter/ui/static/static/static/contracts/__next._head.txt +1 -1
- pycharter/ui/static/static/static/contracts/__next._index.txt +2 -2
- pycharter/ui/static/static/static/contracts/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- pycharter/ui/static/static/static/contracts/__next.contracts.txt +1 -1
- pycharter/ui/static/static/static/contracts/index.html +1 -1
- pycharter/ui/static/static/static/contracts/index.txt +3 -3
- pycharter/ui/static/static/static/documentation/__next._full.txt +3 -3
- pycharter/ui/static/static/static/documentation/__next._head.txt +1 -1
- pycharter/ui/static/static/static/documentation/__next._index.txt +2 -2
- pycharter/ui/static/static/static/documentation/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- pycharter/ui/static/static/static/documentation/__next.documentation.txt +1 -1
- pycharter/ui/static/static/static/documentation/index.html +2 -2
- pycharter/ui/static/static/static/documentation/index.txt +3 -3
- pycharter/ui/static/static/static/index.html +1 -1
- pycharter/ui/static/static/static/index.txt +2 -2
- pycharter/ui/static/static/static/metadata/__next._full.txt +2 -2
- pycharter/ui/static/static/static/metadata/__next._head.txt +1 -1
- pycharter/ui/static/static/static/metadata/__next._index.txt +2 -2
- pycharter/ui/static/static/static/metadata/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/metadata/__next.metadata.txt +1 -1
- pycharter/ui/static/static/static/metadata/index.html +1 -1
- pycharter/ui/static/static/static/metadata/index.txt +2 -2
- pycharter/ui/static/static/static/quality/__next._full.txt +2 -2
- pycharter/ui/static/static/static/quality/__next._head.txt +1 -1
- pycharter/ui/static/static/static/quality/__next._index.txt +2 -2
- pycharter/ui/static/static/static/quality/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/quality/__next.quality.txt +1 -1
- pycharter/ui/static/static/static/quality/index.html +2 -2
- pycharter/ui/static/static/static/quality/index.txt +2 -2
- pycharter/ui/static/static/static/rules/__next._full.txt +2 -2
- pycharter/ui/static/static/static/rules/__next._head.txt +1 -1
- pycharter/ui/static/static/static/rules/__next._index.txt +2 -2
- pycharter/ui/static/static/static/rules/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/rules/__next.rules.txt +1 -1
- pycharter/ui/static/static/static/rules/index.html +1 -1
- pycharter/ui/static/static/static/rules/index.txt +2 -2
- pycharter/ui/static/static/static/schemas/__next._full.txt +2 -2
- pycharter/ui/static/static/static/schemas/__next._head.txt +1 -1
- pycharter/ui/static/static/static/schemas/__next._index.txt +2 -2
- pycharter/ui/static/static/static/schemas/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/schemas/__next.schemas.txt +1 -1
- pycharter/ui/static/static/static/schemas/index.html +1 -1
- pycharter/ui/static/static/static/schemas/index.txt +2 -2
- pycharter/ui/static/static/static/settings/__next._full.txt +2 -2
- pycharter/ui/static/static/static/settings/__next._head.txt +1 -1
- pycharter/ui/static/static/static/settings/__next._index.txt +2 -2
- pycharter/ui/static/static/static/settings/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/settings/__next.settings.txt +1 -1
- pycharter/ui/static/static/static/settings/index.html +1 -1
- pycharter/ui/static/static/static/settings/index.txt +2 -2
- pycharter/ui/static/static/static/static/.gitkeep +0 -0
- pycharter/ui/static/static/static/static/404/index.html +1 -0
- pycharter/ui/static/static/static/static/404.html +1 -0
- pycharter/ui/static/static/static/static/__next.__PAGE__.txt +10 -0
- pycharter/ui/static/static/static/static/__next._full.txt +30 -0
- pycharter/ui/static/static/static/static/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- pycharter/ui/static/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- pycharter/ui/static/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- pycharter/ui/static/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_clientMiddlewareManifest.json +1 -0
- pycharter/ui/static/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- pycharter/ui/static/static/static/static/_not-found/__next._full.txt +17 -0
- pycharter/ui/static/static/static/static/_not-found/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/_not-found/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- pycharter/ui/static/static/static/static/_not-found/__next._not-found.txt +4 -0
- pycharter/ui/static/static/static/static/_not-found/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/_not-found/index.html +1 -0
- pycharter/ui/static/static/static/static/_not-found/index.txt +17 -0
- pycharter/ui/static/static/static/static/contracts/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/contracts/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/contracts/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/contracts/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/contracts/__next.contracts.txt +4 -0
- pycharter/ui/static/static/static/static/contracts/index.html +1 -0
- pycharter/ui/static/static/static/static/contracts/index.txt +21 -0
- pycharter/ui/static/static/static/static/documentation/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/documentation/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/documentation/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/documentation/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/documentation/__next.documentation.txt +4 -0
- pycharter/ui/static/static/static/static/documentation/index.html +93 -0
- pycharter/ui/static/static/static/static/documentation/index.txt +21 -0
- pycharter/ui/static/static/static/static/index.html +1 -0
- pycharter/ui/static/static/static/static/index.txt +30 -0
- pycharter/ui/static/static/static/static/metadata/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/metadata/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/metadata/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/metadata/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/metadata/__next.metadata.txt +4 -0
- pycharter/ui/static/static/static/static/metadata/index.html +1 -0
- pycharter/ui/static/static/static/static/metadata/index.txt +21 -0
- pycharter/ui/static/static/static/static/quality/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/quality/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/quality/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/quality/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/quality/__next.quality.txt +4 -0
- pycharter/ui/static/static/static/static/quality/index.html +2 -0
- pycharter/ui/static/static/static/static/quality/index.txt +21 -0
- pycharter/ui/static/static/static/static/rules/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/rules/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/rules/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/rules/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/rules/__next.rules.txt +4 -0
- pycharter/ui/static/static/static/static/rules/index.html +1 -0
- pycharter/ui/static/static/static/static/rules/index.txt +21 -0
- pycharter/ui/static/static/static/static/schemas/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/schemas/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/schemas/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/schemas/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/schemas/__next.schemas.txt +4 -0
- pycharter/ui/static/static/static/static/schemas/index.html +1 -0
- pycharter/ui/static/static/static/static/schemas/index.txt +21 -0
- pycharter/ui/static/static/static/static/settings/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/settings/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/settings/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/settings/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/settings/__next.settings.txt +4 -0
- pycharter/ui/static/static/static/static/settings/index.html +1 -0
- pycharter/ui/static/static/static/static/settings/index.txt +21 -0
- pycharter/ui/static/static/static/static/validation/__next._full.txt +21 -0
- pycharter/ui/static/static/static/static/validation/__next._head.txt +7 -0
- pycharter/ui/static/static/static/static/validation/__next._index.txt +9 -0
- pycharter/ui/static/static/static/static/validation/__next._tree.txt +2 -0
- pycharter/ui/static/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- pycharter/ui/static/static/static/static/validation/__next.validation.txt +4 -0
- pycharter/ui/static/static/static/static/validation/index.html +1 -0
- pycharter/ui/static/static/static/static/validation/index.txt +21 -0
- pycharter/ui/static/static/static/validation/__next._full.txt +2 -2
- pycharter/ui/static/static/static/validation/__next._head.txt +1 -1
- pycharter/ui/static/static/static/validation/__next._index.txt +2 -2
- pycharter/ui/static/static/static/validation/__next._tree.txt +2 -2
- pycharter/ui/static/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- pycharter/ui/static/static/static/validation/__next.validation.txt +1 -1
- pycharter/ui/static/static/static/validation/index.html +1 -1
- pycharter/ui/static/static/static/validation/index.txt +2 -2
- pycharter/ui/static/static/validation/__next._full.txt +2 -2
- pycharter/ui/static/static/validation/__next._head.txt +1 -1
- pycharter/ui/static/static/validation/__next._index.txt +1 -1
- pycharter/ui/static/static/validation/__next._tree.txt +1 -1
- pycharter/ui/static/static/validation/__next.validation.__PAGE__.txt +2 -2
- pycharter/ui/static/static/validation/__next.validation.txt +1 -1
- pycharter/ui/static/static/validation/index.html +1 -1
- pycharter/ui/static/static/validation/index.txt +2 -2
- pycharter/ui/static/validation/__next._full.txt +7 -7
- pycharter/ui/static/validation/__next._head.txt +1 -1
- pycharter/ui/static/validation/__next._index.txt +6 -6
- pycharter/ui/static/validation/__next._tree.txt +2 -2
- pycharter/ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- pycharter/ui/static/validation/__next.validation.txt +1 -1
- pycharter/ui/static/validation/index.html +1 -1
- pycharter/ui/static/validation/index.txt +7 -7
- {pycharter-0.0.25.dist-info → pycharter-0.0.26.dist-info}/METADATA +57 -26
- pycharter-0.0.26.dist-info/RECORD +702 -0
- pycharter/etl_generator/config_loader.py +0 -394
- pycharter/etl_generator/loaders/cloud.py +0 -87
- pycharter/etl_generator/loaders/file_loader.py +0 -130
- pycharter/etl_generator/schemas/extract.json +0 -234
- pycharter/etl_generator/schemas/load.json +0 -202
- pycharter/etl_generator/schemas/pipeline.json +0 -94
- pycharter/etl_generator/schemas/transform.json +0 -171
- pycharter-0.0.25.dist-info/RECORD +0 -572
- /pycharter/ui/static/_next/static/{2gKjNv6YvE6BcIdFthBLs → YCnlK66gA7FV5vvcixspB}/_buildManifest.js +0 -0
- /pycharter/ui/static/_next/static/{2gKjNv6YvE6BcIdFthBLs → YCnlK66gA7FV5vvcixspB}/_clientMiddlewareManifest.json +0 -0
- /pycharter/ui/static/_next/static/{2gKjNv6YvE6BcIdFthBLs → YCnlK66gA7FV5vvcixspB}/_ssgManifest.js +0 -0
- /pycharter/ui/static/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /pycharter/ui/static/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_clientMiddlewareManifest.json +0 -0
- /pycharter/ui/static/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /pycharter/ui/static/{_next → static/_next}/static/chunks/26dfc590f7714c03.js +0 -0
- /pycharter/ui/static/{_next → static/_next}/static/chunks/34d289e6db2ef551.js +0 -0
- /pycharter/ui/static/{_next → static/_next}/static/chunks/99508d9d5869cc27.js +0 -0
- /pycharter/ui/static/{_next → static/_next}/static/chunks/b313c35a6ba76574.js +0 -0
- /pycharter/ui/static/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /pycharter/ui/static/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_clientMiddlewareManifest.json +0 -0
- /pycharter/ui/static/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /pycharter/ui/static/{_next → static/static/_next}/static/chunks/13d4a0fbd74c1ee4.js +0 -0
- /pycharter/ui/static/{_next → static/static/_next}/static/chunks/2edb43b48432ac04.js +0 -0
- /pycharter/ui/static/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /pycharter/ui/static/{_next → static/static/_next}/static/chunks/d2363397e1b2bcab.css +0 -0
- /pycharter/ui/static/{_next → static/static/static/_next}/static/chunks/2ab439ce003cd691.js +0 -0
- /pycharter/ui/static/{_next → static/static/static/_next}/static/chunks/49ca65abd26ae49e.js +0 -0
- /pycharter/ui/static/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /pycharter/ui/static/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /pycharter/ui/static/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /pycharter/ui/static/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /pycharter/ui/static/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /pycharter/ui/static/{_next → static/static/static/_next}/static/chunks/9667e7a3d359eb39.js +0 -0
- /pycharter/ui/static/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /pycharter/ui/static/{_next → static/static/static/_next}/static/chunks/c69f6cba366bd988.js +0 -0
- /pycharter/ui/static/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
- /pycharter/ui/static/{_next → static/static/static/_next}/static/chunks/f061a4be97bfc3b3.js +0 -0
- {pycharter-0.0.25.dist-info → pycharter-0.0.26.dist-info}/WHEEL +0 -0
- {pycharter-0.0.25.dist-info → pycharter-0.0.26.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.25.dist-info → pycharter-0.0.26.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.25.dist-info → pycharter-0.0.26.dist-info}/top_level.txt +0 -0
|
@@ -2,28 +2,25 @@
|
|
|
2
2
|
Pipeline class with | operator for chaining.
|
|
3
3
|
|
|
4
4
|
Supports both config-driven and programmatic pipeline construction.
|
|
5
|
+
Includes optional validation at extract (source) and load (target) stages.
|
|
5
6
|
"""
|
|
6
7
|
|
|
7
8
|
import logging
|
|
8
9
|
import os
|
|
9
|
-
import re
|
|
10
10
|
import uuid
|
|
11
11
|
from datetime import datetime, timezone
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import Any,
|
|
13
|
+
from typing import Any, Dict, List, Optional, Union
|
|
14
14
|
|
|
15
15
|
import yaml
|
|
16
16
|
|
|
17
|
-
from pycharter.etl_generator.context import PipelineContext
|
|
17
|
+
from pycharter.etl_generator.context import PipelineContext, VARIABLE_PATTERN
|
|
18
18
|
from pycharter.etl_generator.protocols import Extractor, Transformer, Loader
|
|
19
19
|
from pycharter.etl_generator.result import PipelineResult, BatchResult
|
|
20
20
|
from pycharter.shared.errors import ErrorContext, ErrorMode, get_error_context
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
24
|
-
# Variable pattern: ${VAR} or ${VAR:-default} or ${VAR:?error}
|
|
25
|
-
VARIABLE_PATTERN = re.compile(r'\$\{([^}:]+)(?::([?-])([^}]*))?\}')
|
|
26
|
-
|
|
27
24
|
|
|
28
25
|
class Pipeline:
|
|
29
26
|
"""
|
|
@@ -68,12 +65,31 @@ class Pipeline:
|
|
|
68
65
|
loader: Optional[Loader] = None,
|
|
69
66
|
context: Optional[PipelineContext] = None,
|
|
70
67
|
name: Optional[str] = None,
|
|
68
|
+
transform_config: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
|
|
69
|
+
# Validation configs
|
|
70
|
+
extract_validation_config: Optional[Dict[str, Any]] = None,
|
|
71
|
+
load_validation_config: Optional[Dict[str, Any]] = None,
|
|
72
|
+
# Shared settings
|
|
73
|
+
settings: Optional[Dict[str, Any]] = None,
|
|
74
|
+
# Base directory for resolving relative paths
|
|
75
|
+
base_dir: Optional[Path] = None,
|
|
71
76
|
):
|
|
72
77
|
self.extractor = extractor
|
|
73
78
|
self._transformers: List[Transformer] = list(transformers) if transformers else []
|
|
74
79
|
self.loader = loader
|
|
75
80
|
self.context = context or PipelineContext()
|
|
76
81
|
self.name = name
|
|
82
|
+
# For config-driven runs, store raw transform config and use apply_transforms
|
|
83
|
+
self._transform_config = transform_config
|
|
84
|
+
# Validation configs
|
|
85
|
+
self._extract_validation_config = extract_validation_config
|
|
86
|
+
self._load_validation_config = load_validation_config
|
|
87
|
+
# Shared settings from settings.yaml
|
|
88
|
+
self._settings = settings or {}
|
|
89
|
+
# Base directory for resolving file paths
|
|
90
|
+
self._base_dir = base_dir
|
|
91
|
+
# Run ID (set during run())
|
|
92
|
+
self._run_id: Optional[str] = None
|
|
77
93
|
|
|
78
94
|
def __or__(self, other: Union[Transformer, Loader]) -> "Pipeline":
|
|
79
95
|
"""Chain transformer or set loader using | operator."""
|
|
@@ -84,6 +100,10 @@ class Pipeline:
|
|
|
84
100
|
loader=other,
|
|
85
101
|
context=self.context,
|
|
86
102
|
name=self.name,
|
|
103
|
+
extract_validation_config=self._extract_validation_config,
|
|
104
|
+
load_validation_config=self._load_validation_config,
|
|
105
|
+
settings=self._settings,
|
|
106
|
+
base_dir=self._base_dir,
|
|
87
107
|
)
|
|
88
108
|
else:
|
|
89
109
|
new_transformers = self._transformers.copy()
|
|
@@ -94,6 +114,10 @@ class Pipeline:
|
|
|
94
114
|
loader=self.loader,
|
|
95
115
|
context=self.context,
|
|
96
116
|
name=self.name,
|
|
117
|
+
extract_validation_config=self._extract_validation_config,
|
|
118
|
+
load_validation_config=self._load_validation_config,
|
|
119
|
+
settings=self._settings,
|
|
120
|
+
base_dir=self._base_dir,
|
|
97
121
|
)
|
|
98
122
|
|
|
99
123
|
async def run(
|
|
@@ -117,6 +141,7 @@ class Pipeline:
|
|
|
117
141
|
PipelineResult with counts and any errors.
|
|
118
142
|
"""
|
|
119
143
|
run_id = str(uuid.uuid4())[:8]
|
|
144
|
+
self._run_id = run_id
|
|
120
145
|
start_time = datetime.now(timezone.utc)
|
|
121
146
|
ctx = error_context or get_error_context()
|
|
122
147
|
|
|
@@ -133,14 +158,58 @@ class Pipeline:
|
|
|
133
158
|
|
|
134
159
|
logger.info(f"[{run_id}] Starting pipeline: {self.name or 'unnamed'}")
|
|
135
160
|
|
|
161
|
+
# Create validators if configured
|
|
162
|
+
extract_validator = self._create_extract_validator()
|
|
163
|
+
load_validator = self._create_load_validator()
|
|
164
|
+
|
|
136
165
|
try:
|
|
137
166
|
batch_index = 0
|
|
138
167
|
async for batch in self.extractor.extract(**params):
|
|
139
168
|
batch_result = BatchResult(batch_index=batch_index, rows_in=len(batch))
|
|
169
|
+
result.rows_extracted += len(batch)
|
|
170
|
+
|
|
171
|
+
# Source validation (after extract)
|
|
172
|
+
if extract_validator:
|
|
173
|
+
try:
|
|
174
|
+
batch, quarantined, error_count = await extract_validator.validate(
|
|
175
|
+
batch, run_id=run_id
|
|
176
|
+
)
|
|
177
|
+
result.rows_quarantined_extract += len(quarantined)
|
|
178
|
+
if error_count > 0:
|
|
179
|
+
batch_result.errors.append(
|
|
180
|
+
f"Extract validation: {error_count} record(s) invalid"
|
|
181
|
+
)
|
|
182
|
+
except Exception as e:
|
|
183
|
+
# Validation error with on_error=fail
|
|
184
|
+
ctx.handle_error(str(e), e, category="extract_validation")
|
|
185
|
+
batch_result.errors.append(f"Extract validation failed: {e}")
|
|
186
|
+
result.success = False
|
|
187
|
+
result.errors.append(str(e))
|
|
188
|
+
break
|
|
140
189
|
|
|
141
190
|
# Transform
|
|
142
191
|
transformed = self._apply_transforms(batch)
|
|
143
192
|
batch_result.rows_out = len(transformed)
|
|
193
|
+
result.rows_transformed += len(transformed)
|
|
194
|
+
|
|
195
|
+
# Target validation (before load)
|
|
196
|
+
if load_validator and transformed:
|
|
197
|
+
try:
|
|
198
|
+
transformed, quarantined, error_count = await load_validator.validate(
|
|
199
|
+
transformed, run_id=run_id
|
|
200
|
+
)
|
|
201
|
+
result.rows_quarantined_load += len(quarantined)
|
|
202
|
+
if error_count > 0:
|
|
203
|
+
batch_result.errors.append(
|
|
204
|
+
f"Load validation: {error_count} record(s) invalid"
|
|
205
|
+
)
|
|
206
|
+
except Exception as e:
|
|
207
|
+
# Validation error with on_error=fail
|
|
208
|
+
ctx.handle_error(str(e), e, category="load_validation")
|
|
209
|
+
batch_result.errors.append(f"Load validation failed: {e}")
|
|
210
|
+
result.success = False
|
|
211
|
+
result.errors.append(str(e))
|
|
212
|
+
break
|
|
144
213
|
|
|
145
214
|
# Load
|
|
146
215
|
if not dry_run and self.loader and transformed:
|
|
@@ -160,8 +229,6 @@ class Pipeline:
|
|
|
160
229
|
elif dry_run:
|
|
161
230
|
result.rows_loaded += len(transformed)
|
|
162
231
|
|
|
163
|
-
result.rows_extracted += len(batch)
|
|
164
|
-
result.rows_transformed += len(transformed)
|
|
165
232
|
result.batches_processed += 1
|
|
166
233
|
result.batch_results.append(batch_result)
|
|
167
234
|
batch_index += 1
|
|
@@ -179,11 +246,115 @@ class Pipeline:
|
|
|
179
246
|
if result.errors:
|
|
180
247
|
result.success = False
|
|
181
248
|
|
|
182
|
-
logger.info(
|
|
249
|
+
logger.info(
|
|
250
|
+
f"[{run_id}] Complete: extracted={result.rows_extracted}, "
|
|
251
|
+
f"loaded={result.rows_loaded}, "
|
|
252
|
+
f"quarantined_extract={result.rows_quarantined_extract}, "
|
|
253
|
+
f"quarantined_load={result.rows_quarantined_load}"
|
|
254
|
+
)
|
|
183
255
|
return result
|
|
184
256
|
|
|
257
|
+
def _create_extract_validator(self):
|
|
258
|
+
"""Create extract validator if configured."""
|
|
259
|
+
if not self._extract_validation_config:
|
|
260
|
+
return None
|
|
261
|
+
|
|
262
|
+
from pycharter.etl_generator.validation import create_etl_validator
|
|
263
|
+
from pycharter.etl_generator.config_models import DLQConfig
|
|
264
|
+
|
|
265
|
+
# Get default DLQ config from settings
|
|
266
|
+
default_dlq = None
|
|
267
|
+
if self._settings.get("dlq"):
|
|
268
|
+
default_dlq = DLQConfig(**self._settings["dlq"])
|
|
269
|
+
|
|
270
|
+
return create_etl_validator(
|
|
271
|
+
validation_config=self._extract_validation_config,
|
|
272
|
+
pipeline_name=self.name or "unnamed",
|
|
273
|
+
stage="extract",
|
|
274
|
+
metadata_store=None, # Extract uses local schema, not store
|
|
275
|
+
default_contract=None,
|
|
276
|
+
default_dlq_config=default_dlq,
|
|
277
|
+
base_dir=self._base_dir,
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
def _create_load_validator(self):
|
|
281
|
+
"""Create load validator if configured."""
|
|
282
|
+
if not self._load_validation_config:
|
|
283
|
+
return None
|
|
284
|
+
|
|
285
|
+
from pycharter.etl_generator.validation import create_etl_validator
|
|
286
|
+
from pycharter.etl_generator.config_models import DLQConfig
|
|
287
|
+
from pycharter.metadata_store import MetadataStoreClient
|
|
288
|
+
|
|
289
|
+
# Get default DLQ config from settings
|
|
290
|
+
default_dlq = None
|
|
291
|
+
if self._settings.get("dlq"):
|
|
292
|
+
default_dlq = DLQConfig(**self._settings["dlq"])
|
|
293
|
+
|
|
294
|
+
# Get default contract from settings
|
|
295
|
+
default_contract = self._settings.get("contract")
|
|
296
|
+
|
|
297
|
+
# Create metadata store if configured
|
|
298
|
+
metadata_store = None
|
|
299
|
+
if self._settings.get("metadata_store"):
|
|
300
|
+
metadata_store = self._create_metadata_store()
|
|
301
|
+
|
|
302
|
+
return create_etl_validator(
|
|
303
|
+
validation_config=self._load_validation_config,
|
|
304
|
+
pipeline_name=self.name or "unnamed",
|
|
305
|
+
stage="load",
|
|
306
|
+
metadata_store=metadata_store,
|
|
307
|
+
default_contract=default_contract,
|
|
308
|
+
default_dlq_config=default_dlq,
|
|
309
|
+
base_dir=self._base_dir,
|
|
310
|
+
)
|
|
311
|
+
|
|
312
|
+
def _create_metadata_store(self) -> Optional["MetadataStoreClient"]:
|
|
313
|
+
"""Create metadata store from settings."""
|
|
314
|
+
store_config = self._settings.get("metadata_store")
|
|
315
|
+
if not store_config:
|
|
316
|
+
return None
|
|
317
|
+
|
|
318
|
+
store_type = store_config.get("type", "").lower()
|
|
319
|
+
connection_string = store_config.get("connection_string")
|
|
320
|
+
|
|
321
|
+
if not connection_string:
|
|
322
|
+
logger.warning("metadata_store missing connection_string")
|
|
323
|
+
return None
|
|
324
|
+
|
|
325
|
+
if store_type == "postgres":
|
|
326
|
+
from pycharter.metadata_store import PostgresMetadataStore
|
|
327
|
+
return PostgresMetadataStore(connection_string)
|
|
328
|
+
elif store_type == "sqlite":
|
|
329
|
+
from pycharter.metadata_store import SQLiteMetadataStore
|
|
330
|
+
return SQLiteMetadataStore(connection_string)
|
|
331
|
+
elif store_type == "mongodb":
|
|
332
|
+
from pycharter.metadata_store import MongoDBMetadataStore
|
|
333
|
+
return MongoDBMetadataStore(connection_string)
|
|
334
|
+
elif store_type == "redis":
|
|
335
|
+
from pycharter.metadata_store import RedisMetadataStore
|
|
336
|
+
return RedisMetadataStore(connection_string)
|
|
337
|
+
elif store_type == "memory":
|
|
338
|
+
from pycharter.metadata_store import InMemoryMetadataStore
|
|
339
|
+
return InMemoryMetadataStore()
|
|
340
|
+
else:
|
|
341
|
+
logger.warning(f"Unknown metadata_store type: {store_type}")
|
|
342
|
+
return None
|
|
343
|
+
|
|
185
344
|
def _apply_transforms(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
186
|
-
"""Apply all transformers to data.
|
|
345
|
+
"""Apply all transformers to data.
|
|
346
|
+
|
|
347
|
+
For config-driven pipelines (when _transform_config is set), uses the unified
|
|
348
|
+
apply_transforms() which supports simple_ops, jsonata, and custom_function.
|
|
349
|
+
For programmatic pipelines (when _transformers is set), loops through the
|
|
350
|
+
transformer chain.
|
|
351
|
+
"""
|
|
352
|
+
# Config-driven path: use unified apply_transforms (supports jsonata, etc.)
|
|
353
|
+
if self._transform_config:
|
|
354
|
+
from pycharter.etl_generator.transformers import apply_transforms
|
|
355
|
+
return apply_transforms(data, self._transform_config)
|
|
356
|
+
|
|
357
|
+
# Programmatic path: loop through transformer instances
|
|
187
358
|
result = data
|
|
188
359
|
for transformer in self._transformers:
|
|
189
360
|
result = transformer.transform(result)
|
|
@@ -263,6 +434,7 @@ class Pipeline:
|
|
|
263
434
|
- extract.yaml (required)
|
|
264
435
|
- transform.yaml (optional)
|
|
265
436
|
- load.yaml (required)
|
|
437
|
+
- settings.yaml (optional, for shared config like DLQ, metadata store)
|
|
266
438
|
|
|
267
439
|
Args:
|
|
268
440
|
directory: Path to directory containing config files
|
|
@@ -289,6 +461,7 @@ class Pipeline:
|
|
|
289
461
|
extract_file = directory / "extract.yaml"
|
|
290
462
|
load_file = directory / "load.yaml"
|
|
291
463
|
transform_file = directory / "transform.yaml"
|
|
464
|
+
settings_file = directory / "settings.yaml"
|
|
292
465
|
|
|
293
466
|
if not extract_file.exists():
|
|
294
467
|
raise FileNotFoundError(f"Required file not found: {extract_file}")
|
|
@@ -299,6 +472,11 @@ class Pipeline:
|
|
|
299
472
|
extract_config = _load_config_input(extract_file, variables)
|
|
300
473
|
load_config = _load_config_input(load_file, variables)
|
|
301
474
|
transform_config = _load_config_input(transform_file, variables) if transform_file.exists() else {}
|
|
475
|
+
settings_config = _load_config_input(settings_file, variables) if settings_file.exists() else {}
|
|
476
|
+
|
|
477
|
+
# Extract validation configs from extract/load
|
|
478
|
+
extract_validation_config = extract_config.pop("validation", None) if isinstance(extract_config, dict) else None
|
|
479
|
+
load_validation_config = load_config.pop("validation", None) if isinstance(load_config, dict) else None
|
|
302
480
|
|
|
303
481
|
return cls._build_from_configs(
|
|
304
482
|
extract_config=extract_config,
|
|
@@ -307,6 +485,10 @@ class Pipeline:
|
|
|
307
485
|
variables=variables,
|
|
308
486
|
validate=validate,
|
|
309
487
|
name=name or directory.name,
|
|
488
|
+
extract_validation_config=extract_validation_config,
|
|
489
|
+
load_validation_config=load_validation_config,
|
|
490
|
+
settings=settings_config if isinstance(settings_config, dict) else {},
|
|
491
|
+
base_dir=directory.resolve(),
|
|
310
492
|
)
|
|
311
493
|
|
|
312
494
|
@classmethod
|
|
@@ -325,11 +507,25 @@ class Pipeline:
|
|
|
325
507
|
extract:
|
|
326
508
|
type: http
|
|
327
509
|
url: https://api.example.com
|
|
510
|
+
validation: # optional
|
|
511
|
+
schema: ./source_schema.yaml
|
|
512
|
+
on_error: quarantine
|
|
328
513
|
transform:
|
|
329
514
|
- rename: {old: new}
|
|
330
515
|
load:
|
|
331
516
|
type: file
|
|
332
517
|
path: output.json
|
|
518
|
+
validation: # optional
|
|
519
|
+
contract: ./contracts/orders
|
|
520
|
+
on_error: fail
|
|
521
|
+
# Optional shared settings
|
|
522
|
+
metadata_store:
|
|
523
|
+
type: postgres
|
|
524
|
+
connection_string: ...
|
|
525
|
+
dlq:
|
|
526
|
+
enabled: true
|
|
527
|
+
backend: file
|
|
528
|
+
path: ./dlq
|
|
333
529
|
|
|
334
530
|
Args:
|
|
335
531
|
path: Path to pipeline config file (YAML)
|
|
@@ -361,13 +557,32 @@ class Pipeline:
|
|
|
361
557
|
if "load" not in config:
|
|
362
558
|
raise ValueError(f"Config file missing 'load' section: {path}")
|
|
363
559
|
|
|
560
|
+
# Extract validation configs
|
|
561
|
+
extract_config = config["extract"]
|
|
562
|
+
load_config = config["load"]
|
|
563
|
+
extract_validation_config = extract_config.pop("validation", None) if isinstance(extract_config, dict) else None
|
|
564
|
+
load_validation_config = load_config.pop("validation", None) if isinstance(load_config, dict) else None
|
|
565
|
+
|
|
566
|
+
# Build inline settings from top-level keys
|
|
567
|
+
settings: Dict[str, Any] = {}
|
|
568
|
+
if config.get("metadata_store"):
|
|
569
|
+
settings["metadata_store"] = config["metadata_store"]
|
|
570
|
+
if config.get("dlq"):
|
|
571
|
+
settings["dlq"] = config["dlq"]
|
|
572
|
+
if config.get("contract"):
|
|
573
|
+
settings["contract"] = config["contract"]
|
|
574
|
+
|
|
364
575
|
return cls._build_from_configs(
|
|
365
|
-
extract_config=
|
|
576
|
+
extract_config=extract_config,
|
|
366
577
|
transform_config=config.get("transform", {}),
|
|
367
|
-
load_config=
|
|
578
|
+
load_config=load_config,
|
|
368
579
|
variables=variables,
|
|
369
580
|
validate=validate,
|
|
370
581
|
name=config.get("name"),
|
|
582
|
+
extract_validation_config=extract_validation_config,
|
|
583
|
+
load_validation_config=load_validation_config,
|
|
584
|
+
settings=settings,
|
|
585
|
+
base_dir=path.parent.resolve(),
|
|
371
586
|
)
|
|
372
587
|
|
|
373
588
|
@classmethod
|
|
@@ -376,6 +591,7 @@ class Pipeline:
|
|
|
376
591
|
config: Dict[str, Any],
|
|
377
592
|
variables: Optional[Dict[str, str]] = None,
|
|
378
593
|
validate: bool = True,
|
|
594
|
+
base_dir: Optional[Path] = None,
|
|
379
595
|
) -> "Pipeline":
|
|
380
596
|
"""
|
|
381
597
|
Create pipeline from a configuration dictionary.
|
|
@@ -384,6 +600,7 @@ class Pipeline:
|
|
|
384
600
|
config: Dict with 'extract', 'transform' (optional), 'load' sections
|
|
385
601
|
variables: Variables for ${VAR} substitution
|
|
386
602
|
validate: If True, validate config against schema
|
|
603
|
+
base_dir: Base directory for resolving relative file paths
|
|
387
604
|
|
|
388
605
|
Returns:
|
|
389
606
|
Configured Pipeline instance
|
|
@@ -406,15 +623,40 @@ class Pipeline:
|
|
|
406
623
|
|
|
407
624
|
# Resolve variables in config
|
|
408
625
|
extract_config = context.resolve_dict(config["extract"])
|
|
626
|
+
load_config = context.resolve_dict(config["load"])
|
|
627
|
+
|
|
628
|
+
# Extract validation configs
|
|
629
|
+
extract_validation_config = extract_config.pop("validation", None) if isinstance(extract_config, dict) else None
|
|
630
|
+
load_validation_config = load_config.pop("validation", None) if isinstance(load_config, dict) else None
|
|
631
|
+
|
|
632
|
+
# Build transform config - include transform, jsonata, and custom_function
|
|
633
|
+
# This allows top-level jsonata/custom_function in config (common in yaml files)
|
|
634
|
+
transform_config: Dict[str, Any] = {}
|
|
409
635
|
raw_transform = config.get("transform", {})
|
|
410
636
|
if isinstance(raw_transform, list):
|
|
411
|
-
transform_config = [
|
|
637
|
+
transform_config["transform"] = [
|
|
412
638
|
context.resolve_dict(item) if isinstance(item, dict) else item
|
|
413
639
|
for item in raw_transform
|
|
414
640
|
]
|
|
415
|
-
|
|
416
|
-
transform_config = context.resolve_dict(raw_transform)
|
|
417
|
-
|
|
641
|
+
elif raw_transform:
|
|
642
|
+
transform_config["transform"] = context.resolve_dict(raw_transform)
|
|
643
|
+
|
|
644
|
+
# Include top-level jsonata if present
|
|
645
|
+
if config.get("jsonata"):
|
|
646
|
+
transform_config["jsonata"] = context.resolve_dict(config["jsonata"])
|
|
647
|
+
|
|
648
|
+
# Include top-level custom_function if present
|
|
649
|
+
if config.get("custom_function"):
|
|
650
|
+
transform_config["custom_function"] = context.resolve_dict(config["custom_function"])
|
|
651
|
+
|
|
652
|
+
# Build inline settings from top-level keys
|
|
653
|
+
settings: Dict[str, Any] = {}
|
|
654
|
+
if config.get("metadata_store"):
|
|
655
|
+
settings["metadata_store"] = context.resolve_dict(config["metadata_store"])
|
|
656
|
+
if config.get("dlq"):
|
|
657
|
+
settings["dlq"] = context.resolve_dict(config["dlq"])
|
|
658
|
+
if config.get("contract"):
|
|
659
|
+
settings["contract"] = config["contract"]
|
|
418
660
|
|
|
419
661
|
return cls._build_from_configs(
|
|
420
662
|
extract_config=extract_config,
|
|
@@ -423,6 +665,10 @@ class Pipeline:
|
|
|
423
665
|
variables=variables,
|
|
424
666
|
validate=validate,
|
|
425
667
|
name=config.get("name"),
|
|
668
|
+
extract_validation_config=extract_validation_config,
|
|
669
|
+
load_validation_config=load_validation_config,
|
|
670
|
+
settings=settings,
|
|
671
|
+
base_dir=base_dir,
|
|
426
672
|
)
|
|
427
673
|
|
|
428
674
|
@classmethod
|
|
@@ -434,8 +680,21 @@ class Pipeline:
|
|
|
434
680
|
variables: Dict[str, str],
|
|
435
681
|
validate: bool,
|
|
436
682
|
name: Optional[str],
|
|
683
|
+
extract_validation_config: Optional[Dict[str, Any]] = None,
|
|
684
|
+
load_validation_config: Optional[Dict[str, Any]] = None,
|
|
685
|
+
settings: Optional[Dict[str, Any]] = None,
|
|
686
|
+
base_dir: Optional[Path] = None,
|
|
437
687
|
) -> "Pipeline":
|
|
438
|
-
"""Internal method to build pipeline from resolved configs.
|
|
688
|
+
"""Internal method to build pipeline from resolved configs.
|
|
689
|
+
|
|
690
|
+
Uses the unified apply_transforms path for transformations, which supports:
|
|
691
|
+
- Simple ops: rename, convert, defaults, add, select, drop, filter
|
|
692
|
+
- JSONata expressions
|
|
693
|
+
- Custom functions
|
|
694
|
+
|
|
695
|
+
Also supports validation at extract and load stages via extract_validation_config
|
|
696
|
+
and load_validation_config parameters.
|
|
697
|
+
"""
|
|
439
698
|
from pycharter.etl_generator.config_validator import ConfigValidator
|
|
440
699
|
|
|
441
700
|
# Validate if enabled
|
|
@@ -455,15 +714,20 @@ class Pipeline:
|
|
|
455
714
|
|
|
456
715
|
# Create components
|
|
457
716
|
extractor = _create_extractor(extract_config)
|
|
458
|
-
transformers = _create_transformers(transform_config)
|
|
459
717
|
loader_instance = _create_loader(load_config)
|
|
460
718
|
|
|
719
|
+
# Pass raw transform config - will be used by apply_transforms in _apply_transforms
|
|
461
720
|
return cls(
|
|
462
721
|
extractor=extractor,
|
|
463
|
-
transformers=
|
|
722
|
+
transformers=None, # No transformer objects for config-driven runs
|
|
464
723
|
loader=loader_instance,
|
|
465
724
|
context=context,
|
|
466
725
|
name=name,
|
|
726
|
+
transform_config=transform_config if transform_config else None,
|
|
727
|
+
extract_validation_config=extract_validation_config,
|
|
728
|
+
load_validation_config=load_validation_config,
|
|
729
|
+
settings=settings or {},
|
|
730
|
+
base_dir=base_dir,
|
|
467
731
|
)
|
|
468
732
|
|
|
469
733
|
|
|
@@ -557,151 +821,6 @@ def _create_extractor(config: Dict[str, Any]) -> Optional[Extractor]:
|
|
|
557
821
|
return extractor_class.from_config(config)
|
|
558
822
|
|
|
559
823
|
|
|
560
|
-
def _create_transformers(config: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[Transformer]:
|
|
561
|
-
"""Create transformer chain from config."""
|
|
562
|
-
if not config:
|
|
563
|
-
return []
|
|
564
|
-
|
|
565
|
-
# Handle nested 'transform' key
|
|
566
|
-
if isinstance(config, dict) and "transform" in config:
|
|
567
|
-
config = config["transform"]
|
|
568
|
-
|
|
569
|
-
# List format - ordered transforms
|
|
570
|
-
if isinstance(config, list):
|
|
571
|
-
return _create_transformers_from_list(config)
|
|
572
|
-
|
|
573
|
-
# Dict format - fixed order
|
|
574
|
-
return _create_transformers_from_dict(config)
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
def _create_transformers_from_list(config: List[Dict[str, Any]]) -> List[Transformer]:
|
|
578
|
-
"""Create transformers from list format (user-specified order)."""
|
|
579
|
-
transformers = []
|
|
580
|
-
|
|
581
|
-
for step in config:
|
|
582
|
-
if not isinstance(step, dict):
|
|
583
|
-
logger.warning(f"Invalid transform step (expected dict): {step}")
|
|
584
|
-
continue
|
|
585
|
-
|
|
586
|
-
for op_name, op_config in step.items():
|
|
587
|
-
transformer = _create_single_transformer(op_name, op_config)
|
|
588
|
-
if transformer:
|
|
589
|
-
if isinstance(transformer, list):
|
|
590
|
-
transformers.extend(transformer)
|
|
591
|
-
else:
|
|
592
|
-
transformers.append(transformer)
|
|
593
|
-
|
|
594
|
-
return transformers
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
def _create_transformers_from_dict(config: Dict[str, Any]) -> List[Transformer]:
|
|
598
|
-
"""Create transformers from dict format (fixed order)."""
|
|
599
|
-
transformers = []
|
|
600
|
-
ordered_ops = ["rename", "convert", "defaults", "add", "select", "drop", "filter", "custom_function"]
|
|
601
|
-
|
|
602
|
-
for op_name in ordered_ops:
|
|
603
|
-
if op_name in config:
|
|
604
|
-
transformer = _create_single_transformer(op_name, config[op_name])
|
|
605
|
-
if transformer:
|
|
606
|
-
if isinstance(transformer, list):
|
|
607
|
-
transformers.extend(transformer)
|
|
608
|
-
else:
|
|
609
|
-
transformers.append(transformer)
|
|
610
|
-
|
|
611
|
-
return transformers
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
def _create_single_transformer(op_name: str, op_config: Any) -> Optional[Union[Transformer, List[Transformer]]]:
    """Map one operation name and its config onto transformer object(s).

    The operation name is matched case-insensitively. ``add`` returns a list
    with one ``AddField`` per entry; every other operation returns a single
    transformer.

    Returns:
        The transformer(s), or ``None`` when the config has the wrong
        container type for the operation, when a filter cannot be built, or
        when the operation name is unknown (which is also logged).
    """
    from pycharter.etl_generator.transformers import (
        Rename, AddField, Drop, Select, Filter, Convert, Default, CustomFunction,
    )
    from pycharter.etl_generator.transformers.simple_operations import convert_type

    op_name = op_name.lower()
    is_mapping = isinstance(op_config, dict)
    is_sequence = isinstance(op_config, list)

    if op_name == "rename":
        return Rename(op_config) if is_mapping else None

    if op_name == "add":
        if not is_mapping:
            return None
        return [AddField(name, val) for name, val in op_config.items()]

    if op_name == "drop":
        return Drop(op_config) if is_sequence else None

    if op_name == "select":
        return Select(op_config) if is_sequence else None

    if op_name == "convert":
        if not is_mapping:
            return None
        builtin_casts = {
            "int": int, "integer": int,
            "float": float, "number": float, "numeric": float,
            "str": str, "string": str,
            "bool": bool, "boolean": bool,
        }
        converters = {}
        for name, target in op_config.items():
            type_key = target.lower() if isinstance(target, str) else str(target)
            if type_key in builtin_casts:
                converters[name] = builtin_casts[type_key]
            elif type_key in ("datetime", "date"):
                # Bind the type name as a default so each lambda keeps its own.
                converters[name] = lambda v, t=type_key: convert_type(v, t)
            else:
                # Unrecognised type names fall back to str.
                converters[name] = str
        return Convert(converters)

    if op_name == "defaults":
        return Default(op_config) if is_mapping else None

    if op_name == "filter":
        if not is_mapping:
            return None
        target_field = op_config.get("field")
        comparison = op_config.get("operator", "eq")
        expected = op_config.get("value")
        if not (target_field and comparison):
            return None
        predicate = _create_filter_predicate(target_field, comparison, expected)
        return Filter(predicate) if predicate else None

    if op_name == "custom_function":
        if not is_mapping:
            return None
        return CustomFunction(
            module=op_config.get("module"),
            function=op_config.get("function"),
            kwargs=op_config.get("kwargs", {}),
        )

    logger.warning(f"Unknown transform operation: {op_name}")
    return None
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
def _create_filter_predicate(field: str, operator: str, value: Any) -> Optional[Callable]:
|
|
687
|
-
"""Create a filter predicate function from operator and value."""
|
|
688
|
-
operators = {
|
|
689
|
-
"eq": lambda r: r.get(field) == value,
|
|
690
|
-
"ne": lambda r: r.get(field) != value,
|
|
691
|
-
"gt": lambda r: r.get(field) is not None and r.get(field) > value,
|
|
692
|
-
"gte": lambda r: r.get(field) is not None and r.get(field) >= value,
|
|
693
|
-
"lt": lambda r: r.get(field) is not None and r.get(field) < value,
|
|
694
|
-
"lte": lambda r: r.get(field) is not None and r.get(field) <= value,
|
|
695
|
-
"in": lambda r: r.get(field) in (value if isinstance(value, (list, tuple, set)) else [value]),
|
|
696
|
-
"not_in": lambda r: r.get(field) not in (value if isinstance(value, (list, tuple, set)) else [value]),
|
|
697
|
-
"contains": lambda r: value in str(r.get(field, "")),
|
|
698
|
-
"not_contains": lambda r: value not in str(r.get(field, "")),
|
|
699
|
-
"is_null": lambda r: r.get(field) is None,
|
|
700
|
-
"is_not_null": lambda r: r.get(field) is not None,
|
|
701
|
-
}
|
|
702
|
-
return operators.get(operator)
|
|
703
|
-
|
|
704
|
-
|
|
705
824
|
def _create_loader(config: Dict[str, Any]) -> Optional[Loader]:
|
|
706
825
|
"""Create loader from config using explicit type field."""
|
|
707
826
|
if not config:
|
|
@@ -39,6 +39,12 @@ class PipelineResult:
|
|
|
39
39
|
rows_transformed: int = 0
|
|
40
40
|
rows_loaded: int = 0
|
|
41
41
|
rows_failed: int = 0
|
|
42
|
+
# Validation tracking
|
|
43
|
+
rows_quarantined_extract: int = 0 # Records quarantined at extract stage
|
|
44
|
+
rows_quarantined_load: int = 0 # Records quarantined at load stage
|
|
45
|
+
validation_errors_extract: List[str] = field(default_factory=list)
|
|
46
|
+
validation_errors_load: List[str] = field(default_factory=list)
|
|
47
|
+
# Timing
|
|
42
48
|
start_time: Optional[datetime] = None
|
|
43
49
|
end_time: Optional[datetime] = None
|
|
44
50
|
duration_seconds: Optional[float] = None
|
|
@@ -48,6 +54,11 @@ class PipelineResult:
|
|
|
48
54
|
pipeline_name: Optional[str] = None
|
|
49
55
|
run_id: Optional[str] = None
|
|
50
56
|
|
|
57
|
+
@property
def total_quarantined(self) -> int:
    """Combined count of rows quarantined at the extract and load stages."""
    at_extract = self.rows_quarantined_extract
    at_load = self.rows_quarantined_load
    return at_extract + at_load
|
|
61
|
+
|
|
51
62
|
def to_dict(self) -> Dict[str, Any]:
|
|
52
63
|
return {
|
|
53
64
|
"success": self.success,
|
|
@@ -55,6 +66,11 @@ class PipelineResult:
|
|
|
55
66
|
"rows_transformed": self.rows_transformed,
|
|
56
67
|
"rows_loaded": self.rows_loaded,
|
|
57
68
|
"rows_failed": self.rows_failed,
|
|
69
|
+
"rows_quarantined_extract": self.rows_quarantined_extract,
|
|
70
|
+
"rows_quarantined_load": self.rows_quarantined_load,
|
|
71
|
+
"total_quarantined": self.total_quarantined,
|
|
72
|
+
"validation_errors_extract": self.validation_errors_extract,
|
|
73
|
+
"validation_errors_load": self.validation_errors_load,
|
|
58
74
|
"duration_seconds": self.duration_seconds,
|
|
59
75
|
"batches_processed": self.batches_processed,
|
|
60
76
|
"errors": self.errors,
|