pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/main.py +27 -1
- api/models/docs.py +68 -0
- api/models/evolution.py +117 -0
- api/models/tracking.py +111 -0
- api/models/validation.py +46 -6
- api/routes/v1/__init__.py +14 -1
- api/routes/v1/docs.py +187 -0
- api/routes/v1/evolution.py +337 -0
- api/routes/v1/templates.py +211 -27
- api/routes/v1/tracking.py +301 -0
- api/routes/v1/validation.py +68 -31
- pycharter/__init__.py +268 -58
- pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
- pycharter/data/templates/contract/template_contract.yaml +122 -0
- pycharter/data/templates/contract/template_metadata.yaml +68 -0
- pycharter/data/templates/contract/template_schema.yaml +100 -0
- pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
- pycharter/data/templates/etl/README.md +224 -0
- pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
- pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
- pycharter/data/templates/etl/extract_database.yaml +34 -0
- pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
- pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
- pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
- pycharter/data/templates/etl/extract_file_json.yaml +24 -0
- pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
- pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
- pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
- pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
- pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
- pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
- pycharter/data/templates/etl/load_file.yaml +34 -0
- pycharter/data/templates/etl/load_insert.yaml +18 -0
- pycharter/data/templates/etl/load_postgresql.yaml +39 -0
- pycharter/data/templates/etl/load_sqlite.yaml +21 -0
- pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
- pycharter/data/templates/etl/load_upsert.yaml +25 -0
- pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
- pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
- pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
- pycharter/data/templates/etl/transform_combined.yaml +48 -0
- pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
- pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
- pycharter/data/templates/etl/transform_simple.yaml +59 -0
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
- pycharter/docs_generator/__init__.py +43 -0
- pycharter/docs_generator/generator.py +465 -0
- pycharter/docs_generator/renderers.py +247 -0
- pycharter/etl_generator/__init__.py +168 -80
- pycharter/etl_generator/builder.py +121 -0
- pycharter/etl_generator/config_loader.py +394 -0
- pycharter/etl_generator/config_validator.py +418 -0
- pycharter/etl_generator/context.py +132 -0
- pycharter/etl_generator/expression.py +499 -0
- pycharter/etl_generator/extractors/__init__.py +30 -0
- pycharter/etl_generator/extractors/base.py +70 -0
- pycharter/etl_generator/extractors/cloud_storage.py +530 -0
- pycharter/etl_generator/extractors/database.py +221 -0
- pycharter/etl_generator/extractors/factory.py +185 -0
- pycharter/etl_generator/extractors/file.py +475 -0
- pycharter/etl_generator/extractors/http.py +895 -0
- pycharter/etl_generator/extractors/streaming.py +57 -0
- pycharter/etl_generator/loaders/__init__.py +41 -0
- pycharter/etl_generator/loaders/base.py +35 -0
- pycharter/etl_generator/loaders/cloud.py +87 -0
- pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
- pycharter/etl_generator/loaders/database.py +274 -0
- pycharter/etl_generator/loaders/factory.py +180 -0
- pycharter/etl_generator/loaders/file.py +72 -0
- pycharter/etl_generator/loaders/file_loader.py +130 -0
- pycharter/etl_generator/pipeline.py +743 -0
- pycharter/etl_generator/protocols.py +54 -0
- pycharter/etl_generator/result.py +63 -0
- pycharter/etl_generator/schemas/__init__.py +49 -0
- pycharter/etl_generator/transformers/__init__.py +49 -0
- pycharter/etl_generator/transformers/base.py +63 -0
- pycharter/etl_generator/transformers/config.py +45 -0
- pycharter/etl_generator/transformers/custom_function.py +101 -0
- pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
- pycharter/etl_generator/transformers/operations.py +218 -0
- pycharter/etl_generator/transformers/pipeline.py +54 -0
- pycharter/etl_generator/transformers/simple_operations.py +131 -0
- pycharter/quality/__init__.py +25 -0
- pycharter/quality/tracking/__init__.py +64 -0
- pycharter/quality/tracking/collector.py +318 -0
- pycharter/quality/tracking/exporters.py +238 -0
- pycharter/quality/tracking/models.py +194 -0
- pycharter/quality/tracking/store.py +385 -0
- pycharter/runtime_validator/__init__.py +20 -7
- pycharter/runtime_validator/builder.py +328 -0
- pycharter/runtime_validator/validator.py +311 -7
- pycharter/runtime_validator/validator_core.py +61 -0
- pycharter/schema_evolution/__init__.py +61 -0
- pycharter/schema_evolution/compatibility.py +270 -0
- pycharter/schema_evolution/diff.py +496 -0
- pycharter/schema_evolution/models.py +201 -0
- pycharter/shared/__init__.py +56 -0
- pycharter/shared/errors.py +296 -0
- pycharter/shared/protocols.py +234 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
- pycharter-0.0.24.dist-info/RECORD +543 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +1 -1
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +1 -1
- ui/static/__next._tree.txt +1 -1
- ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
- ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
- ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
- ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
- ui/static/_not-found/__next._full.txt +1 -1
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +1 -1
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +1 -1
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +1 -1
- ui/static/contracts/__next._full.txt +2 -2
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +1 -1
- ui/static/contracts/__next._tree.txt +1 -1
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +2 -2
- ui/static/documentation/__next._full.txt +1 -1
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +1 -1
- ui/static/documentation/__next._tree.txt +1 -1
- ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +1 -1
- ui/static/index.html +1 -1
- ui/static/index.txt +1 -1
- ui/static/metadata/__next._full.txt +1 -1
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +1 -1
- ui/static/metadata/__next._tree.txt +1 -1
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +1 -1
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +1 -1
- ui/static/quality/__next._tree.txt +1 -1
- ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +1 -1
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +1 -1
- ui/static/rules/__next._tree.txt +1 -1
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +1 -1
- ui/static/schemas/__next._full.txt +1 -1
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +1 -1
- ui/static/schemas/__next._tree.txt +1 -1
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +1 -1
- ui/static/settings/__next._full.txt +1 -1
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +1 -1
- ui/static/settings/__next._tree.txt +1 -1
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +1 -1
- ui/static/static/404/index.html +1 -1
- ui/static/static/404.html +1 -1
- ui/static/static/__next.__PAGE__.txt +1 -1
- ui/static/static/__next._full.txt +2 -2
- ui/static/static/__next._head.txt +1 -1
- ui/static/static/__next._index.txt +2 -2
- ui/static/static/__next._tree.txt +2 -2
- ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/static/_not-found/__next._full.txt +2 -2
- ui/static/static/_not-found/__next._head.txt +1 -1
- ui/static/static/_not-found/__next._index.txt +2 -2
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/static/_not-found/__next._not-found.txt +1 -1
- ui/static/static/_not-found/__next._tree.txt +2 -2
- ui/static/static/_not-found/index.html +1 -1
- ui/static/static/_not-found/index.txt +2 -2
- ui/static/static/contracts/__next._full.txt +3 -3
- ui/static/static/contracts/__next._head.txt +1 -1
- ui/static/static/contracts/__next._index.txt +2 -2
- ui/static/static/contracts/__next._tree.txt +2 -2
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/static/contracts/__next.contracts.txt +1 -1
- ui/static/static/contracts/index.html +1 -1
- ui/static/static/contracts/index.txt +3 -3
- ui/static/static/documentation/__next._full.txt +3 -3
- ui/static/static/documentation/__next._head.txt +1 -1
- ui/static/static/documentation/__next._index.txt +2 -2
- ui/static/static/documentation/__next._tree.txt +2 -2
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/static/documentation/__next.documentation.txt +1 -1
- ui/static/static/documentation/index.html +2 -2
- ui/static/static/documentation/index.txt +3 -3
- ui/static/static/index.html +1 -1
- ui/static/static/index.txt +2 -2
- ui/static/static/metadata/__next._full.txt +2 -2
- ui/static/static/metadata/__next._head.txt +1 -1
- ui/static/static/metadata/__next._index.txt +2 -2
- ui/static/static/metadata/__next._tree.txt +2 -2
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/static/metadata/__next.metadata.txt +1 -1
- ui/static/static/metadata/index.html +1 -1
- ui/static/static/metadata/index.txt +2 -2
- ui/static/static/quality/__next._full.txt +2 -2
- ui/static/static/quality/__next._head.txt +1 -1
- ui/static/static/quality/__next._index.txt +2 -2
- ui/static/static/quality/__next._tree.txt +2 -2
- ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/static/quality/__next.quality.txt +1 -1
- ui/static/static/quality/index.html +2 -2
- ui/static/static/quality/index.txt +2 -2
- ui/static/static/rules/__next._full.txt +2 -2
- ui/static/static/rules/__next._head.txt +1 -1
- ui/static/static/rules/__next._index.txt +2 -2
- ui/static/static/rules/__next._tree.txt +2 -2
- ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/static/rules/__next.rules.txt +1 -1
- ui/static/static/rules/index.html +1 -1
- ui/static/static/rules/index.txt +2 -2
- ui/static/static/schemas/__next._full.txt +2 -2
- ui/static/static/schemas/__next._head.txt +1 -1
- ui/static/static/schemas/__next._index.txt +2 -2
- ui/static/static/schemas/__next._tree.txt +2 -2
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/static/schemas/__next.schemas.txt +1 -1
- ui/static/static/schemas/index.html +1 -1
- ui/static/static/schemas/index.txt +2 -2
- ui/static/static/settings/__next._full.txt +2 -2
- ui/static/static/settings/__next._head.txt +1 -1
- ui/static/static/settings/__next._index.txt +2 -2
- ui/static/static/settings/__next._tree.txt +2 -2
- ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/static/settings/__next.settings.txt +1 -1
- ui/static/static/settings/index.html +1 -1
- ui/static/static/settings/index.txt +2 -2
- ui/static/static/static/.gitkeep +0 -0
- ui/static/static/static/404/index.html +1 -0
- ui/static/static/static/404.html +1 -0
- ui/static/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/static/__next._full.txt +30 -0
- ui/static/static/static/__next._head.txt +7 -0
- ui/static/static/static/__next._index.txt +9 -0
- ui/static/static/static/__next._tree.txt +2 -0
- ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/static/_not-found/index.html +1 -0
- ui/static/static/static/_not-found/index.txt +17 -0
- ui/static/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/static/contracts/index.html +1 -0
- ui/static/static/static/contracts/index.txt +21 -0
- ui/static/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/static/documentation/index.html +93 -0
- ui/static/static/static/documentation/index.txt +21 -0
- ui/static/static/static/index.html +1 -0
- ui/static/static/static/index.txt +30 -0
- ui/static/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/static/metadata/index.html +1 -0
- ui/static/static/static/metadata/index.txt +21 -0
- ui/static/static/static/quality/__next._full.txt +21 -0
- ui/static/static/static/quality/__next._head.txt +7 -0
- ui/static/static/static/quality/__next._index.txt +9 -0
- ui/static/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/static/quality/index.html +2 -0
- ui/static/static/static/quality/index.txt +21 -0
- ui/static/static/static/rules/__next._full.txt +21 -0
- ui/static/static/static/rules/__next._head.txt +7 -0
- ui/static/static/static/rules/__next._index.txt +9 -0
- ui/static/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/static/rules/index.html +1 -0
- ui/static/static/static/rules/index.txt +21 -0
- ui/static/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/static/schemas/index.html +1 -0
- ui/static/static/static/schemas/index.txt +21 -0
- ui/static/static/static/settings/__next._full.txt +21 -0
- ui/static/static/static/settings/__next._head.txt +7 -0
- ui/static/static/static/settings/__next._index.txt +9 -0
- ui/static/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/static/settings/index.html +1 -0
- ui/static/static/static/settings/index.txt +21 -0
- ui/static/static/static/validation/__next._full.txt +21 -0
- ui/static/static/static/validation/__next._head.txt +7 -0
- ui/static/static/static/validation/__next._index.txt +9 -0
- ui/static/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/static/validation/index.html +1 -0
- ui/static/static/static/validation/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +2 -2
- ui/static/static/validation/__next._head.txt +1 -1
- ui/static/static/validation/__next._index.txt +2 -2
- ui/static/static/validation/__next._tree.txt +2 -2
- ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/static/validation/__next.validation.txt +1 -1
- ui/static/static/validation/index.html +1 -1
- ui/static/static/validation/index.txt +2 -2
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +1 -1
- ui/static/validation/__next._tree.txt +1 -1
- ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/data/templates/template_coercion_rules.yaml +0 -15
- pycharter/data/templates/template_contract.yaml +0 -587
- pycharter/data/templates/template_metadata.yaml +0 -38
- pycharter/data/templates/template_schema.yaml +0 -22
- pycharter/data/templates/template_transform_advanced.yaml +0 -50
- pycharter/data/templates/template_transform_simple.yaml +0 -59
- pycharter/data/templates/template_validation_rules.yaml +0 -29
- pycharter/etl_generator/extraction.py +0 -916
- pycharter/etl_generator/factory.py +0 -174
- pycharter/etl_generator/orchestrator.py +0 -1650
- pycharter/integrations/__init__.py +0 -19
- pycharter/integrations/kafka.py +0 -178
- pycharter/integrations/streaming.py +0 -100
- pycharter-0.0.22.dist-info/RECORD +0 -358
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
|
@@ -0,0 +1,743 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pipeline class with | operator for chaining.
|
|
3
|
+
|
|
4
|
+
Supports both config-driven and programmatic pipeline construction.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import uuid
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
|
14
|
+
|
|
15
|
+
import yaml
|
|
16
|
+
|
|
17
|
+
from pycharter.etl_generator.context import PipelineContext
|
|
18
|
+
from pycharter.etl_generator.protocols import Extractor, Transformer, Loader
|
|
19
|
+
from pycharter.etl_generator.result import PipelineResult, BatchResult
|
|
20
|
+
from pycharter.shared.errors import ErrorContext, ErrorMode, get_error_context
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
# Variable pattern: ${VAR} or ${VAR:-default} or ${VAR:?error}
|
|
25
|
+
VARIABLE_PATTERN = re.compile(r'\$\{([^}:]+)(?::([?-])([^}]*))?\}')
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Pipeline:
|
|
29
|
+
"""
|
|
30
|
+
ETL Pipeline with | operator for chaining transformers.
|
|
31
|
+
|
|
32
|
+
Programmatic usage:
|
|
33
|
+
>>> pipeline = (
|
|
34
|
+
... Pipeline(HTTPExtractor(url="..."))
|
|
35
|
+
... | Rename({"old": "new"})
|
|
36
|
+
... | PostgresLoader(...)
|
|
37
|
+
... )
|
|
38
|
+
>>> result = await pipeline.run()
|
|
39
|
+
|
|
40
|
+
Config-driven usage:
|
|
41
|
+
>>> # From explicit files (most flexible)
|
|
42
|
+
>>> pipeline = Pipeline.from_config_files(
|
|
43
|
+
... extract="configs/extract.yaml",
|
|
44
|
+
... load="configs/load.yaml",
|
|
45
|
+
... variables={"API_KEY": "secret"}
|
|
46
|
+
... )
|
|
47
|
+
>>>
|
|
48
|
+
>>> # From directory (expects extract.yaml, transform.yaml, load.yaml)
|
|
49
|
+
>>> pipeline = Pipeline.from_config_dir("pipelines/users/")
|
|
50
|
+
>>>
|
|
51
|
+
>>> # From single file (pipeline.yaml with all sections)
|
|
52
|
+
>>> pipeline = Pipeline.from_config_file("pipelines/users/pipeline.yaml")
|
|
53
|
+
>>>
|
|
54
|
+
>>> result = await pipeline.run()
|
|
55
|
+
|
|
56
|
+
Async execution:
|
|
57
|
+
run() is async. From a script use asyncio.run():
|
|
58
|
+
asyncio.run(pipeline.run())
|
|
59
|
+
From an async context (FastAPI, Jupyter) await directly:
|
|
60
|
+
result = await pipeline.run()
|
|
61
|
+
See pycharter/etl_generator/ASYNC_AND_EXECUTION.md for details.
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
def __init__(
|
|
65
|
+
self,
|
|
66
|
+
extractor: Optional[Extractor] = None,
|
|
67
|
+
transformers: Optional[List[Transformer]] = None,
|
|
68
|
+
loader: Optional[Loader] = None,
|
|
69
|
+
context: Optional[PipelineContext] = None,
|
|
70
|
+
name: Optional[str] = None,
|
|
71
|
+
):
|
|
72
|
+
self.extractor = extractor
|
|
73
|
+
self._transformers: List[Transformer] = list(transformers) if transformers else []
|
|
74
|
+
self.loader = loader
|
|
75
|
+
self.context = context or PipelineContext()
|
|
76
|
+
self.name = name
|
|
77
|
+
|
|
78
|
+
def __or__(self, other: Union[Transformer, Loader]) -> "Pipeline":
    """
    Return a new pipeline extended with ``other`` via the ``|`` operator.

    If ``other`` passes ``isinstance(other, Loader)`` it becomes the new
    pipeline's loader; anything else is appended to the transformer
    chain. This pipeline is left untouched: the new pipeline gets its own
    copy of the transformer list, while extractor, context and name are
    carried over as-is.
    """
    replaces_loader = isinstance(other, Loader)

    chain = self._transformers.copy()
    if not replaces_loader:
        chain.append(other)

    return Pipeline(
        extractor=self.extractor,
        transformers=chain,
        loader=other if replaces_loader else self.loader,
        context=self.context,
        name=self.name,
    )
|
|
98
|
+
|
|
99
|
+
async def run(
    self,
    dry_run: bool = False,
    error_context: Optional[ErrorContext] = None,
    **params,
) -> PipelineResult:
    """
    Execute the pipeline: extract batches, transform them, load them.

    Args:
        dry_run: If True, extract and transform but never call the
            loader; the transformed row count of each batch is added to
            ``rows_loaded`` instead.
        error_context: Error context used to report failures. Falls back
            to ``get_error_context()`` when not supplied. Whether a
            reported failure raises is decided by that context's
            ``handle_error``.
        **params: Forwarded to both ``extractor.extract()`` and
            ``loader.load()``.

    Returns:
        PipelineResult carrying row/batch counters, per-batch results,
        timing and errors. Load failures are recorded on the affected
        batch's ``BatchResult``; exceptions escaping the batch loop are
        appended to ``result.errors``. Without an extractor the result is
        returned immediately with ``success=False``.
    """
    short_id = str(uuid.uuid4())[:8]
    started_at = datetime.now(timezone.utc)
    err_ctx = error_context if error_context else get_error_context()

    result = PipelineResult(
        pipeline_name=self.name,
        run_id=short_id,
        start_time=started_at,
    )

    # Nothing to do without a source.
    if not self.extractor:
        result.success = False
        result.errors.append("No extractor configured")
        return result

    logger.info(f"[{short_id}] Starting pipeline: {self.name or 'unnamed'}")

    try:
        batch_index = 0
        async for batch in self.extractor.extract(**params):
            batch_stats = BatchResult(batch_index=batch_index, rows_in=len(batch))

            # Transform
            transformed = self._apply_transforms(batch)
            batch_stats.rows_out = len(transformed)

            # Load (or, for a dry run, only count what would be loaded)
            if dry_run:
                result.rows_loaded += len(transformed)
            elif self.loader and transformed:
                try:
                    outcome = await self.loader.load(transformed, **params)
                    if not outcome.success:
                        failure = outcome.error or "Load failed"
                        err_ctx.handle_error(failure, category="load")
                        batch_stats.errors.append(failure)
                        batch_stats.rows_failed += len(transformed)
                    else:
                        result.rows_loaded += outcome.rows_loaded
                except Exception as load_exc:
                    err_ctx.handle_error(str(load_exc), load_exc, category="load")
                    batch_stats.errors.append(str(load_exc))
                    batch_stats.rows_failed += len(transformed)

            # Lengths are re-read here on purpose: they are taken after
            # the transform/load steps have run, as in the original flow.
            result.rows_extracted += len(batch)
            result.rows_transformed += len(transformed)
            result.batches_processed += 1
            result.batch_results.append(batch_stats)
            batch_index += 1

    except Exception as exc:
        result.success = False
        result.errors.append(str(exc))
        err_ctx.handle_error(str(exc), exc, category="pipeline")
        logger.error(f"[{short_id}] Pipeline error: {exc}")

    result.end_time = datetime.now(timezone.utc)
    result.duration_seconds = (result.end_time - started_at).total_seconds()
    result.rows_failed = sum(entry.rows_failed for entry in result.batch_results)

    # Any pipeline-level error marks the whole run as failed.
    if result.errors:
        result.success = False

    logger.info(f"[{short_id}] Complete: extracted={result.rows_extracted}, loaded={result.rows_loaded}")
    return result
|
|
184
|
+
|
|
185
|
+
def _apply_transforms(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
186
|
+
"""Apply all transformers to data."""
|
|
187
|
+
result = data
|
|
188
|
+
for transformer in self._transformers:
|
|
189
|
+
result = transformer.transform(result)
|
|
190
|
+
return result
|
|
191
|
+
|
|
192
|
+
# =========================================================================
|
|
193
|
+
# CONFIG-DRIVEN FACTORY METHODS
|
|
194
|
+
# =========================================================================
|
|
195
|
+
|
|
196
|
+
@classmethod
def from_config_files(
    cls,
    extract: Union[str, Path, Dict[str, Any]],
    load: Union[str, Path, Dict[str, Any]],
    transform: Optional[Union[str, Path, Dict[str, Any], List[Dict[str, Any]]]] = None,
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
    name: Optional[str] = None,
) -> "Pipeline":
    """
    Build a pipeline from explicitly given config files or dictionaries.

    No directory layout or file naming is assumed: every stage is
    supplied directly, either as a path or as an already-loaded config.

    Args:
        extract: Extract config, as a file path or a dict.
        load: Load config, as a file path or a dict.
        transform: Optional transform config, as a file path, dict or
            list. When omitted, an empty dict is used.
        variables: Values for ${VAR} substitution in config values.
        validate: If True, validate configs against schemas.
        name: Optional pipeline name.

    Returns:
        Configured Pipeline instance.

    Example:
        pipeline = Pipeline.from_config_files(
            extract="configs/my_http_source.yaml",
            transform="configs/my_transforms.yaml",
            load="configs/my_postgres_sink.yaml",
            variables={"API_KEY": "secret", "DB_URL": "postgresql://..."}
        )
    """
    substitutions = variables or {}

    # Resolve each stage's config; extract and load are mandatory.
    extract_config = _load_config_input(extract, substitutions)
    load_config = _load_config_input(load, substitutions)
    transform_config = (
        {} if transform is None else _load_config_input(transform, substitutions)
    )

    return cls._build_from_configs(
        extract_config=extract_config,
        transform_config=transform_config,
        load_config=load_config,
        variables=substitutions,
        validate=validate,
        name=name,
    )
|
|
250
|
+
|
|
251
|
+
@classmethod
def from_config_dir(
    cls,
    directory: Union[str, Path],
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
    name: Optional[str] = None,
) -> "Pipeline":
    """
    Build a pipeline from a directory of conventionally named config files.

    The directory is searched for:
        - extract.yaml   (must exist)
        - load.yaml      (must exist)
        - transform.yaml (used only when present)

    Args:
        directory: Directory holding the config files.
        variables: Values used for ${VAR} substitution while loading.
        validate: Forwarded to the builder; enables schema validation.
        name: Pipeline name; falls back to the directory's own name.

    Returns:
        Configured Pipeline instance

    Raises:
        NotADirectoryError: ``directory`` is not a directory.
        FileNotFoundError: extract.yaml or load.yaml is missing.

    Example:
        pipeline = Pipeline.from_config_dir(
            "pipelines/users/",
            variables={"DATA_DIR": "./data", "OUTPUT_DIR": "./output"}
        )
    """
    config_dir = Path(directory)
    if not config_dir.is_dir():
        raise NotADirectoryError(f"Not a directory: {config_dir}")

    substitutions = variables or {}

    # Required files are checked up front (extract first, then load) so a
    # missing file is reported before anything is read.
    required = {
        "extract": config_dir / "extract.yaml",
        "load": config_dir / "load.yaml",
    }
    for candidate in required.values():
        if not candidate.exists():
            raise FileNotFoundError(f"Required file not found: {candidate}")

    extract_cfg = _load_config_input(required["extract"], substitutions)
    load_cfg = _load_config_input(required["load"], substitutions)

    # The transform stage is optional; an absent file means "no transforms".
    optional_transform = config_dir / "transform.yaml"
    if optional_transform.exists():
        transform_cfg = _load_config_input(optional_transform, substitutions)
    else:
        transform_cfg = {}

    return cls._build_from_configs(
        extract_config=extract_cfg,
        transform_config=transform_cfg,
        load_config=load_cfg,
        variables=substitutions,
        validate=validate,
        name=name or config_dir.name,
    )
|
|
311
|
+
|
|
312
|
+
@classmethod
def from_config_file(
    cls,
    path: Union[str, Path],
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
) -> "Pipeline":
    """
    Build a pipeline from one config file that holds every section.

    Expected layout (``transform`` may be omitted):

        name: my_pipeline
        extract:
          type: http
          url: https://api.example.com
        transform:
          - rename: {old: new}
        load:
          type: file
          path: output.json

    Args:
        path: Location of the pipeline config file (YAML).
        variables: Values used for ${VAR} substitution while loading.
        validate: Forwarded to the builder; enables schema validation.

    Returns:
        Configured Pipeline instance

    Raises:
        FileNotFoundError: ``path`` does not exist.
        ValueError: ``path`` is not a regular file, or the file lacks an
            'extract' or 'load' section.

    Example:
        pipeline = Pipeline.from_config_file(
            "pipelines/users/pipeline.yaml",
            variables={"API_KEY": "secret"}
        )
    """
    config_path = Path(path)
    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")
    if not config_path.is_file():
        raise ValueError(f"Not a file: {config_path}. Use from_config_dir() for directories.")

    substitutions = variables or {}

    # The whole document is loaded (with variables substituted) in one go.
    pipeline_config = _load_config_input(config_path, substitutions)

    has_extract = "extract" in pipeline_config
    if not has_extract:
        raise ValueError(f"Config file missing 'extract' section: {config_path}")
    has_load = "load" in pipeline_config
    if not has_load:
        raise ValueError(f"Config file missing 'load' section: {config_path}")

    builder_kwargs = dict(
        extract_config=pipeline_config["extract"],
        transform_config=pipeline_config.get("transform", {}),
        load_config=pipeline_config["load"],
        variables=substitutions,
        validate=validate,
        name=pipeline_config.get("name"),
    )
    return cls._build_from_configs(**builder_kwargs)
|
|
372
|
+
|
|
373
|
+
@classmethod
def from_dict(
    cls,
    config: Dict[str, Any],
    variables: Optional[Dict[str, str]] = None,
    validate: bool = True,
) -> "Pipeline":
    """
    Create pipeline from a configuration dictionary.

    Args:
        config: Dict with 'extract', 'transform' (optional), 'load' sections.
            A 'transform' key that is present but set to None is treated
            the same as an omitted one.
        variables: Variables for ${VAR} substitution
        validate: If True, validate config against schema

    Returns:
        Configured Pipeline instance

    Raises:
        ValueError: If the 'extract' or 'load' section is missing.

    Example:
        pipeline = Pipeline.from_dict({
            "name": "my_pipeline",
            "extract": {"type": "http", "url": "https://api.example.com"},
            "transform": [{"rename": {"userId": "user_id"}}],
            "load": {"type": "file", "path": "${OUTPUT_DIR}/result.json"}
        }, variables={"OUTPUT_DIR": "./output"})
    """
    if "extract" not in config:
        raise ValueError("Config dict missing 'extract' section")
    if "load" not in config:
        raise ValueError("Config dict missing 'load' section")

    variables = variables or {}
    context = PipelineContext(variables=variables)

    # Resolve variables in config
    extract_config = context.resolve_dict(config["extract"])

    raw_transform = config.get("transform", {})
    if raw_transform is None:
        # An explicit null section (e.g. a dict parsed from "transform:" in
        # YAML) means "no transforms"; normalize it so the resolver is never
        # handed None. Downstream code treats {} and None identically.
        raw_transform = {}

    if isinstance(raw_transform, list):
        # List format: resolve each dict step, pass other items through
        # untouched so later stages can report them.
        transform_config = [
            context.resolve_dict(item) if isinstance(item, dict) else item
            for item in raw_transform
        ]
    else:
        transform_config = context.resolve_dict(raw_transform)
    load_config = context.resolve_dict(config["load"])

    return cls._build_from_configs(
        extract_config=extract_config,
        transform_config=transform_config,
        load_config=load_config,
        variables=variables,
        validate=validate,
        name=config.get("name"),
    )
|
|
427
|
+
|
|
428
|
+
@classmethod
def _build_from_configs(
    cls,
    extract_config: Dict[str, Any],
    transform_config: Union[Dict[str, Any], List[Dict[str, Any]]],
    load_config: Dict[str, Any],
    variables: Dict[str, str],
    validate: bool,
    name: Optional[str],
) -> "Pipeline":
    """Assemble a Pipeline from already-loaded extract/transform/load configs.

    When ``validate`` is true the three configs are checked with a strict
    ConfigValidator first (transform only if it is non-empty). Afterwards a
    PipelineContext and the extractor, transformer chain and loader are
    created and handed to the class constructor.
    """
    from pycharter.etl_generator.config_validator import ConfigValidator

    if validate:
        checker = ConfigValidator(strict=True)
        checker.validate_extract(extract_config)
        if transform_config:
            # The validator is given a dict; a bare list of steps is wrapped
            # under a 'transform' key.
            if isinstance(transform_config, list):
                transform_payload = {"transform": transform_config}
            else:
                transform_payload = transform_config
            checker.validate_transform(transform_payload)
        checker.validate_load(load_config)

    pipeline_context = PipelineContext(variables=variables)

    source = _create_extractor(extract_config)
    transformer_chain = _create_transformers(transform_config)
    sink = _create_loader(load_config)

    return cls(
        extractor=source,
        transformers=transformer_chain,
        loader=sink,
        context=pipeline_context,
        name=name,
    )
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# =============================================================================
|
|
471
|
+
# HELPER FUNCTIONS
|
|
472
|
+
# =============================================================================
|
|
473
|
+
|
|
474
|
+
def _load_config_input(
|
|
475
|
+
config_input: Union[str, Path, Dict[str, Any], List[Dict[str, Any]]],
|
|
476
|
+
variables: Dict[str, str],
|
|
477
|
+
) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
|
|
478
|
+
"""Load config from file path or return dict/list directly."""
|
|
479
|
+
if isinstance(config_input, (dict, list)):
|
|
480
|
+
return config_input
|
|
481
|
+
|
|
482
|
+
path = Path(config_input)
|
|
483
|
+
if not path.exists():
|
|
484
|
+
raise FileNotFoundError(f"Config file not found: {path}")
|
|
485
|
+
|
|
486
|
+
with open(path) as f:
|
|
487
|
+
content = f.read()
|
|
488
|
+
|
|
489
|
+
# Resolve variables in content before parsing
|
|
490
|
+
content = _resolve_variables(content, variables)
|
|
491
|
+
|
|
492
|
+
return yaml.safe_load(content) or {}
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def _resolve_variables(content: str, variables: Dict[str, str]) -> str:
    """Substitute every VARIABLE_PATTERN match in ``content``.

    For each placeholder the value is looked up in ``variables`` and then in
    the process environment; a missing or empty value counts as unset. For an
    unset variable the "-" modifier supplies its default text, the "?"
    modifier raises ValueError, and with no modifier the placeholder is left
    in the text as written.
    """
    def _substitute(match):
        var_name, modifier, modifier_value = match.group(1, 2, 3)

        # Caller-supplied values win over the environment.
        resolved = variables.get(var_name) or os.environ.get(var_name)
        if resolved:
            return resolved

        if modifier == "-":
            return "" if modifier_value is None else modifier_value
        if modifier == "?":
            raise ValueError(modifier_value or f"Required variable {var_name} is not set")

        # No value and no modifier: keep the original placeholder text.
        return match.group(0)

    return VARIABLE_PATTERN.sub(_substitute, content)
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def _create_extractor(config: Dict[str, Any]) -> Optional[Extractor]:
|
|
521
|
+
"""Create extractor from config using explicit type field."""
|
|
522
|
+
if not config:
|
|
523
|
+
return None
|
|
524
|
+
|
|
525
|
+
from pycharter.etl_generator.extractors import (
|
|
526
|
+
HTTPExtractor,
|
|
527
|
+
FileExtractor,
|
|
528
|
+
DatabaseExtractor,
|
|
529
|
+
CloudStorageExtractor,
|
|
530
|
+
)
|
|
531
|
+
|
|
532
|
+
EXTRACTOR_REGISTRY = {
|
|
533
|
+
"http": HTTPExtractor,
|
|
534
|
+
"file": FileExtractor,
|
|
535
|
+
"database": DatabaseExtractor,
|
|
536
|
+
"cloud_storage": CloudStorageExtractor,
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
# Get type field
|
|
540
|
+
extract_type = config.get("type")
|
|
541
|
+
|
|
542
|
+
if not extract_type:
|
|
543
|
+
raise ValueError(
|
|
544
|
+
"Extract config missing required 'type' field. "
|
|
545
|
+
f"Supported types: {list(EXTRACTOR_REGISTRY.keys())}"
|
|
546
|
+
)
|
|
547
|
+
|
|
548
|
+
extract_type = extract_type.lower()
|
|
549
|
+
extractor_class = EXTRACTOR_REGISTRY.get(extract_type)
|
|
550
|
+
|
|
551
|
+
if not extractor_class:
|
|
552
|
+
raise ValueError(
|
|
553
|
+
f"Unknown extractor type: '{extract_type}'. "
|
|
554
|
+
f"Supported types: {list(EXTRACTOR_REGISTRY.keys())}"
|
|
555
|
+
)
|
|
556
|
+
|
|
557
|
+
return extractor_class.from_config(config)
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def _create_transformers(config: Union[Dict[str, Any], List[Dict[str, Any]]]) -> List[Transformer]:
|
|
561
|
+
"""Create transformer chain from config."""
|
|
562
|
+
if not config:
|
|
563
|
+
return []
|
|
564
|
+
|
|
565
|
+
# Handle nested 'transform' key
|
|
566
|
+
if isinstance(config, dict) and "transform" in config:
|
|
567
|
+
config = config["transform"]
|
|
568
|
+
|
|
569
|
+
# List format - ordered transforms
|
|
570
|
+
if isinstance(config, list):
|
|
571
|
+
return _create_transformers_from_list(config)
|
|
572
|
+
|
|
573
|
+
# Dict format - fixed order
|
|
574
|
+
return _create_transformers_from_dict(config)
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
def _create_transformers_from_list(config: List[Dict[str, Any]]) -> List[Transformer]:
|
|
578
|
+
"""Create transformers from list format (user-specified order)."""
|
|
579
|
+
transformers = []
|
|
580
|
+
|
|
581
|
+
for step in config:
|
|
582
|
+
if not isinstance(step, dict):
|
|
583
|
+
logger.warning(f"Invalid transform step (expected dict): {step}")
|
|
584
|
+
continue
|
|
585
|
+
|
|
586
|
+
for op_name, op_config in step.items():
|
|
587
|
+
transformer = _create_single_transformer(op_name, op_config)
|
|
588
|
+
if transformer:
|
|
589
|
+
if isinstance(transformer, list):
|
|
590
|
+
transformers.extend(transformer)
|
|
591
|
+
else:
|
|
592
|
+
transformers.append(transformer)
|
|
593
|
+
|
|
594
|
+
return transformers
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _create_transformers_from_dict(config: Dict[str, Any]) -> List[Transformer]:
|
|
598
|
+
"""Create transformers from dict format (fixed order)."""
|
|
599
|
+
transformers = []
|
|
600
|
+
ordered_ops = ["rename", "convert", "defaults", "add", "select", "drop", "filter", "custom_function"]
|
|
601
|
+
|
|
602
|
+
for op_name in ordered_ops:
|
|
603
|
+
if op_name in config:
|
|
604
|
+
transformer = _create_single_transformer(op_name, config[op_name])
|
|
605
|
+
if transformer:
|
|
606
|
+
if isinstance(transformer, list):
|
|
607
|
+
transformers.extend(transformer)
|
|
608
|
+
else:
|
|
609
|
+
transformers.append(transformer)
|
|
610
|
+
|
|
611
|
+
return transformers
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def _create_single_transformer(op_name: str, op_config: Any) -> Optional[Union[Transformer, List[Transformer]]]:
    """Create a single transformer from operation name and config.

    Args:
        op_name: Operation name (case-insensitive): rename, add, drop,
            select, convert, defaults, filter or custom_function.
        op_config: Operation config. rename/add/convert/defaults/filter/
            custom_function expect a dict; drop/select expect a list.

    Returns:
        A transformer, a list of transformers (``add`` yields one AddField
        per field), or None when the operation is unknown or its config
        cannot be used. Every None result is accompanied by a warning so a
        mistyped config does not silently disable a step.
    """
    from pycharter.etl_generator.transformers import (
        Rename, AddField, Drop, Select, Filter, Convert, Default, CustomFunction,
    )
    from pycharter.etl_generator.transformers.simple_operations import convert_type

    op_name = op_name.lower()

    # Container type each known operation requires for its config.
    expected_shapes = {
        "rename": dict,
        "add": dict,
        "drop": list,
        "select": list,
        "convert": dict,
        "defaults": dict,
        "filter": dict,
        "custom_function": dict,
    }

    expected = expected_shapes.get(op_name)
    if expected is None:
        logger.warning(f"Unknown transform operation: {op_name}")
        return None

    if not isinstance(op_config, expected):
        logger.warning(
            f"Ignoring transform operation '{op_name}': expected "
            f"{expected.__name__} config, got {type(op_config).__name__}"
        )
        return None

    if op_name == "rename":
        return Rename(op_config)

    if op_name == "add":
        return [AddField(field, value) for field, value in op_config.items()]

    if op_name == "drop":
        return Drop(op_config)

    if op_name == "select":
        return Select(op_config)

    if op_name == "convert":
        type_map = {
            "int": int, "integer": int,
            "float": float, "number": float, "numeric": float,
            "str": str, "string": str,
            "bool": bool, "boolean": bool,
        }
        conversions = {}
        for field, target_type in op_config.items():
            target_lower = target_type.lower() if isinstance(target_type, str) else str(target_type)
            if target_lower in type_map:
                conversions[field] = type_map[target_lower]
            elif target_lower in ("datetime", "date"):
                # Bind the type name as a default argument so each lambda
                # keeps its own target instead of the loop's last value.
                conversions[field] = lambda v, t=target_lower: convert_type(v, t)
            else:
                # Unrecognized target types fall back to str.
                conversions[field] = str
        return Convert(conversions)

    if op_name == "defaults":
        return Default(op_config)

    if op_name == "filter":
        field = op_config.get("field")
        operator = op_config.get("operator", "eq")
        value = op_config.get("value")
        predicate = None
        if field and operator:
            predicate = _create_filter_predicate(field, operator, value)
        if not predicate:
            # Without a usable predicate the filter would be dropped and
            # every record would pass; make that visible.
            logger.warning(
                f"Ignoring filter: could not build predicate "
                f"(field={field!r}, operator={operator!r})"
            )
            return None
        return Filter(predicate)

    # Only custom_function remains.
    return CustomFunction(
        module=op_config.get("module"),
        function=op_config.get("function"),
        kwargs=op_config.get("kwargs", {}),
    )
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def _create_filter_predicate(field: str, operator: str, value: Any) -> Optional[Callable]:
|
|
687
|
+
"""Create a filter predicate function from operator and value."""
|
|
688
|
+
operators = {
|
|
689
|
+
"eq": lambda r: r.get(field) == value,
|
|
690
|
+
"ne": lambda r: r.get(field) != value,
|
|
691
|
+
"gt": lambda r: r.get(field) is not None and r.get(field) > value,
|
|
692
|
+
"gte": lambda r: r.get(field) is not None and r.get(field) >= value,
|
|
693
|
+
"lt": lambda r: r.get(field) is not None and r.get(field) < value,
|
|
694
|
+
"lte": lambda r: r.get(field) is not None and r.get(field) <= value,
|
|
695
|
+
"in": lambda r: r.get(field) in (value if isinstance(value, (list, tuple, set)) else [value]),
|
|
696
|
+
"not_in": lambda r: r.get(field) not in (value if isinstance(value, (list, tuple, set)) else [value]),
|
|
697
|
+
"contains": lambda r: value in str(r.get(field, "")),
|
|
698
|
+
"not_contains": lambda r: value not in str(r.get(field, "")),
|
|
699
|
+
"is_null": lambda r: r.get(field) is None,
|
|
700
|
+
"is_not_null": lambda r: r.get(field) is not None,
|
|
701
|
+
}
|
|
702
|
+
return operators.get(operator)
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def _create_loader(config: Dict[str, Any]) -> Optional[Loader]:
|
|
706
|
+
"""Create loader from config using explicit type field."""
|
|
707
|
+
if not config:
|
|
708
|
+
return None
|
|
709
|
+
|
|
710
|
+
from pycharter.etl_generator.loaders import (
|
|
711
|
+
PostgresLoader,
|
|
712
|
+
FileLoader,
|
|
713
|
+
CloudStorageLoader,
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
LOADER_REGISTRY = {
|
|
717
|
+
"postgres": PostgresLoader,
|
|
718
|
+
"postgresql": PostgresLoader,
|
|
719
|
+
"database": PostgresLoader,
|
|
720
|
+
"sqlite": PostgresLoader,
|
|
721
|
+
"file": FileLoader,
|
|
722
|
+
"cloud_storage": CloudStorageLoader,
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
# Get type field
|
|
726
|
+
load_type = config.get("type")
|
|
727
|
+
|
|
728
|
+
if not load_type:
|
|
729
|
+
raise ValueError(
|
|
730
|
+
"Load config missing required 'type' field. "
|
|
731
|
+
f"Supported types: postgres, sqlite, file, cloud_storage"
|
|
732
|
+
)
|
|
733
|
+
|
|
734
|
+
load_type = load_type.lower()
|
|
735
|
+
loader_class = LOADER_REGISTRY.get(load_type)
|
|
736
|
+
|
|
737
|
+
if not loader_class:
|
|
738
|
+
raise ValueError(
|
|
739
|
+
f"Unknown loader type: '{load_type}'. "
|
|
740
|
+
f"Supported types: postgres, sqlite, file, cloud_storage"
|
|
741
|
+
)
|
|
742
|
+
|
|
743
|
+
return loader_class.from_config(config)
|