pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/main.py +27 -1
- api/models/docs.py +68 -0
- api/models/evolution.py +117 -0
- api/models/tracking.py +111 -0
- api/models/validation.py +46 -6
- api/routes/v1/__init__.py +14 -1
- api/routes/v1/docs.py +187 -0
- api/routes/v1/evolution.py +337 -0
- api/routes/v1/templates.py +211 -27
- api/routes/v1/tracking.py +301 -0
- api/routes/v1/validation.py +68 -31
- pycharter/__init__.py +268 -58
- pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
- pycharter/data/templates/contract/template_contract.yaml +122 -0
- pycharter/data/templates/contract/template_metadata.yaml +68 -0
- pycharter/data/templates/contract/template_schema.yaml +100 -0
- pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
- pycharter/data/templates/etl/README.md +224 -0
- pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
- pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
- pycharter/data/templates/etl/extract_database.yaml +34 -0
- pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
- pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
- pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
- pycharter/data/templates/etl/extract_file_json.yaml +24 -0
- pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
- pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
- pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
- pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
- pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
- pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
- pycharter/data/templates/etl/load_file.yaml +34 -0
- pycharter/data/templates/etl/load_insert.yaml +18 -0
- pycharter/data/templates/etl/load_postgresql.yaml +39 -0
- pycharter/data/templates/etl/load_sqlite.yaml +21 -0
- pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
- pycharter/data/templates/etl/load_upsert.yaml +25 -0
- pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
- pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
- pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
- pycharter/data/templates/etl/transform_combined.yaml +48 -0
- pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
- pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
- pycharter/data/templates/etl/transform_simple.yaml +59 -0
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
- pycharter/docs_generator/__init__.py +43 -0
- pycharter/docs_generator/generator.py +465 -0
- pycharter/docs_generator/renderers.py +247 -0
- pycharter/etl_generator/__init__.py +168 -80
- pycharter/etl_generator/builder.py +121 -0
- pycharter/etl_generator/config_loader.py +394 -0
- pycharter/etl_generator/config_validator.py +418 -0
- pycharter/etl_generator/context.py +132 -0
- pycharter/etl_generator/expression.py +499 -0
- pycharter/etl_generator/extractors/__init__.py +30 -0
- pycharter/etl_generator/extractors/base.py +70 -0
- pycharter/etl_generator/extractors/cloud_storage.py +530 -0
- pycharter/etl_generator/extractors/database.py +221 -0
- pycharter/etl_generator/extractors/factory.py +185 -0
- pycharter/etl_generator/extractors/file.py +475 -0
- pycharter/etl_generator/extractors/http.py +895 -0
- pycharter/etl_generator/extractors/streaming.py +57 -0
- pycharter/etl_generator/loaders/__init__.py +41 -0
- pycharter/etl_generator/loaders/base.py +35 -0
- pycharter/etl_generator/loaders/cloud.py +87 -0
- pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
- pycharter/etl_generator/loaders/database.py +274 -0
- pycharter/etl_generator/loaders/factory.py +180 -0
- pycharter/etl_generator/loaders/file.py +72 -0
- pycharter/etl_generator/loaders/file_loader.py +130 -0
- pycharter/etl_generator/pipeline.py +743 -0
- pycharter/etl_generator/protocols.py +54 -0
- pycharter/etl_generator/result.py +63 -0
- pycharter/etl_generator/schemas/__init__.py +49 -0
- pycharter/etl_generator/transformers/__init__.py +49 -0
- pycharter/etl_generator/transformers/base.py +63 -0
- pycharter/etl_generator/transformers/config.py +45 -0
- pycharter/etl_generator/transformers/custom_function.py +101 -0
- pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
- pycharter/etl_generator/transformers/operations.py +218 -0
- pycharter/etl_generator/transformers/pipeline.py +54 -0
- pycharter/etl_generator/transformers/simple_operations.py +131 -0
- pycharter/quality/__init__.py +25 -0
- pycharter/quality/tracking/__init__.py +64 -0
- pycharter/quality/tracking/collector.py +318 -0
- pycharter/quality/tracking/exporters.py +238 -0
- pycharter/quality/tracking/models.py +194 -0
- pycharter/quality/tracking/store.py +385 -0
- pycharter/runtime_validator/__init__.py +20 -7
- pycharter/runtime_validator/builder.py +328 -0
- pycharter/runtime_validator/validator.py +311 -7
- pycharter/runtime_validator/validator_core.py +61 -0
- pycharter/schema_evolution/__init__.py +61 -0
- pycharter/schema_evolution/compatibility.py +270 -0
- pycharter/schema_evolution/diff.py +496 -0
- pycharter/schema_evolution/models.py +201 -0
- pycharter/shared/__init__.py +56 -0
- pycharter/shared/errors.py +296 -0
- pycharter/shared/protocols.py +234 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
- pycharter-0.0.24.dist-info/RECORD +543 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +1 -1
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +1 -1
- ui/static/__next._tree.txt +1 -1
- ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
- ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
- ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
- ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
- ui/static/_not-found/__next._full.txt +1 -1
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +1 -1
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +1 -1
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +1 -1
- ui/static/contracts/__next._full.txt +2 -2
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +1 -1
- ui/static/contracts/__next._tree.txt +1 -1
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +2 -2
- ui/static/documentation/__next._full.txt +1 -1
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +1 -1
- ui/static/documentation/__next._tree.txt +1 -1
- ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +1 -1
- ui/static/index.html +1 -1
- ui/static/index.txt +1 -1
- ui/static/metadata/__next._full.txt +1 -1
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +1 -1
- ui/static/metadata/__next._tree.txt +1 -1
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +1 -1
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +1 -1
- ui/static/quality/__next._tree.txt +1 -1
- ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +1 -1
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +1 -1
- ui/static/rules/__next._tree.txt +1 -1
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +1 -1
- ui/static/schemas/__next._full.txt +1 -1
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +1 -1
- ui/static/schemas/__next._tree.txt +1 -1
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +1 -1
- ui/static/settings/__next._full.txt +1 -1
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +1 -1
- ui/static/settings/__next._tree.txt +1 -1
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +1 -1
- ui/static/static/404/index.html +1 -1
- ui/static/static/404.html +1 -1
- ui/static/static/__next.__PAGE__.txt +1 -1
- ui/static/static/__next._full.txt +2 -2
- ui/static/static/__next._head.txt +1 -1
- ui/static/static/__next._index.txt +2 -2
- ui/static/static/__next._tree.txt +2 -2
- ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/static/_not-found/__next._full.txt +2 -2
- ui/static/static/_not-found/__next._head.txt +1 -1
- ui/static/static/_not-found/__next._index.txt +2 -2
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/static/_not-found/__next._not-found.txt +1 -1
- ui/static/static/_not-found/__next._tree.txt +2 -2
- ui/static/static/_not-found/index.html +1 -1
- ui/static/static/_not-found/index.txt +2 -2
- ui/static/static/contracts/__next._full.txt +3 -3
- ui/static/static/contracts/__next._head.txt +1 -1
- ui/static/static/contracts/__next._index.txt +2 -2
- ui/static/static/contracts/__next._tree.txt +2 -2
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/static/contracts/__next.contracts.txt +1 -1
- ui/static/static/contracts/index.html +1 -1
- ui/static/static/contracts/index.txt +3 -3
- ui/static/static/documentation/__next._full.txt +3 -3
- ui/static/static/documentation/__next._head.txt +1 -1
- ui/static/static/documentation/__next._index.txt +2 -2
- ui/static/static/documentation/__next._tree.txt +2 -2
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/static/documentation/__next.documentation.txt +1 -1
- ui/static/static/documentation/index.html +2 -2
- ui/static/static/documentation/index.txt +3 -3
- ui/static/static/index.html +1 -1
- ui/static/static/index.txt +2 -2
- ui/static/static/metadata/__next._full.txt +2 -2
- ui/static/static/metadata/__next._head.txt +1 -1
- ui/static/static/metadata/__next._index.txt +2 -2
- ui/static/static/metadata/__next._tree.txt +2 -2
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/static/metadata/__next.metadata.txt +1 -1
- ui/static/static/metadata/index.html +1 -1
- ui/static/static/metadata/index.txt +2 -2
- ui/static/static/quality/__next._full.txt +2 -2
- ui/static/static/quality/__next._head.txt +1 -1
- ui/static/static/quality/__next._index.txt +2 -2
- ui/static/static/quality/__next._tree.txt +2 -2
- ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/static/quality/__next.quality.txt +1 -1
- ui/static/static/quality/index.html +2 -2
- ui/static/static/quality/index.txt +2 -2
- ui/static/static/rules/__next._full.txt +2 -2
- ui/static/static/rules/__next._head.txt +1 -1
- ui/static/static/rules/__next._index.txt +2 -2
- ui/static/static/rules/__next._tree.txt +2 -2
- ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/static/rules/__next.rules.txt +1 -1
- ui/static/static/rules/index.html +1 -1
- ui/static/static/rules/index.txt +2 -2
- ui/static/static/schemas/__next._full.txt +2 -2
- ui/static/static/schemas/__next._head.txt +1 -1
- ui/static/static/schemas/__next._index.txt +2 -2
- ui/static/static/schemas/__next._tree.txt +2 -2
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/static/schemas/__next.schemas.txt +1 -1
- ui/static/static/schemas/index.html +1 -1
- ui/static/static/schemas/index.txt +2 -2
- ui/static/static/settings/__next._full.txt +2 -2
- ui/static/static/settings/__next._head.txt +1 -1
- ui/static/static/settings/__next._index.txt +2 -2
- ui/static/static/settings/__next._tree.txt +2 -2
- ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/static/settings/__next.settings.txt +1 -1
- ui/static/static/settings/index.html +1 -1
- ui/static/static/settings/index.txt +2 -2
- ui/static/static/static/.gitkeep +0 -0
- ui/static/static/static/404/index.html +1 -0
- ui/static/static/static/404.html +1 -0
- ui/static/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/static/__next._full.txt +30 -0
- ui/static/static/static/__next._head.txt +7 -0
- ui/static/static/static/__next._index.txt +9 -0
- ui/static/static/static/__next._tree.txt +2 -0
- ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/static/_not-found/index.html +1 -0
- ui/static/static/static/_not-found/index.txt +17 -0
- ui/static/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/static/contracts/index.html +1 -0
- ui/static/static/static/contracts/index.txt +21 -0
- ui/static/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/static/documentation/index.html +93 -0
- ui/static/static/static/documentation/index.txt +21 -0
- ui/static/static/static/index.html +1 -0
- ui/static/static/static/index.txt +30 -0
- ui/static/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/static/metadata/index.html +1 -0
- ui/static/static/static/metadata/index.txt +21 -0
- ui/static/static/static/quality/__next._full.txt +21 -0
- ui/static/static/static/quality/__next._head.txt +7 -0
- ui/static/static/static/quality/__next._index.txt +9 -0
- ui/static/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/static/quality/index.html +2 -0
- ui/static/static/static/quality/index.txt +21 -0
- ui/static/static/static/rules/__next._full.txt +21 -0
- ui/static/static/static/rules/__next._head.txt +7 -0
- ui/static/static/static/rules/__next._index.txt +9 -0
- ui/static/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/static/rules/index.html +1 -0
- ui/static/static/static/rules/index.txt +21 -0
- ui/static/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/static/schemas/index.html +1 -0
- ui/static/static/static/schemas/index.txt +21 -0
- ui/static/static/static/settings/__next._full.txt +21 -0
- ui/static/static/static/settings/__next._head.txt +7 -0
- ui/static/static/static/settings/__next._index.txt +9 -0
- ui/static/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/static/settings/index.html +1 -0
- ui/static/static/static/settings/index.txt +21 -0
- ui/static/static/static/validation/__next._full.txt +21 -0
- ui/static/static/static/validation/__next._head.txt +7 -0
- ui/static/static/static/validation/__next._index.txt +9 -0
- ui/static/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/static/validation/index.html +1 -0
- ui/static/static/static/validation/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +2 -2
- ui/static/static/validation/__next._head.txt +1 -1
- ui/static/static/validation/__next._index.txt +2 -2
- ui/static/static/validation/__next._tree.txt +2 -2
- ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/static/validation/__next.validation.txt +1 -1
- ui/static/static/validation/index.html +1 -1
- ui/static/static/validation/index.txt +2 -2
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +1 -1
- ui/static/validation/__next._tree.txt +1 -1
- ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/data/templates/template_coercion_rules.yaml +0 -15
- pycharter/data/templates/template_contract.yaml +0 -587
- pycharter/data/templates/template_metadata.yaml +0 -38
- pycharter/data/templates/template_schema.yaml +0 -22
- pycharter/data/templates/template_transform_advanced.yaml +0 -50
- pycharter/data/templates/template_transform_simple.yaml +0 -59
- pycharter/data/templates/template_validation_rules.yaml +0 -29
- pycharter/etl_generator/extraction.py +0 -916
- pycharter/etl_generator/factory.py +0 -174
- pycharter/etl_generator/orchestrator.py +0 -1650
- pycharter/integrations/__init__.py +0 -19
- pycharter/integrations/kafka.py +0 -178
- pycharter/integrations/streaming.py +0 -100
- pycharter-0.0.22.dist-info/RECORD +0 -358
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Protocol definitions for ETL components.
|
|
3
|
+
|
|
4
|
+
Uses Python's Protocol for structural subtyping (duck typing with type hints).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, AsyncIterator, Dict, List, Protocol, runtime_checkable
|
|
8
|
+
|
|
9
|
+
from pycharter.etl_generator.result import LoadResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@runtime_checkable
class Extractor(Protocol):
    """
    Protocol for data extractors.

    Extractors read data from sources (HTTP, files, databases, cloud storage)
    and yield batches of records.

    The protocol is ``runtime_checkable``, so ``isinstance(obj, Extractor)``
    only verifies that an ``extract`` attribute exists; it does not check the
    signature or that the method is asynchronous.
    """

    # Declared with a plain ``def`` on purpose. Calling an async generator
    # (an ``async def`` whose body contains ``yield``) returns the
    # AsyncIterator directly, while an ``async def`` stub without ``yield``
    # is typed as a coroutine that must be awaited first. Async-generator
    # implementations would not match that coroutine signature under static
    # type checking.
    def extract(self, **params) -> AsyncIterator[List[Dict[str, Any]]]:
        """
        Extract data from the source.

        Implementations are expected to be async generators consumed with
        ``async for``.

        Args:
            **params: Source-specific extraction parameters.

        Yields:
            Batches of records (list of dicts)
        """
        ...
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@runtime_checkable
class Transformer(Protocol):
    """
    Structural interface for the transform stage.

    A transformer receives a batch of records and hands back a batch of
    records. The protocol itself only requires a ``transform`` method; as it
    is ``runtime_checkable``, ``isinstance`` checks merely confirm that the
    attribute exists.
    """

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the transformed version of one batch of records."""
        ...
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@runtime_checkable
class Loader(Protocol):
    """
    Structural interface for the load stage.

    Loaders write data to destinations (databases, files, cloud storage).
    As the protocol is ``runtime_checkable``, ``isinstance`` checks only
    confirm that a ``load`` attribute exists.
    """

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """Write one batch of records to the destination and report the outcome."""
        ...
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Result classes for ETL operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class LoadResult:
    """Outcome of a single load operation."""

    # Overall outcome flag; a freshly created result counts as successful.
    success: bool = True
    # Row counters, both starting at zero.
    rows_loaded: int = 0
    rows_failed: int = 0
    # Optional error text; None when nothing was recorded.
    error: Optional[str] = None
    # Optional elapsed time in seconds; None when not measured.
    duration_seconds: Optional[float] = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class BatchResult:
    """Outcome of processing one batch."""

    # Position of the batch; the only field without a default.
    batch_index: int
    # Row counters, all starting at zero.
    rows_in: int = 0
    rows_out: int = 0
    rows_failed: int = 0
    # Error messages collected for this batch (fresh list per instance).
    errors: List[str] = field(default_factory=list)

    @property
    def success(self) -> bool:
        """True only when no errors were recorded and no rows failed."""
        if self.errors:
            return False
        return self.rows_failed == 0
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class PipelineResult:
    """Aggregate outcome of one ETL pipeline run."""

    # Overall outcome flag; defaults to success.
    success: bool = True
    # Row counters per stage, all starting at zero.
    rows_extracted: int = 0
    rows_transformed: int = 0
    rows_loaded: int = 0
    rows_failed: int = 0
    # Optional timing information; None until set.
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: Optional[float] = None
    # Batch bookkeeping.
    batches_processed: int = 0
    batch_results: List[BatchResult] = field(default_factory=list)
    # Error messages collected for the run (fresh list per instance).
    errors: List[str] = field(default_factory=list)
    # Optional identifiers for the run.
    pipeline_name: Optional[str] = None
    run_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Return a summary of the run as a plain dict.

        Only the fields listed below are exported; ``start_time``,
        ``end_time`` and ``batch_results`` are not part of the summary.
        The ``errors`` value is the same list object held by the instance.
        """
        exported = (
            "success",
            "rows_extracted",
            "rows_transformed",
            "rows_loaded",
            "rows_failed",
            "duration_seconds",
            "batches_processed",
            "errors",
            "pipeline_name",
            "run_id",
        )
        return {name: getattr(self, name) for name in exported}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JSON Schemas for ETL Pipeline Configuration.
|
|
3
|
+
|
|
4
|
+
Provides validation schemas for extract, transform, load, and pipeline configs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Dict, Any
|
|
10
|
+
|
|
11
|
+
SCHEMA_DIR = Path(__file__).parent
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_schema(name: str) -> Dict[str, Any]:
    """
    Load a JSON schema by name.

    Args:
        name: Schema name without extension; the file read is
            ``SCHEMA_DIR / f"{name}.json"``.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If no such schema file exists.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    schema_path = SCHEMA_DIR / f"{name}.json"
    try:
        # Decode explicitly as UTF-8 so parsing does not depend on the
        # platform's locale encoding.
        with open(schema_path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # Opening directly (instead of checking exists() first) avoids a
        # check-then-use race; the error message is kept unchanged.
        raise FileNotFoundError(f"Schema not found: {schema_path}") from None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_extract_schema() -> Dict[str, Any]:
    """Return the schema registered under the name ``extract``."""
    return load_schema(name="extract")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_transform_schema() -> Dict[str, Any]:
    """Return the schema registered under the name ``transform``."""
    return load_schema(name="transform")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_load_schema() -> Dict[str, Any]:
    """Return the schema registered under the name ``load``."""
    return load_schema(name="load")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_pipeline_schema() -> Dict[str, Any]:
    """Return the combined schema registered under the name ``pipeline``."""
    return load_schema(name="pipeline")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
__all__ = [
|
|
44
|
+
"load_schema",
|
|
45
|
+
"get_extract_schema",
|
|
46
|
+
"get_transform_schema",
|
|
47
|
+
"get_load_schema",
|
|
48
|
+
"get_pipeline_schema",
|
|
49
|
+
]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Transform stage for ETL pipelines.
|
|
3
|
+
|
|
4
|
+
Two APIs:
|
|
5
|
+
1. Config-driven: apply_transforms(data, config) - uses YAML config
|
|
6
|
+
2. Programmatic: Rename(...) | AddField(...) | Filter(...) - chainable
|
|
7
|
+
|
|
8
|
+
Pipeline order for config: Simple operations → JSONata → Custom function.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
# Config-driven API
|
|
12
|
+
from pycharter.etl_generator.transformers.pipeline import apply_transforms
|
|
13
|
+
|
|
14
|
+
# Chainable transformers
|
|
15
|
+
from pycharter.etl_generator.transformers.base import (
|
|
16
|
+
BaseTransformer,
|
|
17
|
+
TransformerChain,
|
|
18
|
+
)
|
|
19
|
+
from pycharter.etl_generator.transformers.operations import (
|
|
20
|
+
Rename,
|
|
21
|
+
AddField,
|
|
22
|
+
Drop,
|
|
23
|
+
Select,
|
|
24
|
+
Filter,
|
|
25
|
+
Convert,
|
|
26
|
+
Default,
|
|
27
|
+
Map,
|
|
28
|
+
FlatMap,
|
|
29
|
+
CustomFunction,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
# Config-driven
|
|
34
|
+
"apply_transforms",
|
|
35
|
+
# Base classes
|
|
36
|
+
"BaseTransformer",
|
|
37
|
+
"TransformerChain",
|
|
38
|
+
# Operations
|
|
39
|
+
"Rename",
|
|
40
|
+
"AddField",
|
|
41
|
+
"Drop",
|
|
42
|
+
"Select",
|
|
43
|
+
"Filter",
|
|
44
|
+
"Convert",
|
|
45
|
+
"Default",
|
|
46
|
+
"Map",
|
|
47
|
+
"FlatMap",
|
|
48
|
+
"CustomFunction",
|
|
49
|
+
]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base transformer class with | operator support.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BaseTransformer(ABC):
    """
    Abstract parent for transformers that can be composed with ``|``.

    Example:
        >>> chain = Rename({"a": "b"}) | AddField("c", "value") | Drop(["d"])
        >>> result = chain.transform(data)
    """

    @abstractmethod
    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Process one batch of records and return the resulting batch."""
        ...

    def __or__(self, other: "BaseTransformer") -> "TransformerChain":
        """Build a chain for ``self | other``; a chain on the right is flattened."""
        tail = other.transformers if isinstance(other, TransformerChain) else [other]
        return TransformerChain([self] + tail)

    def __ror__(self, other: "BaseTransformer") -> "TransformerChain":
        """Build a chain for ``other | self``; a chain on the left is flattened."""
        head = other.transformers if isinstance(other, TransformerChain) else [other]
        return TransformerChain(head + [self])
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TransformerChain(BaseTransformer):
    """
    Ordered sequence of transformers applied one after another.

    Instances are what the ``|`` operator produces.
    """

    def __init__(self, transformers: Optional[List[BaseTransformer]] = None):
        # Always store a private list so later changes to the argument do
        # not affect the chain.
        self.transformers: List[BaseTransformer] = list(transformers or [])

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Feed ``data`` through every transformer in order and return the final batch."""
        current = data
        for step in self.transformers:
            current = step.transform(current)
        return current

    def __or__(self, other: BaseTransformer) -> "TransformerChain":
        """Return a new chain with ``other`` appended; another chain is flattened in."""
        extra = other.transformers if isinstance(other, TransformerChain) else [other]
        return TransformerChain(self.transformers + extra)

    def __len__(self) -> int:
        """Number of transformers in the chain."""
        return len(self.transformers)

    def __iter__(self):
        """Iterate over the transformers in application order."""
        return iter(self.transformers)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Normalize transform configuration for use by transformer modules.
|
|
3
|
+
|
|
4
|
+
Supports both the canonical shape (transform: { rename, convert, ... }) and
|
|
5
|
+
legacy top-level keys; outputs a single normalized dict for each step.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def normalize_transform_config(raw: Dict[str, Any]) -> Dict[str, Any]:
    """
    Reduce a raw transform configuration to one predictable shape.

    Two input layouts are accepted:

    * Canonical: a ``transform`` mapping holding the simple operations
      (rename, convert, defaults, add, select, drop).
    * Legacy: the same operation keys placed at the top level. These are
      only read when ``raw`` has no ``transform`` key at all.

    Args:
        raw: Configuration mapping as loaded from the pipeline definition.

    Returns:
        A dict with exactly the keys ``simple_ops``, ``jsonata`` and
        ``custom_function``. Each value is a shallow copy of the configured
        mapping, or None when that section is absent or empty.
    """
    if "transform" in raw:
        # Canonical layout wins; top-level legacy keys are ignored entirely.
        simple_ops: Dict[str, Any] = dict(raw.get("transform") or {})
    else:
        simple_ops = {
            key: raw[key]
            for key in ("rename", "convert", "defaults", "add", "select", "drop")
            if raw.get(key) is not None
        }

    def _copied_section(name: str) -> Any:
        # Falsy sections (missing, None, empty) are reported as None.
        section = raw.get(name)
        return dict(section) if section else None

    return {
        "simple_ops": simple_ops or None,
        "jsonata": _copied_section("jsonata"),
        "custom_function": _copied_section("custom_function"),
    }
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom Python function transformation.
|
|
3
|
+
|
|
4
|
+
Dynamically imports and invokes a module/function or callable path,
|
|
5
|
+
with optional class instantiation (optimize/run/__call__).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import importlib
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, Dict, List
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def apply_custom_function(
    data: List[Dict[str, Any]], config: Dict[str, Any], **kwargs: Any
) -> List[Dict[str, Any]]:
    """
    Run a custom Python function for transformation.

    The target is imported dynamically. If the resolved attribute is a class,
    it is instantiated with no arguments and its ``optimize`` method, then its
    ``run`` method, then the instance itself (if callable) is used.

    Args:
        data: Input data.
        config: 'callable' ("module.func") or 'module' + 'function'.
            Optional 'mode': "batch" (default) calls the function once with
            the whole list; any other value calls it once per record.
            Optional 'kwargs': dict merged with **kwargs. A missing or null
            'kwargs' entry is treated as an empty dict.
        **kwargs: Runtime kwargs merged with config['kwargs']; runtime values
            win on key conflicts.

    Returns:
        Transformed list of records. In batch mode a None result becomes an
        empty list and a non-list result is wrapped in a single-item list.
        In record mode None results are dropped and list results are
        flattened into the output.

    Raises:
        ValueError: If the configuration is incomplete or malformed, the
            module or attribute cannot be resolved, a class target exposes no
            usable entry point, or the function itself raises.

    Example config:
        custom_function:
          module: "pyoptima"
          function: "optimize_from_etl_inputs"
          mode: "batch"
          kwargs:
            method: "min_volatility"
    """
    callable_path = config.get("callable")
    module_path = config.get("module")
    func_name = config.get("function")

    if callable_path:
        # 'callable' takes precedence over separate 'module'/'function' keys.
        parts = callable_path.rsplit(".", 1)
        if len(parts) != 2:
            raise ValueError(
                f"Invalid callable path: {callable_path}. "
                "Use 'module.function' format."
            )
        module_path, func_name = parts

    if not module_path or not func_name:
        raise ValueError(
            "custom_function requires either 'callable' or 'module' + 'function'"
        )

    try:
        module = importlib.import_module(module_path)
        func = getattr(module, func_name)
    except ImportError as e:
        raise ValueError(f"Cannot import module '{module_path}': {e}") from e
    except AttributeError as e:
        raise ValueError(
            f"Function '{func_name}' not found in module '{module_path}'"
        ) from e

    if isinstance(func, type):
        # Class target: instantiate and pick the first available entry point.
        instance = func()
        for entry_point in ("optimize", "run"):
            if hasattr(instance, entry_point):
                func = getattr(instance, entry_point)
                break
        else:
            if not callable(instance):
                raise ValueError(
                    f"Class '{func_name}' has no 'optimize', 'run', or '__call__'"
                )
            func = instance

    mode = config.get("mode", "batch")
    # `or {}` covers an explicit null (e.g. an empty YAML `kwargs:` key), which
    # .get() with a default would pass through as None and break the merge.
    func_kwargs = config.get("kwargs") or {}
    merged_kwargs = {**func_kwargs, **kwargs}

    try:
        if mode == "batch":
            result = func(data, **merged_kwargs)
            if result is None:
                return []
            return result if isinstance(result, list) else [result]

        results = []
        for record in data:
            record_result = func(record, **merged_kwargs)
            if record_result is None:
                continue
            if isinstance(record_result, list):
                results.extend(record_result)
            else:
                results.append(record_result)
        return results
    except Exception as e:
        logger.error("Custom function %r failed: %s", func_name, e)
        raise ValueError(f"Custom function error: {e}") from e
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JSONata-based transformation.
|
|
3
|
+
|
|
4
|
+
Applies a JSONata expression to data in batch or record mode.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any, Dict, List
|
|
9
|
+
|
|
10
|
+
import jsonata
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def apply_jsonata(
    data: List[Dict[str, Any]], config: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """
    Apply a JSONata expression to transform data.

    Args:
        data: Input data (list of records).
        config: Should have 'expression'; when it is missing or empty the
            input is returned unchanged. Optional 'mode': "batch" (default)
            evaluates the expression once against the whole list; any other
            value evaluates it once per record.

    Returns:
        Transformed list of records. In batch mode a None result becomes an
        empty list and a non-list result is wrapped in a single-item list.
        In record mode records whose result is None are dropped.

    Raises:
        ValueError: If compiling or evaluating the expression fails.

    Example config:
        jsonata:
          expression: |
            $.{"ticker": symbol, "avg_price": $average(prices)}
          mode: "batch"
    """
    expression_str = config.get("expression")
    if not expression_str:
        return data

    mode = config.get("mode", "batch")

    try:
        expr = jsonata.Jsonata(expression_str)

        if mode == "batch":
            result = expr.evaluate(data)
            if result is None:
                return []
            return result if isinstance(result, list) else [result]

        # Record mode: evaluate each record exactly once, so the value tested
        # against None is the same value that is kept.
        results = []
        for record in data:
            record_result = expr.evaluate(record)
            if record_result is not None:
                results.append(record_result)
        return results
    except Exception as e:
        logger.error("JSONata transformation failed: %s", e)
        raise ValueError(f"JSONata transformation error: {e}") from e
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Built-in transformer operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
|
6
|
+
|
|
7
|
+
from pycharter.etl_generator.transformers.base import BaseTransformer
|
|
8
|
+
from pycharter.etl_generator.expression import evaluate_expression, is_expression
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Rename(BaseTransformer):
    """
    Rename record fields according to a mapping.

    Keys found in ``mapping`` are replaced by the mapped name; all other
    keys are kept as they are.
    """

    def __init__(self, mapping: Dict[str, str]):
        self.mapping = mapping

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return new records with their keys renamed."""
        renamed = []
        for record in data:
            renamed.append(
                {self.mapping.get(name, name): value for name, value in record.items()}
            )
        return renamed
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AddField(BaseTransformer):
    """
    Add (or overwrite) one field on every record.

    The value may be given as:
    - a static value:    AddField("status", "active")
    - a callable:        AddField("full_name", lambda r: f"{r['first']} {r['last']}")
    - an expression:     AddField("full_name", "${first_name} ${last_name}")
    - a function call:   AddField("id", "uuid()")

    Expression syntax:
    - ${field_name} - Reference field value
    - ${field_name:-default} - Field with default
    - now() - Current timestamp
    - uuid() - Generate UUID
    - concat(${a}, " ", ${b}) - Concatenate values
    """

    def __init__(
        self,
        field: str,
        value: Union[Any, Callable[[Dict[str, Any]], Any]],
        evaluate_expressions: bool = True,
    ):
        """
        Set up the transformer.

        Args:
            field: Name of the field to write.
            value: Static value, callable taking the record, or expression string.
            evaluate_expressions: When True, string values recognised as
                expressions are evaluated per record. Pass False to store
                string values literally.
        """
        self.field = field
        self.value = value
        self.evaluate_expressions = evaluate_expressions

        # Classify the value once here instead of re-checking for every record.
        looks_like_expression = False
        if evaluate_expressions and isinstance(value, str):
            looks_like_expression = is_expression(value)
        self._is_expression = looks_like_expression

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of the records with the configured field set."""
        if callable(self.value):
            # Callable: invoked with the original record.
            def resolve(record):
                return self.value(record)
        elif self._is_expression:
            # Expression: evaluated in the context of the original record.
            def resolve(record):
                return evaluate_expression(self.value, record)
        else:
            # Static value: the same object is assigned to every record.
            def resolve(record):
                return self.value

        enriched_records = []
        for record in data:
            enriched = dict(record)
            enriched[self.field] = resolve(record)
            enriched_records.append(enriched)
        return enriched_records
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class Drop(BaseTransformer):
    """Remove the listed fields from every record."""

    def __init__(self, fields: List[str]):
        # Stored as a set for membership tests during transform.
        self.fields = set(fields)

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return new records without the dropped fields."""
        trimmed = []
        for record in data:
            kept = {name: value for name, value in record.items() if name not in self.fields}
            trimmed.append(kept)
        return trimmed
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class Select(BaseTransformer):
    """Keep only the listed fields in every record."""

    def __init__(self, fields: List[str]):
        # Stored as a set for membership tests during transform.
        self.fields = set(fields)

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return new records holding only the selected fields, in each record's own key order."""
        projected = []
        for record in data:
            kept = {name: value for name, value in record.items() if name in self.fields}
            projected.append(kept)
        return projected
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class Filter(BaseTransformer):
    """Keep only the records for which a predicate is truthy."""

    def __init__(self, predicate: Callable[[Dict[str, Any]], bool]):
        self.predicate = predicate

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the records accepted by the predicate, in their original order."""
        accepted = []
        for record in data:
            if self.predicate(record):
                accepted.append(record)
        return accepted
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class Convert(BaseTransformer):
    """
    Convert field values with per-field converter callables.

    ``errors`` controls what happens when a converter raises ValueError or
    TypeError: "raise" re-raises the exception, "null" stores None in the
    field, and any other value (default "ignore") leaves the field as it was.
    """

    def __init__(self, conversions: Dict[str, Callable[[Any], Any]], errors: str = "ignore"):
        self.conversions = conversions
        self.errors = errors

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of the records with the configured conversions applied."""
        return [self._convert_record(record) for record in data]

    def _convert_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a single record; fields absent from the record are skipped."""
        converted = dict(record)
        for name, convert in self.conversions.items():
            if name not in converted:
                continue
            try:
                converted[name] = convert(converted[name])
            except (ValueError, TypeError):
                if self.errors == "raise":
                    raise
                if self.errors == "null":
                    converted[name] = None
        return converted
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class Default(BaseTransformer):
    """
    Fill in default values for fields that are missing.

    With ``replace_null`` enabled (the default), fields that are present but
    hold None are filled as well.
    """

    def __init__(self, defaults: Dict[str, Any], replace_null: bool = True):
        self.defaults = defaults
        self.replace_null = replace_null

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of the records with defaults applied."""
        filled_records = []
        for record in data:
            filled = dict(record)
            for name, fallback in self.defaults.items():
                is_missing = name not in filled
                if is_missing or (self.replace_null and filled[name] is None):
                    filled[name] = fallback
            filled_records.append(filled)
        return filled_records
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class Map(BaseTransformer):
    """Replace each record with the result of calling a function on it."""

    def __init__(self, func: Callable[[Dict[str, Any]], Dict[str, Any]]):
        self.func = func

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return one output per input record, in the same order."""
        return list(map(self.func, data))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class FlatMap(BaseTransformer):
    """Expand each record into zero or more records via a function."""

    def __init__(self, func: Callable[[Dict[str, Any]], List[Dict[str, Any]]]):
        self.func = func

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the concatenation of the function's outputs, in input order."""
        return [produced for record in data for produced in self.func(record)]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class CustomFunction(BaseTransformer):
    """
    Delegate the transformation to a user-supplied function.

    The function is either passed directly as ``func`` or located by
    importing ``module`` and reading its ``function`` attribute. It is called
    with the whole batch plus the configured keyword arguments.
    """

    def __init__(
        self,
        module: Optional[str] = None,
        function: Optional[str] = None,
        func: Optional[Callable[[List[Dict[str, Any]]], List[Dict[str, Any]]]] = None,
        kwargs: Optional[Dict[str, Any]] = None,
    ):
        self.module = module
        self.function = function
        self._func = func
        self.kwargs = kwargs or {}

        # A directly supplied callable takes priority over module/function.
        if func is None and not (module and function):
            raise ValueError("Must provide either 'func' or both 'module' and 'function'")
        if func is None:
            self._resolved_func = self._import_function(module, function)
        else:
            self._resolved_func = func

    def _import_function(self, module: str, function: str) -> Callable:
        """Import ``module`` and return its attribute named ``function``."""
        import importlib

        return getattr(importlib.import_module(module), function)

    def transform(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Call the resolved function with the batch and return its result unchanged."""
        extra_arguments = self.kwargs
        return self._resolved_func(data, **extra_arguments)
|