pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/main.py +27 -1
- api/models/docs.py +68 -0
- api/models/evolution.py +117 -0
- api/models/tracking.py +111 -0
- api/models/validation.py +46 -6
- api/routes/v1/__init__.py +14 -1
- api/routes/v1/docs.py +187 -0
- api/routes/v1/evolution.py +337 -0
- api/routes/v1/templates.py +211 -27
- api/routes/v1/tracking.py +301 -0
- api/routes/v1/validation.py +68 -31
- pycharter/__init__.py +268 -58
- pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
- pycharter/data/templates/contract/template_contract.yaml +122 -0
- pycharter/data/templates/contract/template_metadata.yaml +68 -0
- pycharter/data/templates/contract/template_schema.yaml +100 -0
- pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
- pycharter/data/templates/etl/README.md +224 -0
- pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
- pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
- pycharter/data/templates/etl/extract_database.yaml +34 -0
- pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
- pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
- pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
- pycharter/data/templates/etl/extract_file_json.yaml +24 -0
- pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
- pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
- pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
- pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
- pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
- pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
- pycharter/data/templates/etl/load_file.yaml +34 -0
- pycharter/data/templates/etl/load_insert.yaml +18 -0
- pycharter/data/templates/etl/load_postgresql.yaml +39 -0
- pycharter/data/templates/etl/load_sqlite.yaml +21 -0
- pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
- pycharter/data/templates/etl/load_upsert.yaml +25 -0
- pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
- pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
- pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
- pycharter/data/templates/etl/transform_combined.yaml +48 -0
- pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
- pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
- pycharter/data/templates/etl/transform_simple.yaml +59 -0
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
- pycharter/docs_generator/__init__.py +43 -0
- pycharter/docs_generator/generator.py +465 -0
- pycharter/docs_generator/renderers.py +247 -0
- pycharter/etl_generator/__init__.py +168 -80
- pycharter/etl_generator/builder.py +121 -0
- pycharter/etl_generator/config_loader.py +394 -0
- pycharter/etl_generator/config_validator.py +418 -0
- pycharter/etl_generator/context.py +132 -0
- pycharter/etl_generator/expression.py +499 -0
- pycharter/etl_generator/extractors/__init__.py +30 -0
- pycharter/etl_generator/extractors/base.py +70 -0
- pycharter/etl_generator/extractors/cloud_storage.py +530 -0
- pycharter/etl_generator/extractors/database.py +221 -0
- pycharter/etl_generator/extractors/factory.py +185 -0
- pycharter/etl_generator/extractors/file.py +475 -0
- pycharter/etl_generator/extractors/http.py +895 -0
- pycharter/etl_generator/extractors/streaming.py +57 -0
- pycharter/etl_generator/loaders/__init__.py +41 -0
- pycharter/etl_generator/loaders/base.py +35 -0
- pycharter/etl_generator/loaders/cloud.py +87 -0
- pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
- pycharter/etl_generator/loaders/database.py +274 -0
- pycharter/etl_generator/loaders/factory.py +180 -0
- pycharter/etl_generator/loaders/file.py +72 -0
- pycharter/etl_generator/loaders/file_loader.py +130 -0
- pycharter/etl_generator/pipeline.py +743 -0
- pycharter/etl_generator/protocols.py +54 -0
- pycharter/etl_generator/result.py +63 -0
- pycharter/etl_generator/schemas/__init__.py +49 -0
- pycharter/etl_generator/transformers/__init__.py +49 -0
- pycharter/etl_generator/transformers/base.py +63 -0
- pycharter/etl_generator/transformers/config.py +45 -0
- pycharter/etl_generator/transformers/custom_function.py +101 -0
- pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
- pycharter/etl_generator/transformers/operations.py +218 -0
- pycharter/etl_generator/transformers/pipeline.py +54 -0
- pycharter/etl_generator/transformers/simple_operations.py +131 -0
- pycharter/quality/__init__.py +25 -0
- pycharter/quality/tracking/__init__.py +64 -0
- pycharter/quality/tracking/collector.py +318 -0
- pycharter/quality/tracking/exporters.py +238 -0
- pycharter/quality/tracking/models.py +194 -0
- pycharter/quality/tracking/store.py +385 -0
- pycharter/runtime_validator/__init__.py +20 -7
- pycharter/runtime_validator/builder.py +328 -0
- pycharter/runtime_validator/validator.py +311 -7
- pycharter/runtime_validator/validator_core.py +61 -0
- pycharter/schema_evolution/__init__.py +61 -0
- pycharter/schema_evolution/compatibility.py +270 -0
- pycharter/schema_evolution/diff.py +496 -0
- pycharter/schema_evolution/models.py +201 -0
- pycharter/shared/__init__.py +56 -0
- pycharter/shared/errors.py +296 -0
- pycharter/shared/protocols.py +234 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
- pycharter-0.0.24.dist-info/RECORD +543 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +1 -1
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +1 -1
- ui/static/__next._tree.txt +1 -1
- ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
- ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
- ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
- ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
- ui/static/_not-found/__next._full.txt +1 -1
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +1 -1
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +1 -1
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +1 -1
- ui/static/contracts/__next._full.txt +2 -2
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +1 -1
- ui/static/contracts/__next._tree.txt +1 -1
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +2 -2
- ui/static/documentation/__next._full.txt +1 -1
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +1 -1
- ui/static/documentation/__next._tree.txt +1 -1
- ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +1 -1
- ui/static/index.html +1 -1
- ui/static/index.txt +1 -1
- ui/static/metadata/__next._full.txt +1 -1
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +1 -1
- ui/static/metadata/__next._tree.txt +1 -1
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +1 -1
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +1 -1
- ui/static/quality/__next._tree.txt +1 -1
- ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +1 -1
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +1 -1
- ui/static/rules/__next._tree.txt +1 -1
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +1 -1
- ui/static/schemas/__next._full.txt +1 -1
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +1 -1
- ui/static/schemas/__next._tree.txt +1 -1
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +1 -1
- ui/static/settings/__next._full.txt +1 -1
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +1 -1
- ui/static/settings/__next._tree.txt +1 -1
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +1 -1
- ui/static/static/404/index.html +1 -1
- ui/static/static/404.html +1 -1
- ui/static/static/__next.__PAGE__.txt +1 -1
- ui/static/static/__next._full.txt +2 -2
- ui/static/static/__next._head.txt +1 -1
- ui/static/static/__next._index.txt +2 -2
- ui/static/static/__next._tree.txt +2 -2
- ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/static/_not-found/__next._full.txt +2 -2
- ui/static/static/_not-found/__next._head.txt +1 -1
- ui/static/static/_not-found/__next._index.txt +2 -2
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/static/_not-found/__next._not-found.txt +1 -1
- ui/static/static/_not-found/__next._tree.txt +2 -2
- ui/static/static/_not-found/index.html +1 -1
- ui/static/static/_not-found/index.txt +2 -2
- ui/static/static/contracts/__next._full.txt +3 -3
- ui/static/static/contracts/__next._head.txt +1 -1
- ui/static/static/contracts/__next._index.txt +2 -2
- ui/static/static/contracts/__next._tree.txt +2 -2
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/static/contracts/__next.contracts.txt +1 -1
- ui/static/static/contracts/index.html +1 -1
- ui/static/static/contracts/index.txt +3 -3
- ui/static/static/documentation/__next._full.txt +3 -3
- ui/static/static/documentation/__next._head.txt +1 -1
- ui/static/static/documentation/__next._index.txt +2 -2
- ui/static/static/documentation/__next._tree.txt +2 -2
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/static/documentation/__next.documentation.txt +1 -1
- ui/static/static/documentation/index.html +2 -2
- ui/static/static/documentation/index.txt +3 -3
- ui/static/static/index.html +1 -1
- ui/static/static/index.txt +2 -2
- ui/static/static/metadata/__next._full.txt +2 -2
- ui/static/static/metadata/__next._head.txt +1 -1
- ui/static/static/metadata/__next._index.txt +2 -2
- ui/static/static/metadata/__next._tree.txt +2 -2
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/static/metadata/__next.metadata.txt +1 -1
- ui/static/static/metadata/index.html +1 -1
- ui/static/static/metadata/index.txt +2 -2
- ui/static/static/quality/__next._full.txt +2 -2
- ui/static/static/quality/__next._head.txt +1 -1
- ui/static/static/quality/__next._index.txt +2 -2
- ui/static/static/quality/__next._tree.txt +2 -2
- ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/static/quality/__next.quality.txt +1 -1
- ui/static/static/quality/index.html +2 -2
- ui/static/static/quality/index.txt +2 -2
- ui/static/static/rules/__next._full.txt +2 -2
- ui/static/static/rules/__next._head.txt +1 -1
- ui/static/static/rules/__next._index.txt +2 -2
- ui/static/static/rules/__next._tree.txt +2 -2
- ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/static/rules/__next.rules.txt +1 -1
- ui/static/static/rules/index.html +1 -1
- ui/static/static/rules/index.txt +2 -2
- ui/static/static/schemas/__next._full.txt +2 -2
- ui/static/static/schemas/__next._head.txt +1 -1
- ui/static/static/schemas/__next._index.txt +2 -2
- ui/static/static/schemas/__next._tree.txt +2 -2
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/static/schemas/__next.schemas.txt +1 -1
- ui/static/static/schemas/index.html +1 -1
- ui/static/static/schemas/index.txt +2 -2
- ui/static/static/settings/__next._full.txt +2 -2
- ui/static/static/settings/__next._head.txt +1 -1
- ui/static/static/settings/__next._index.txt +2 -2
- ui/static/static/settings/__next._tree.txt +2 -2
- ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/static/settings/__next.settings.txt +1 -1
- ui/static/static/settings/index.html +1 -1
- ui/static/static/settings/index.txt +2 -2
- ui/static/static/static/.gitkeep +0 -0
- ui/static/static/static/404/index.html +1 -0
- ui/static/static/static/404.html +1 -0
- ui/static/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/static/__next._full.txt +30 -0
- ui/static/static/static/__next._head.txt +7 -0
- ui/static/static/static/__next._index.txt +9 -0
- ui/static/static/static/__next._tree.txt +2 -0
- ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/static/_not-found/index.html +1 -0
- ui/static/static/static/_not-found/index.txt +17 -0
- ui/static/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/static/contracts/index.html +1 -0
- ui/static/static/static/contracts/index.txt +21 -0
- ui/static/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/static/documentation/index.html +93 -0
- ui/static/static/static/documentation/index.txt +21 -0
- ui/static/static/static/index.html +1 -0
- ui/static/static/static/index.txt +30 -0
- ui/static/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/static/metadata/index.html +1 -0
- ui/static/static/static/metadata/index.txt +21 -0
- ui/static/static/static/quality/__next._full.txt +21 -0
- ui/static/static/static/quality/__next._head.txt +7 -0
- ui/static/static/static/quality/__next._index.txt +9 -0
- ui/static/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/static/quality/index.html +2 -0
- ui/static/static/static/quality/index.txt +21 -0
- ui/static/static/static/rules/__next._full.txt +21 -0
- ui/static/static/static/rules/__next._head.txt +7 -0
- ui/static/static/static/rules/__next._index.txt +9 -0
- ui/static/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/static/rules/index.html +1 -0
- ui/static/static/static/rules/index.txt +21 -0
- ui/static/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/static/schemas/index.html +1 -0
- ui/static/static/static/schemas/index.txt +21 -0
- ui/static/static/static/settings/__next._full.txt +21 -0
- ui/static/static/static/settings/__next._head.txt +7 -0
- ui/static/static/static/settings/__next._index.txt +9 -0
- ui/static/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/static/settings/index.html +1 -0
- ui/static/static/static/settings/index.txt +21 -0
- ui/static/static/static/validation/__next._full.txt +21 -0
- ui/static/static/static/validation/__next._head.txt +7 -0
- ui/static/static/static/validation/__next._index.txt +9 -0
- ui/static/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/static/validation/index.html +1 -0
- ui/static/static/static/validation/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +2 -2
- ui/static/static/validation/__next._head.txt +1 -1
- ui/static/static/validation/__next._index.txt +2 -2
- ui/static/static/validation/__next._tree.txt +2 -2
- ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/static/validation/__next.validation.txt +1 -1
- ui/static/static/validation/index.html +1 -1
- ui/static/static/validation/index.txt +2 -2
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +1 -1
- ui/static/validation/__next._tree.txt +1 -1
- ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/data/templates/template_coercion_rules.yaml +0 -15
- pycharter/data/templates/template_contract.yaml +0 -587
- pycharter/data/templates/template_metadata.yaml +0 -38
- pycharter/data/templates/template_schema.yaml +0 -22
- pycharter/data/templates/template_transform_advanced.yaml +0 -50
- pycharter/data/templates/template_transform_simple.yaml +0 -59
- pycharter/data/templates/template_validation_rules.yaml +0 -29
- pycharter/etl_generator/extraction.py +0 -916
- pycharter/etl_generator/factory.py +0 -174
- pycharter/etl_generator/orchestrator.py +0 -1650
- pycharter/integrations/__init__.py +0 -19
- pycharter/integrations/kafka.py +0 -178
- pycharter/integrations/streaming.py +0 -100
- pycharter-0.0.22.dist-info/RECORD +0 -358
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database loaders for ETL pipelines.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
from typing import Any, Dict, List, Optional, Union
|
|
8
|
+
|
|
9
|
+
from sqlalchemy import create_engine, text
|
|
10
|
+
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
|
|
11
|
+
from sqlalchemy.orm import sessionmaker
|
|
12
|
+
|
|
13
|
+
from pycharter.etl_generator.database import (
|
|
14
|
+
detect_database_type,
|
|
15
|
+
create_ssh_tunnel,
|
|
16
|
+
modify_url_for_tunnel,
|
|
17
|
+
load_data_postgresql,
|
|
18
|
+
load_data_mysql,
|
|
19
|
+
load_data_sqlite,
|
|
20
|
+
load_data_mssql,
|
|
21
|
+
DEFAULT_TUNNEL_LOCAL_PORT,
|
|
22
|
+
DB_POSTGRESQL,
|
|
23
|
+
DB_MYSQL,
|
|
24
|
+
DB_SQLITE,
|
|
25
|
+
DB_MSSQL,
|
|
26
|
+
)
|
|
27
|
+
from pycharter.etl_generator.loaders.base import BaseLoader
|
|
28
|
+
from pycharter.etl_generator.result import LoadResult
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class PostgresLoader(BaseLoader):
    """
    Loader for PostgreSQL databases.

    Supports:
    - Insert, upsert, replace, update, delete, truncate_and_load
    - Bulk operations for efficiency
    - SSH tunneling

    Example:
        >>> loader = PostgresLoader(
        ...     connection_string="postgresql://user:pass@localhost/db",
        ...     table="users",
        ...     write_method="upsert",
        ...     primary_key="id",
        ... )
        >>> result = await loader.load(data)
    """

    def __init__(
        self,
        connection_string: str,
        table: str,
        schema: str = "public",
        write_method: str = "upsert",
        primary_key: Optional[Union[str, List[str]]] = None,
        batch_size: int = 1000,
        ssh_tunnel: Optional[Dict[str, Any]] = None,
    ):
        """
        Store the load settings; no connection is opened here.

        Args:
            connection_string: Database URL. A plain ``postgresql://`` URL is
                rewritten to the ``+asyncpg`` driver when ``load`` runs.
            table: Target table name.
            schema: Target schema name.
            write_method: Write strategy forwarded to ``load_data_postgresql``.
            primary_key: Key column(s) forwarded to ``load_data_postgresql``.
            batch_size: Batch size forwarded to ``load_data_postgresql``.
            ssh_tunnel: Optional tunnel settings; only used when its
                ``"enabled"`` entry is truthy.
        """
        self.connection_string = connection_string
        self.table = table
        self.schema = schema
        self.write_method = write_method
        self.primary_key = primary_key
        self.batch_size = batch_size
        self.ssh_tunnel = ssh_tunnel

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "PostgresLoader":
        """
        Create loader from configuration dict.

        Values under the nested ``"database"`` mapping take precedence over
        the same keys at the top level of ``config``. ``batch_size`` is read
        from the top level only, and ``ssh_tunnel`` from ``"database"`` only.
        """
        db_config = config.get("database", {})
        return cls(
            connection_string=db_config.get("url") or config.get("connection_string"),
            table=db_config.get("table") or config.get("table"),
            schema=db_config.get("schema", config.get("schema", "public")),
            write_method=db_config.get("write_method", config.get("write_method", "upsert")),
            primary_key=db_config.get("primary_key") or config.get("primary_key"),
            batch_size=config.get("batch_size", 1000),
            ssh_tunnel=db_config.get("ssh_tunnel"),
        )

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Load data to PostgreSQL.

        Args:
            data: Records to write. An empty list short-circuits to a
                successful result with ``rows_loaded=0``.
            **params: Accepted for interface compatibility; not used here.

        Returns:
            A ``LoadResult``. Failures raised while connecting or loading are
            logged and reported as ``success=False`` with the error text
            rather than being raised.

        Raises:
            Exception: Whatever SSH tunnel setup raises. That step runs before
                the guarded section and is not converted into a ``LoadResult``.
        """
        start_time = time.time()

        if not data:
            return LoadResult(success=True, rows_loaded=0)

        # Handle SSH tunnel if configured
        tunnel = None
        connection_string = self.connection_string

        if self.ssh_tunnel and self.ssh_tunnel.get("enabled"):
            tunnel = create_ssh_tunnel(self.ssh_tunnel)
            if tunnel:
                try:
                    local_port = int(self.ssh_tunnel.get("local_port", DEFAULT_TUNNEL_LOCAL_PORT))
                    connection_string = modify_url_for_tunnel(
                        connection_string, local_port, DB_POSTGRESQL
                    )
                except Exception:
                    # The tunnel is already up; do not leave it running when
                    # the URL rewrite fails. The error still propagates.
                    tunnel.stop()
                    raise

        try:
            # Use async engine for PostgreSQL
            # Convert sync URL to async if needed
            if "+asyncpg" not in connection_string:
                async_url = connection_string.replace("postgresql://", "postgresql+asyncpg://")
            else:
                async_url = connection_string

            engine = create_async_engine(async_url, echo=False)
            try:
                async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

                async with async_session() as session:
                    result = await load_data_postgresql(
                        data=data,
                        session=session,
                        schema_name=self.schema,
                        table_name=self.table,
                        write_method=self.write_method,
                        primary_key=self.primary_key,
                        batch_size=self.batch_size,
                    )
            finally:
                # Dispose on failure as well as success so a failed load does
                # not leak the engine's connection pool.
                await engine.dispose()

            duration = time.time() - start_time
            logger.info(f"Loaded {result['total']} records to {self.schema}.{self.table} in {duration:.2f}s")

            return LoadResult(
                success=True,
                rows_loaded=result.get("inserted", 0) + result.get("updated", 0),
                duration_seconds=duration,
            )

        except Exception as e:
            logger.error(f"PostgreSQL load failed: {e}", exc_info=True)
            return LoadResult(
                success=False,
                error=str(e),
                duration_seconds=time.time() - start_time,
            )
        finally:
            if tunnel:
                tunnel.stop()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class DatabaseLoader(BaseLoader):
    """
    Generic database loader that auto-detects database type.

    Supports PostgreSQL, MySQL, SQLite, and MSSQL.

    Example:
        >>> loader = DatabaseLoader(
        ...     connection_string="mysql://user:pass@localhost/db",
        ...     table="users",
        ... )
        >>> result = await loader.load(data)
    """

    def __init__(
        self,
        connection_string: str,
        table: str,
        schema: Optional[str] = None,
        write_method: str = "upsert",
        primary_key: Optional[Union[str, List[str]]] = None,
        batch_size: int = 1000,
        ssh_tunnel: Optional[Dict[str, Any]] = None,
    ):
        """
        Store the load settings and detect the database type from the URL.

        Args:
            connection_string: Database URL; passed to
                ``detect_database_type`` to choose the load path.
            table: Target table name.
            schema: Optional schema name. When unset, ``load`` substitutes a
                per-database default ("public" for PostgreSQL, "dbo" for
                MSSQL, an empty string for MySQL).
            write_method: Write strategy forwarded to the load function.
            primary_key: Key column(s) forwarded to the load function.
            batch_size: Batch size forwarded to the load function.
            ssh_tunnel: Optional tunnel settings; only used when its
                ``"enabled"`` entry is truthy.
        """
        self.connection_string = connection_string
        self.table = table
        self.schema = schema
        self.write_method = write_method
        self.primary_key = primary_key
        self.batch_size = batch_size
        self.ssh_tunnel = ssh_tunnel
        self.db_type = detect_database_type(connection_string)

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "DatabaseLoader":
        """
        Create loader from configuration dict.

        Values under the nested ``"database"`` mapping take precedence over
        the same keys at the top level of ``config``. ``batch_size`` is read
        from the top level only, and ``ssh_tunnel`` from ``"database"`` only.
        """
        db_config = config.get("database", {})
        return cls(
            connection_string=db_config.get("url") or config.get("connection_string"),
            table=db_config.get("table") or config.get("table"),
            schema=db_config.get("schema") or config.get("schema"),
            write_method=db_config.get("write_method", config.get("write_method", "upsert")),
            primary_key=db_config.get("primary_key") or config.get("primary_key"),
            batch_size=config.get("batch_size", 1000),
            ssh_tunnel=db_config.get("ssh_tunnel"),
        )

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Load data using appropriate database loader.

        PostgreSQL targets are delegated to a ``PostgresLoader`` built from
        this loader's settings; every other database type goes through the
        synchronous path in ``_load_sync``.
        """
        if self.db_type == DB_POSTGRESQL:
            loader = PostgresLoader(
                connection_string=self.connection_string,
                table=self.table,
                schema=self.schema or "public",
                write_method=self.write_method,
                primary_key=self.primary_key,
                batch_size=self.batch_size,
                ssh_tunnel=self.ssh_tunnel,
            )
            return await loader.load(data, **params)
        else:
            # For non-PostgreSQL databases, use sync loading
            return await self._load_sync(data, **params)

    async def _load_sync(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Load data using sync database operations.

        Args:
            data: Records to write. An empty list short-circuits to a
                successful result with ``rows_loaded=0``.
            **params: Accepted for interface compatibility; not used here.

        Returns:
            A ``LoadResult``. Failures raised while connecting or loading,
            including an unsupported database type, are logged and reported
            as ``success=False`` with the error text rather than being raised.

        Raises:
            Exception: Whatever SSH tunnel setup raises. That step runs before
                the guarded section and is not converted into a ``LoadResult``.
        """
        start_time = time.time()

        if not data:
            return LoadResult(success=True, rows_loaded=0)

        # Handle SSH tunnel if configured
        tunnel = None
        connection_string = self.connection_string

        if self.ssh_tunnel and self.ssh_tunnel.get("enabled"):
            tunnel = create_ssh_tunnel(self.ssh_tunnel)
            if tunnel:
                try:
                    local_port = int(self.ssh_tunnel.get("local_port", DEFAULT_TUNNEL_LOCAL_PORT))
                    connection_string = modify_url_for_tunnel(
                        connection_string, local_port, self.db_type
                    )
                except Exception:
                    # The tunnel is already up; do not leave it running when
                    # the URL rewrite fails. The error still propagates.
                    tunnel.stop()
                    raise

        try:
            engine = create_engine(connection_string, echo=False)
            try:
                Session = sessionmaker(bind=engine)
                session = Session()
                try:
                    # Select appropriate load function
                    if self.db_type == DB_MYSQL:
                        result = load_data_mysql(
                            data, session, self.schema or "", self.table,
                            self.write_method, self.primary_key, self.batch_size
                        )
                    elif self.db_type == DB_SQLITE:
                        # The schema argument is always passed as empty here.
                        result = load_data_sqlite(
                            data, session, "", self.table,
                            self.write_method, self.primary_key, self.batch_size
                        )
                    elif self.db_type == DB_MSSQL:
                        result = load_data_mssql(
                            data, session, self.schema or "dbo", self.table,
                            self.write_method, self.primary_key, self.batch_size
                        )
                    else:
                        raise ValueError(f"Unsupported database type: {self.db_type}")
                finally:
                    # Close the session even when the load function raises.
                    session.close()
            finally:
                # Dispose on failure as well as success so a failed load does
                # not leak the engine's connection pool.
                engine.dispose()

            duration = time.time() - start_time
            return LoadResult(
                success=True,
                rows_loaded=result.get("inserted", 0) + result.get("updated", 0),
                duration_seconds=duration,
            )

        except Exception as e:
            logger.error(f"Database load failed: {e}", exc_info=True)
            return LoadResult(
                success=False,
                error=str(e),
                duration_seconds=time.time() - start_time,
            )
        finally:
            if tunnel:
                tunnel.stop()
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Loader factory for ETL pipelines.
|
|
3
|
+
|
|
4
|
+
Provides a registry pattern to select and instantiate the appropriate loader
|
|
5
|
+
based on the target type specified in load configuration.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from pycharter.etl_generator.loaders.factory import LoaderFactory
|
|
9
|
+
|
|
10
|
+
# Create loader from config
|
|
11
|
+
loader = LoaderFactory.create(load_config)
|
|
12
|
+
|
|
13
|
+
# Register custom loader
|
|
14
|
+
LoaderFactory.register("bigquery", BigQueryLoader)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Any, Dict, List, Optional, Type
|
|
19
|
+
|
|
20
|
+
from pycharter.etl_generator.loaders.base import BaseLoader
|
|
21
|
+
from pycharter.etl_generator.loaders.database import PostgresLoader, DatabaseLoader
|
|
22
|
+
from pycharter.etl_generator.loaders.file import FileLoader
|
|
23
|
+
from pycharter.etl_generator.loaders.cloud import CloudStorageLoader
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class LoaderFactory:
    """
    Factory for creating loader instances based on target type.

    The loader type is resolved in this order:
    - Explicit 'type' field (recommended)
    - Legacy 'target_type' field
    - Auto-detection from config keys (for backward compatibility)

    Example:
        # With explicit type (recommended)
        config = {"type": "postgres", "table": "users", "database": {"url": "..."}}
        loader = LoaderFactory.create(config)

        # Auto-detected (legacy)
        config = {"table": "users", "connection_string": "postgresql://..."}
        loader = LoaderFactory.create(config)  # Detected as postgres
    """

    # Registry of loaders keyed by lower-case target type.
    # It is a class attribute, so registrations are shared process-wide.
    _registry: Dict[str, Type[BaseLoader]] = {
        "postgres": PostgresLoader,
        "postgresql": PostgresLoader,
        "database": DatabaseLoader,
        "sqlite": DatabaseLoader,
        "file": FileLoader,
        "cloud_storage": CloudStorageLoader,
    }

    @classmethod
    def register(cls, type_name: str, loader_class: Type[BaseLoader]) -> None:
        """
        Register a custom loader class.

        Args:
            type_name: Type identifier (e.g., 'bigquery', 'snowflake').
                Stored lower-cased, so lookups are case-insensitive.
            loader_class: Loader class that inherits from BaseLoader

        Raises:
            TypeError: If loader_class is not a class derived from BaseLoader

        Example:
            class BigQueryLoader(BaseLoader):
                ...

            LoaderFactory.register("bigquery", BigQueryLoader)
        """
        # issubclass() itself raises an unhelpful TypeError for non-classes
        # (e.g. an instance), so check that first to keep one clear message.
        if not (isinstance(loader_class, type) and issubclass(loader_class, BaseLoader)):
            raise TypeError(f"Loader class must inherit from BaseLoader: {loader_class}")
        cls._registry[type_name.lower()] = loader_class
        logger.info("Registered loader: %s -> %s", type_name, loader_class.__name__)

    @classmethod
    def unregister(cls, type_name: str) -> None:
        """Remove a loader from the registry (no-op if it is not registered)."""
        cls._registry.pop(type_name.lower(), None)

    @classmethod
    def list_types(cls) -> List[str]:
        """List all registered loader types."""
        return list(cls._registry)

    @classmethod
    def _available_types(cls) -> str:
        """Return the registered type names as a stable, comma-separated string."""
        # Sorted so error messages are deterministic across runs.
        return ", ".join(sorted(cls._registry))

    @classmethod
    def create(cls, config: Dict[str, Any]) -> BaseLoader:
        """
        Create a loader instance from configuration.

        Args:
            config: Load configuration dictionary

        Returns:
            Configured loader instance

        Raises:
            ValueError: If type cannot be determined, is not a string,
                or is not registered
        """
        # Get type from config (check 'type' first, then 'target_type' for legacy)
        load_type = config.get("type") or config.get("target_type")

        # Auto-detect if not specified
        if not load_type:
            load_type = cls._detect_type(config)
            if not load_type:
                raise ValueError(
                    "Cannot determine loader type. "
                    f"Add 'type' field with one of: {cls._available_types()}"
                )
            logger.debug("Auto-detected loader type: %s", load_type)

        # A non-string type (e.g. a YAML number or list) would otherwise fail
        # on .lower() with an AttributeError instead of the documented ValueError.
        if not isinstance(load_type, str):
            raise ValueError(
                f"Loader type must be a string, got "
                f"{type(load_type).__name__}: {load_type!r}"
            )
        load_type = load_type.lower()

        # Get loader class from registry
        loader_class = cls._registry.get(load_type)
        if not loader_class:
            raise ValueError(
                f"Unknown loader type: '{load_type}'. "
                f"Available types: {cls._available_types()}. "
                f"Register custom loaders with LoaderFactory.register()"
            )

        # Create loader using from_config if available; otherwise fall back to
        # a no-argument constructor.
        if hasattr(loader_class, "from_config"):
            loader = loader_class.from_config(config)
        else:
            loader = loader_class()

        logger.debug("Created %s for type: %s", loader_class.__name__, load_type)
        return loader

    @classmethod
    def _detect_type(cls, config: Dict[str, Any]) -> Optional[str]:
        """
        Auto-detect loader type from configuration keys.

        This is for backward compatibility. New configs should use explicit 'type'.

        Returns:
            "sqlite", "postgres", "file" or "cloud_storage", or None when no
            known indicator key is present.
        """
        # Database indicators
        if "table" in config and ("connection_string" in config or "database" in config):
            # Look for a URL to sniff: the top-level connection string first,
            # then database.url. Values may be None or of an unexpected type
            # in hand-written configs, so normalise instead of assuming str/dict.
            conn_str = config.get("connection_string") or ""
            if not conn_str:
                database_cfg = config.get("database")
                if isinstance(database_cfg, dict):
                    conn_str = database_cfg.get("url") or ""
            if "sqlite" in str(conn_str).lower():
                return "sqlite"
            # NOTE(review): any non-SQLite URL (including MySQL/MSSQL ones) is
            # reported as postgres here; confirm whether those should map to
            # the generic "database" loader instead.
            return "postgres"

        # File indicators
        if ("path" in config or "file_path" in config) and "storage" not in config:
            return "file"

        # Cloud storage indicators
        if any(key in config for key in ("storage", "bucket", "container")):
            return "cloud_storage"

        return None

    # Legacy method name for consistency with ExtractorFactory
    @classmethod
    def get_loader(cls, load_config: Dict[str, Any]) -> BaseLoader:
        """Legacy method. Use create() instead."""
        return cls.create(load_config)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def get_loader(load_config: Dict[str, Any]) -> BaseLoader:
    """
    Build a loader for the given load configuration.

    Module-level shortcut that delegates to ``LoaderFactory.create``.

    Args:
        load_config: Load configuration dictionary

    Returns:
        Loader instance
    """
    loader = LoaderFactory.create(load_config)
    return loader
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File loader for ETL pipelines.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
from pycharter.etl_generator.loaders.base import BaseLoader
|
|
10
|
+
from pycharter.etl_generator.loaders.file_loader import load_to_file
|
|
11
|
+
from pycharter.etl_generator.result import LoadResult
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FileLoader(BaseLoader):
    """
    Loader for local files.

    Supports JSON, CSV, Parquet, and JSONL formats.

    Example:
        >>> loader = FileLoader(path="output/data.json", file_format="json")
        >>> result = await loader.load(data)
    """

    def __init__(
        self,
        path: str,
        file_format: str = "json",
        write_mode: str = "overwrite",
    ):
        """
        Store the output settings; nothing is validated or written here.

        Args:
            path: Path to the output file
            file_format: Output format name (default: "json")
            write_mode: "overwrite" or "append" (default: "overwrite")
        """
        self.path = path
        self.file_format = file_format
        self.write_mode = write_mode

    @classmethod
    def from_config(cls, config: Dict[str, Any]) -> "FileLoader":
        """
        Create loader from configuration dict.

        Reads 'file_path' (falling back to 'path'), 'format' and 'write_mode'.
        A missing path is not rejected here; it surfaces as a failed
        LoadResult when load() is called.
        """
        return cls(
            path=config.get("file_path") or config.get("path"),
            file_format=config.get("format", "json"),
            write_mode=config.get("write_mode", "overwrite"),
        )

    async def load(self, data: List[Dict[str, Any]], **params) -> LoadResult:
        """
        Load data to file.

        Args:
            data: Records to write
            **params: Accepted for interface compatibility; not used here

        Returns:
            LoadResult with success=True and the number of rows written, or
            success=False and the error message if writing raised.
        """
        start_time = time.time()

        # Nothing to write: report success without touching the target file.
        if not data:
            return LoadResult(
                success=True,
                rows_loaded=0,
                duration_seconds=time.time() - start_time,
            )

        try:
            load_config = {
                "file_path": self.path,
                "format": self.file_format,
                "write_mode": self.write_mode,
            }

            result = load_to_file(data, load_config)

            duration = time.time() - start_time
            return LoadResult(
                success=True,
                rows_loaded=result.get("written", 0),
                duration_seconds=duration,
            )

        except Exception as e:
            # Failures are reported through the result object rather than raised.
            return LoadResult(
                success=False,
                error=str(e),
                duration_seconds=time.time() - start_time,
            )
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File-based loader for ETL orchestrator.
|
|
3
|
+
|
|
4
|
+
Writes transformed data to local files in JSON, CSV, Parquet, or JSONL format.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import io
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
from pycharter.utils.value_injector import resolve_values
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
SUPPORTED_FORMATS = ("json", "csv", "parquet", "jsonl")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def load_to_file(
    data: List[Dict[str, Any]],
    load_config: Dict[str, Any],
    contract_dir: Optional[Any] = None,
    config_context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Write transformed data to a local file.

    Load config (destination_type: file):
        file_path: Path to output file (required). Supports ${VAR} resolution.
        format: json | csv | parquet | jsonl (default: json)
        write_mode: overwrite | append (default: overwrite).
            append: for jsonl/csv, appends lines; for json, read-merge-write (array concat).

    Args:
        data: Records to write
        load_config: Load configuration (see above)
        contract_dir: Optional directory (str or path-like) holding load.yaml;
            passed to value resolution as the source file
        config_context: Optional context passed to value resolution

    Returns:
        Dict with keys: written, total, path, format

    Raises:
        ValueError: If file_path is missing, or format / write_mode is unsupported
    """
    # Wrap in Path so a plain string contract_dir works too; the "/" operator
    # is not defined for str.
    source_file = str(Path(contract_dir) / "load.yaml") if contract_dir else None
    file_path = load_config.get("file_path")
    if not file_path:
        raise ValueError(
            "File loader requires 'file_path' in load configuration. "
            "Example: file_path: ./output/data.json"
        )
    file_path = resolve_values(
        file_path, context=config_context, source_file=source_file
    )
    path = Path(file_path)

    fmt = (load_config.get("format") or "json").lower()
    if fmt not in SUPPORTED_FORMATS:
        raise ValueError(
            f"File loader format must be one of {SUPPORTED_FORMATS}, got '{fmt}'"
        )
    write_mode = (load_config.get("write_mode") or "overwrite").lower()
    if write_mode not in ("overwrite", "append"):
        raise ValueError(
            "File loader write_mode must be 'overwrite' or 'append', "
            f"got '{write_mode}'"
        )

    # Create missing parent directories before any writer opens the file.
    path.parent.mkdir(parents=True, exist_ok=True)

    if fmt == "json":
        _write_json(data, path, write_mode)
    elif fmt == "jsonl":
        _write_jsonl(data, path, write_mode)
    elif fmt == "csv":
        _write_csv(data, path, write_mode)
    elif fmt == "parquet":
        _write_parquet(data, path, write_mode)

    logger.info("File loader wrote %s records to %s (%s)", len(data), path, fmt)
    return {"written": len(data), "total": len(data), "path": str(path), "format": fmt}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _write_json(
    data: List[Dict[str, Any]], path: Path, write_mode: str
) -> None:
    """
    Write records to ``path`` as an indented JSON array.

    In append mode with an existing file, the current content is read and the
    new records are concatenated after it (a non-list document is wrapped in a
    one-element list first). An existing file that is empty or whitespace-only
    is treated as having no records. Values json cannot encode are written
    via ``str``.

    Raises:
        json.JSONDecodeError: If the existing file has non-empty, invalid JSON
    """
    if write_mode == "append" and path.exists():
        raw = path.read_text(encoding="utf-8")
        # An empty file (e.g. freshly touched) is not valid JSON; appending to
        # it should behave like writing a new file rather than crash.
        if raw.strip():
            existing = json.loads(raw)
            if not isinstance(existing, list):
                existing = [existing]
            data = existing + data
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, default=str)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _write_jsonl(
    data: List[Dict[str, Any]], path: Path, write_mode: str
) -> None:
    """
    Write records to ``path`` as JSON Lines (one JSON document per line).

    In append mode with an existing file, records are added at the end;
    otherwise the file is created or truncated. Values json cannot encode are
    written via ``str``.
    """
    appending = write_mode == "append" and path.exists()

    # If the existing file does not end with a newline, the first appended
    # record would be glued onto the last existing line and corrupt both.
    needs_separator = False
    if appending and data and path.stat().st_size > 0:
        with open(path, "rb") as f:
            f.seek(-1, io.SEEK_END)
            needs_separator = f.read(1) != b"\n"

    with open(path, "a" if appending else "w", encoding="utf-8") as f:
        if needs_separator:
            f.write("\n")
        f.writelines(json.dumps(record, default=str) + "\n" for record in data)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _write_csv(
    data: List[Dict[str, Any]], path: Path, write_mode: str
) -> None:
    """
    Write records to ``path`` as CSV with a header row.

    Columns are the union of all record keys in first-seen order; records
    missing a column get an empty cell. In append mode with an existing file,
    rows are added at the end and the header is written only if that file is
    empty. Does nothing when ``data`` is empty.
    """
    if not data:
        return
    import csv

    # Take the union of keys rather than only the first record's keys, so a
    # later record with an extra field does not make DictWriter raise.
    fieldnames = list(dict.fromkeys(key for record in data for key in record))

    appending = write_mode == "append" and path.exists()
    # A new/truncated file needs a header, and so does an existing empty one.
    write_header = not appending or path.stat().st_size == 0

    with open(path, "a" if appending else "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if write_header:
            writer.writeheader()
        writer.writerows(data)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _write_parquet(
    data: List[Dict[str, Any]], path: Path, write_mode: str
) -> None:
    """
    Write records to ``path`` as a Parquet file via pandas.

    In append mode with an existing file, the current rows are read back and
    the new rows are concatenated after them before the file is rewritten.

    Raises:
        ImportError: If pandas is not installed
    """
    try:
        import pandas as pd
    except ImportError as exc:
        message = (
            "pandas is required for Parquet file load. "
            "Install with: pip install pandas pyarrow"
        )
        raise ImportError(message) from exc

    frame = pd.DataFrame(data)
    merge_with_existing = write_mode == "append" and path.exists()
    if merge_with_existing:
        previous = pd.read_parquet(path)
        frame = pd.concat([previous, frame], ignore_index=True)
    frame.to_parquet(path, index=False)
|