pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/main.py +27 -1
- api/models/docs.py +68 -0
- api/models/evolution.py +117 -0
- api/models/tracking.py +111 -0
- api/models/validation.py +46 -6
- api/routes/v1/__init__.py +14 -1
- api/routes/v1/docs.py +187 -0
- api/routes/v1/evolution.py +337 -0
- api/routes/v1/templates.py +211 -27
- api/routes/v1/tracking.py +301 -0
- api/routes/v1/validation.py +68 -31
- pycharter/__init__.py +268 -58
- pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
- pycharter/data/templates/contract/template_contract.yaml +122 -0
- pycharter/data/templates/contract/template_metadata.yaml +68 -0
- pycharter/data/templates/contract/template_schema.yaml +100 -0
- pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
- pycharter/data/templates/etl/README.md +224 -0
- pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
- pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
- pycharter/data/templates/etl/extract_database.yaml +34 -0
- pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
- pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
- pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
- pycharter/data/templates/etl/extract_file_json.yaml +24 -0
- pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
- pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
- pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
- pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
- pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
- pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
- pycharter/data/templates/etl/load_file.yaml +34 -0
- pycharter/data/templates/etl/load_insert.yaml +18 -0
- pycharter/data/templates/etl/load_postgresql.yaml +39 -0
- pycharter/data/templates/etl/load_sqlite.yaml +21 -0
- pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
- pycharter/data/templates/etl/load_upsert.yaml +25 -0
- pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
- pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
- pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
- pycharter/data/templates/etl/transform_combined.yaml +48 -0
- pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
- pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
- pycharter/data/templates/etl/transform_simple.yaml +59 -0
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
- pycharter/docs_generator/__init__.py +43 -0
- pycharter/docs_generator/generator.py +465 -0
- pycharter/docs_generator/renderers.py +247 -0
- pycharter/etl_generator/__init__.py +168 -80
- pycharter/etl_generator/builder.py +121 -0
- pycharter/etl_generator/config_loader.py +394 -0
- pycharter/etl_generator/config_validator.py +418 -0
- pycharter/etl_generator/context.py +132 -0
- pycharter/etl_generator/expression.py +499 -0
- pycharter/etl_generator/extractors/__init__.py +30 -0
- pycharter/etl_generator/extractors/base.py +70 -0
- pycharter/etl_generator/extractors/cloud_storage.py +530 -0
- pycharter/etl_generator/extractors/database.py +221 -0
- pycharter/etl_generator/extractors/factory.py +185 -0
- pycharter/etl_generator/extractors/file.py +475 -0
- pycharter/etl_generator/extractors/http.py +895 -0
- pycharter/etl_generator/extractors/streaming.py +57 -0
- pycharter/etl_generator/loaders/__init__.py +41 -0
- pycharter/etl_generator/loaders/base.py +35 -0
- pycharter/etl_generator/loaders/cloud.py +87 -0
- pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
- pycharter/etl_generator/loaders/database.py +274 -0
- pycharter/etl_generator/loaders/factory.py +180 -0
- pycharter/etl_generator/loaders/file.py +72 -0
- pycharter/etl_generator/loaders/file_loader.py +130 -0
- pycharter/etl_generator/pipeline.py +743 -0
- pycharter/etl_generator/protocols.py +54 -0
- pycharter/etl_generator/result.py +63 -0
- pycharter/etl_generator/schemas/__init__.py +49 -0
- pycharter/etl_generator/transformers/__init__.py +49 -0
- pycharter/etl_generator/transformers/base.py +63 -0
- pycharter/etl_generator/transformers/config.py +45 -0
- pycharter/etl_generator/transformers/custom_function.py +101 -0
- pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
- pycharter/etl_generator/transformers/operations.py +218 -0
- pycharter/etl_generator/transformers/pipeline.py +54 -0
- pycharter/etl_generator/transformers/simple_operations.py +131 -0
- pycharter/quality/__init__.py +25 -0
- pycharter/quality/tracking/__init__.py +64 -0
- pycharter/quality/tracking/collector.py +318 -0
- pycharter/quality/tracking/exporters.py +238 -0
- pycharter/quality/tracking/models.py +194 -0
- pycharter/quality/tracking/store.py +385 -0
- pycharter/runtime_validator/__init__.py +20 -7
- pycharter/runtime_validator/builder.py +328 -0
- pycharter/runtime_validator/validator.py +311 -7
- pycharter/runtime_validator/validator_core.py +61 -0
- pycharter/schema_evolution/__init__.py +61 -0
- pycharter/schema_evolution/compatibility.py +270 -0
- pycharter/schema_evolution/diff.py +496 -0
- pycharter/schema_evolution/models.py +201 -0
- pycharter/shared/__init__.py +56 -0
- pycharter/shared/errors.py +296 -0
- pycharter/shared/protocols.py +234 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
- pycharter-0.0.24.dist-info/RECORD +543 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +1 -1
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +1 -1
- ui/static/__next._tree.txt +1 -1
- ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
- ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
- ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
- ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
- ui/static/_not-found/__next._full.txt +1 -1
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +1 -1
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +1 -1
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +1 -1
- ui/static/contracts/__next._full.txt +2 -2
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +1 -1
- ui/static/contracts/__next._tree.txt +1 -1
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +2 -2
- ui/static/documentation/__next._full.txt +1 -1
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +1 -1
- ui/static/documentation/__next._tree.txt +1 -1
- ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +1 -1
- ui/static/index.html +1 -1
- ui/static/index.txt +1 -1
- ui/static/metadata/__next._full.txt +1 -1
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +1 -1
- ui/static/metadata/__next._tree.txt +1 -1
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +1 -1
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +1 -1
- ui/static/quality/__next._tree.txt +1 -1
- ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +1 -1
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +1 -1
- ui/static/rules/__next._tree.txt +1 -1
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +1 -1
- ui/static/schemas/__next._full.txt +1 -1
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +1 -1
- ui/static/schemas/__next._tree.txt +1 -1
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +1 -1
- ui/static/settings/__next._full.txt +1 -1
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +1 -1
- ui/static/settings/__next._tree.txt +1 -1
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +1 -1
- ui/static/static/404/index.html +1 -1
- ui/static/static/404.html +1 -1
- ui/static/static/__next.__PAGE__.txt +1 -1
- ui/static/static/__next._full.txt +2 -2
- ui/static/static/__next._head.txt +1 -1
- ui/static/static/__next._index.txt +2 -2
- ui/static/static/__next._tree.txt +2 -2
- ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/static/_not-found/__next._full.txt +2 -2
- ui/static/static/_not-found/__next._head.txt +1 -1
- ui/static/static/_not-found/__next._index.txt +2 -2
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/static/_not-found/__next._not-found.txt +1 -1
- ui/static/static/_not-found/__next._tree.txt +2 -2
- ui/static/static/_not-found/index.html +1 -1
- ui/static/static/_not-found/index.txt +2 -2
- ui/static/static/contracts/__next._full.txt +3 -3
- ui/static/static/contracts/__next._head.txt +1 -1
- ui/static/static/contracts/__next._index.txt +2 -2
- ui/static/static/contracts/__next._tree.txt +2 -2
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/static/contracts/__next.contracts.txt +1 -1
- ui/static/static/contracts/index.html +1 -1
- ui/static/static/contracts/index.txt +3 -3
- ui/static/static/documentation/__next._full.txt +3 -3
- ui/static/static/documentation/__next._head.txt +1 -1
- ui/static/static/documentation/__next._index.txt +2 -2
- ui/static/static/documentation/__next._tree.txt +2 -2
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/static/documentation/__next.documentation.txt +1 -1
- ui/static/static/documentation/index.html +2 -2
- ui/static/static/documentation/index.txt +3 -3
- ui/static/static/index.html +1 -1
- ui/static/static/index.txt +2 -2
- ui/static/static/metadata/__next._full.txt +2 -2
- ui/static/static/metadata/__next._head.txt +1 -1
- ui/static/static/metadata/__next._index.txt +2 -2
- ui/static/static/metadata/__next._tree.txt +2 -2
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/static/metadata/__next.metadata.txt +1 -1
- ui/static/static/metadata/index.html +1 -1
- ui/static/static/metadata/index.txt +2 -2
- ui/static/static/quality/__next._full.txt +2 -2
- ui/static/static/quality/__next._head.txt +1 -1
- ui/static/static/quality/__next._index.txt +2 -2
- ui/static/static/quality/__next._tree.txt +2 -2
- ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/static/quality/__next.quality.txt +1 -1
- ui/static/static/quality/index.html +2 -2
- ui/static/static/quality/index.txt +2 -2
- ui/static/static/rules/__next._full.txt +2 -2
- ui/static/static/rules/__next._head.txt +1 -1
- ui/static/static/rules/__next._index.txt +2 -2
- ui/static/static/rules/__next._tree.txt +2 -2
- ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/static/rules/__next.rules.txt +1 -1
- ui/static/static/rules/index.html +1 -1
- ui/static/static/rules/index.txt +2 -2
- ui/static/static/schemas/__next._full.txt +2 -2
- ui/static/static/schemas/__next._head.txt +1 -1
- ui/static/static/schemas/__next._index.txt +2 -2
- ui/static/static/schemas/__next._tree.txt +2 -2
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/static/schemas/__next.schemas.txt +1 -1
- ui/static/static/schemas/index.html +1 -1
- ui/static/static/schemas/index.txt +2 -2
- ui/static/static/settings/__next._full.txt +2 -2
- ui/static/static/settings/__next._head.txt +1 -1
- ui/static/static/settings/__next._index.txt +2 -2
- ui/static/static/settings/__next._tree.txt +2 -2
- ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/static/settings/__next.settings.txt +1 -1
- ui/static/static/settings/index.html +1 -1
- ui/static/static/settings/index.txt +2 -2
- ui/static/static/static/.gitkeep +0 -0
- ui/static/static/static/404/index.html +1 -0
- ui/static/static/static/404.html +1 -0
- ui/static/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/static/__next._full.txt +30 -0
- ui/static/static/static/__next._head.txt +7 -0
- ui/static/static/static/__next._index.txt +9 -0
- ui/static/static/static/__next._tree.txt +2 -0
- ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/static/_not-found/index.html +1 -0
- ui/static/static/static/_not-found/index.txt +17 -0
- ui/static/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/static/contracts/index.html +1 -0
- ui/static/static/static/contracts/index.txt +21 -0
- ui/static/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/static/documentation/index.html +93 -0
- ui/static/static/static/documentation/index.txt +21 -0
- ui/static/static/static/index.html +1 -0
- ui/static/static/static/index.txt +30 -0
- ui/static/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/static/metadata/index.html +1 -0
- ui/static/static/static/metadata/index.txt +21 -0
- ui/static/static/static/quality/__next._full.txt +21 -0
- ui/static/static/static/quality/__next._head.txt +7 -0
- ui/static/static/static/quality/__next._index.txt +9 -0
- ui/static/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/static/quality/index.html +2 -0
- ui/static/static/static/quality/index.txt +21 -0
- ui/static/static/static/rules/__next._full.txt +21 -0
- ui/static/static/static/rules/__next._head.txt +7 -0
- ui/static/static/static/rules/__next._index.txt +9 -0
- ui/static/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/static/rules/index.html +1 -0
- ui/static/static/static/rules/index.txt +21 -0
- ui/static/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/static/schemas/index.html +1 -0
- ui/static/static/static/schemas/index.txt +21 -0
- ui/static/static/static/settings/__next._full.txt +21 -0
- ui/static/static/static/settings/__next._head.txt +7 -0
- ui/static/static/static/settings/__next._index.txt +9 -0
- ui/static/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/static/settings/index.html +1 -0
- ui/static/static/static/settings/index.txt +21 -0
- ui/static/static/static/validation/__next._full.txt +21 -0
- ui/static/static/static/validation/__next._head.txt +7 -0
- ui/static/static/static/validation/__next._index.txt +9 -0
- ui/static/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/static/validation/index.html +1 -0
- ui/static/static/static/validation/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +2 -2
- ui/static/static/validation/__next._head.txt +1 -1
- ui/static/static/validation/__next._index.txt +2 -2
- ui/static/static/validation/__next._tree.txt +2 -2
- ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/static/validation/__next.validation.txt +1 -1
- ui/static/static/validation/index.html +1 -1
- ui/static/static/validation/index.txt +2 -2
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +1 -1
- ui/static/validation/__next._tree.txt +1 -1
- ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/data/templates/template_coercion_rules.yaml +0 -15
- pycharter/data/templates/template_contract.yaml +0 -587
- pycharter/data/templates/template_metadata.yaml +0 -38
- pycharter/data/templates/template_schema.yaml +0 -22
- pycharter/data/templates/template_transform_advanced.yaml +0 -50
- pycharter/data/templates/template_transform_simple.yaml +0 -59
- pycharter/data/templates/template_validation_rules.yaml +0 -29
- pycharter/etl_generator/extraction.py +0 -916
- pycharter/etl_generator/factory.py +0 -174
- pycharter/etl_generator/orchestrator.py +0 -1650
- pycharter/integrations/__init__.py +0 -19
- pycharter/integrations/kafka.py +0 -178
- pycharter/integrations/streaming.py +0 -100
- pycharter-0.0.22.dist-info/RECORD +0 -358
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Template: HTTP/API Extraction with Pagination
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
#
|
|
4
|
+
# Supported pagination strategies:
|
|
5
|
+
# - page: Page number pagination (?page=1, ?page=2)
|
|
6
|
+
# - offset: Offset pagination (?offset=0, ?offset=100)
|
|
7
|
+
# - cursor: Cursor-based pagination (response contains next_cursor)
|
|
8
|
+
# - next_url: URL-based pagination (response contains next URL)
|
|
9
|
+
# - link_header: RFC 8288 (formerly RFC 5988) Link header pagination
|
|
10
|
+
|
|
11
|
+
title: http_paginated_extraction
|
|
12
|
+
description: Extract from HTTP API with pagination
|
|
13
|
+
version: "1.0.0"
|
|
14
|
+
|
|
15
|
+
# Source type (required)
|
|
16
|
+
type: http
|
|
17
|
+
|
|
18
|
+
base_url: https://api.example.com
|
|
19
|
+
api_endpoint: /v1/users
|
|
20
|
+
method: GET
|
|
21
|
+
|
|
22
|
+
params:
|
|
23
|
+
api_key: ${API_KEY}
|
|
24
|
+
limit: 100 # Records per page
|
|
25
|
+
|
|
26
|
+
headers:
|
|
27
|
+
Accept: application/json
|
|
28
|
+
|
|
29
|
+
batch_size: 1000
|
|
30
|
+
response_format: json
|
|
31
|
+
response_path: data.items # Path to data array in response
|
|
32
|
+
|
|
33
|
+
# Pagination configuration
|
|
34
|
+
pagination:
|
|
35
|
+
enabled: true
|
|
36
|
+
|
|
37
|
+
# Strategy: page | offset | cursor | next_url | link_header
|
|
38
|
+
strategy: page
|
|
39
|
+
|
|
40
|
+
# Page-based pagination
|
|
41
|
+
page:
|
|
42
|
+
param_name: page # Query param name
|
|
43
|
+
start: 1 # Starting page number
|
|
44
|
+
increment: 1 # Page increment
|
|
45
|
+
|
|
46
|
+
# Offset-based pagination (uncomment to use)
|
|
47
|
+
# offset:
|
|
48
|
+
# param_name: offset
|
|
49
|
+
# start: 0
|
|
50
|
+
# increment_by: limit # or a number like 100
|
|
51
|
+
|
|
52
|
+
# Cursor-based pagination (uncomment to use)
|
|
53
|
+
# cursor:
|
|
54
|
+
# param_name: cursor
|
|
55
|
+
# response_path: meta.next_cursor # Path to cursor in response
|
|
56
|
+
|
|
57
|
+
# URL-based pagination (uncomment to use)
|
|
58
|
+
# next_url:
|
|
59
|
+
# response_path: links.next # Path to next URL in response
|
|
60
|
+
|
|
61
|
+
# Stop conditions
|
|
62
|
+
stop_conditions:
|
|
63
|
+
- type: fewer_records # Stop when a page returns fewer records than the limit
|
|
64
|
+
- type: max_pages
|
|
65
|
+
value: 100 # Maximum pages to fetch
|
|
66
|
+
# - type: max_records
|
|
67
|
+
# value: 10000 # Maximum total records
|
|
68
|
+
# - type: empty_response
|
|
69
|
+
|
|
70
|
+
# Delay between page requests (seconds)
|
|
71
|
+
page_delay: 0.1
|
|
72
|
+
|
|
73
|
+
retry:
|
|
74
|
+
max_attempts: 3
|
|
75
|
+
backoff_factor: 2
|
|
76
|
+
|
|
77
|
+
timeout:
|
|
78
|
+
connect: 10
|
|
79
|
+
read: 30
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Template: HTTP Extraction with Path Parameters
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
# Use when API endpoint contains path parameters like /users/{user_id}
|
|
4
|
+
|
|
5
|
+
title: http_path_params_extraction
|
|
6
|
+
description: HTTP extraction with path parameter substitution
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Source type (required)
|
|
10
|
+
type: http
|
|
11
|
+
|
|
12
|
+
base_url: https://api.example.com
|
|
13
|
+
api_endpoint: /v1/users/{user_id}/orders
|
|
14
|
+
|
|
15
|
+
method: GET
|
|
16
|
+
|
|
17
|
+
# Path parameters (substituted into {param} placeholders in endpoint)
|
|
18
|
+
# Pass values at runtime: await pipeline.run(user_id="123")
|
|
19
|
+
input_params:
|
|
20
|
+
user_id:
|
|
21
|
+
type: string
|
|
22
|
+
required: true
|
|
23
|
+
description: User ID to fetch orders for
|
|
24
|
+
|
|
25
|
+
# Query parameters
|
|
26
|
+
params:
|
|
27
|
+
api_key: ${API_KEY}
|
|
28
|
+
limit: 100
|
|
29
|
+
|
|
30
|
+
headers:
|
|
31
|
+
Accept: application/json
|
|
32
|
+
|
|
33
|
+
batch_size: 1000
|
|
34
|
+
response_format: json
|
|
35
|
+
response_path: data.orders # Path to data array in response
|
|
36
|
+
|
|
37
|
+
pagination:
|
|
38
|
+
enabled: false
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Template: HTTP/API Extraction (Single Request)
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# pipeline = Pipeline.from_config("path/to/pipeline")
|
|
6
|
+
# result = await pipeline.run()
|
|
7
|
+
|
|
8
|
+
title: http_extraction
|
|
9
|
+
description: Extract from HTTP API
|
|
10
|
+
version: "1.0.0"
|
|
11
|
+
|
|
12
|
+
# Source type (required)
|
|
13
|
+
type: http
|
|
14
|
+
|
|
15
|
+
# Option 1: Full URL
|
|
16
|
+
url: https://api.example.com/v1/users
|
|
17
|
+
|
|
18
|
+
# Option 2: Base URL + endpoint (uncomment to use)
|
|
19
|
+
# base_url: https://api.example.com
|
|
20
|
+
# api_endpoint: /v1/users
|
|
21
|
+
|
|
22
|
+
# HTTP method
|
|
23
|
+
method: GET
|
|
24
|
+
|
|
25
|
+
# Query parameters
|
|
26
|
+
params:
|
|
27
|
+
api_key: ${API_KEY:?API_KEY is required}
|
|
28
|
+
# limit: 100
|
|
29
|
+
# offset: 0
|
|
30
|
+
|
|
31
|
+
# Request headers
|
|
32
|
+
headers:
|
|
33
|
+
Accept: application/json
|
|
34
|
+
User-Agent: PyCharter-ETL/1.0
|
|
35
|
+
# Authorization: Bearer ${AUTH_TOKEN}
|
|
36
|
+
|
|
37
|
+
# Batch size for yielding records
|
|
38
|
+
batch_size: 1000
|
|
39
|
+
|
|
40
|
+
# Response handling
|
|
41
|
+
response_format: json
|
|
42
|
+
response_path: null # e.g. "data.items" for nested response
|
|
43
|
+
|
|
44
|
+
# Retry configuration
|
|
45
|
+
retry:
|
|
46
|
+
max_attempts: 3
|
|
47
|
+
backoff_factor: 2
|
|
48
|
+
retry_on_status: [429, 500, 502, 503, 504]
|
|
49
|
+
|
|
50
|
+
# Timeout configuration (seconds)
|
|
51
|
+
timeout:
|
|
52
|
+
connect: 10
|
|
53
|
+
read: 30
|
|
54
|
+
write: 10
|
|
55
|
+
pool: 10
|
|
56
|
+
|
|
57
|
+
# Rate limiting delay between requests (seconds)
|
|
58
|
+
rate_limit_delay: 0.2
|
|
59
|
+
|
|
60
|
+
# Pagination (disabled for simple extraction)
|
|
61
|
+
pagination:
|
|
62
|
+
enabled: false
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Template: Load to Azure Blob Storage
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
# Requires: pip install azure-storage-blob
|
|
4
|
+
|
|
5
|
+
title: load_azure
|
|
6
|
+
description: Load data to Azure Blob Storage
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: cloud_storage
|
|
11
|
+
|
|
12
|
+
storage:
|
|
13
|
+
provider: azure
|
|
14
|
+
container: my-container # Azure uses "container" instead of "bucket"
|
|
15
|
+
path: output/data.json # Blob path
|
|
16
|
+
|
|
17
|
+
# Credentials
|
|
18
|
+
credentials:
|
|
19
|
+
connection_string: ${AZURE_STORAGE_CONNECTION_STRING}
|
|
20
|
+
# Or use account name + key:
|
|
21
|
+
# account_name: mystorageaccount
|
|
22
|
+
# account_key: ${AZURE_STORAGE_KEY}
|
|
23
|
+
|
|
24
|
+
format: json
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Template: Load to Google Cloud Storage
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
# Requires: pip install google-cloud-storage
|
|
4
|
+
|
|
5
|
+
title: load_gcs
|
|
6
|
+
description: Load data to Google Cloud Storage
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: cloud_storage
|
|
11
|
+
|
|
12
|
+
storage:
|
|
13
|
+
provider: gcs
|
|
14
|
+
bucket: my-bucket-name
|
|
15
|
+
path: output/data.json # Object path
|
|
16
|
+
|
|
17
|
+
# Credentials (optional - uses default credentials if omitted)
|
|
18
|
+
# credentials:
|
|
19
|
+
# service_account_file: /path/to/service-account.json
|
|
20
|
+
# project: my-project-id
|
|
21
|
+
|
|
22
|
+
format: json
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Template: Load to AWS S3
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
# Requires: pip install boto3
|
|
4
|
+
|
|
5
|
+
title: load_s3
|
|
6
|
+
description: Load data to AWS S3
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: cloud_storage
|
|
11
|
+
|
|
12
|
+
storage:
|
|
13
|
+
provider: s3
|
|
14
|
+
bucket: my-bucket-name
|
|
15
|
+
path: output/data.json # Object key/prefix
|
|
16
|
+
|
|
17
|
+
# Credentials (optional - uses AWS credential chain if omitted)
|
|
18
|
+
# credentials:
|
|
19
|
+
# aws_access_key_id: ${AWS_ACCESS_KEY_ID}
|
|
20
|
+
# aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY}
|
|
21
|
+
# region: us-east-1
|
|
22
|
+
|
|
23
|
+
# Output format: json, jsonl, csv, parquet
|
|
24
|
+
format: json
|
|
25
|
+
|
|
26
|
+
# For partitioned output:
|
|
27
|
+
# partition_by: [year, month] # Creates year=2024/month=01/data.parquet
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Template: Load to File
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
#
|
|
4
|
+
# Supported formats: json, jsonl, csv, parquet
|
|
5
|
+
# Write modes: overwrite, append
|
|
6
|
+
|
|
7
|
+
title: load_file
|
|
8
|
+
description: Load data to local file
|
|
9
|
+
version: "1.0.0"
|
|
10
|
+
|
|
11
|
+
# Target type (required)
|
|
12
|
+
type: file
|
|
13
|
+
|
|
14
|
+
# File path
|
|
15
|
+
path: ./output/data.json
|
|
16
|
+
# Or use environment variable:
|
|
17
|
+
# path: ${OUTPUT_DIR:-./output}/data.json
|
|
18
|
+
|
|
19
|
+
# Output format: json, jsonl, csv, parquet
|
|
20
|
+
format: json
|
|
21
|
+
|
|
22
|
+
# Write mode: overwrite, append
|
|
23
|
+
write_mode: overwrite
|
|
24
|
+
|
|
25
|
+
# CSV-specific options (when format: csv)
|
|
26
|
+
# csv_options:
|
|
27
|
+
# delimiter: ","
|
|
28
|
+
# quotechar: '"'
|
|
29
|
+
# include_header: true
|
|
30
|
+
|
|
31
|
+
# JSON-specific options (when format: json)
|
|
32
|
+
# json_options:
|
|
33
|
+
# indent: 2
|
|
34
|
+
# ensure_ascii: false
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Template: Load with Insert Only
|
|
2
|
+
# Inserts new records only; fails on duplicates
|
|
3
|
+
# Copy to your pipeline directory as load.yaml
|
|
4
|
+
|
|
5
|
+
title: load_insert
|
|
6
|
+
description: Insert only (no updates)
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: postgres
|
|
11
|
+
|
|
12
|
+
table: my_table
|
|
13
|
+
schema: public
|
|
14
|
+
write_method: insert
|
|
15
|
+
batch_size: 1000
|
|
16
|
+
|
|
17
|
+
database:
|
|
18
|
+
url: ${DATABASE_URL:?DATABASE_URL is required}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Template: Load to PostgreSQL
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
#
|
|
4
|
+
# Write methods:
|
|
5
|
+
# - insert: Insert only (fails on duplicates)
|
|
6
|
+
# - upsert: Insert or update by primary key (default)
|
|
7
|
+
# - replace: Delete and insert
|
|
8
|
+
# - truncate_and_load: Truncate table then insert
|
|
9
|
+
# - update: Update existing records only
|
|
10
|
+
# - delete: Delete matching records
|
|
11
|
+
|
|
12
|
+
title: load_postgresql
|
|
13
|
+
description: Load data into PostgreSQL
|
|
14
|
+
version: "1.0.0"
|
|
15
|
+
|
|
16
|
+
# Target type (required)
|
|
17
|
+
type: postgres
|
|
18
|
+
|
|
19
|
+
# Target table configuration
|
|
20
|
+
table: my_table
|
|
21
|
+
schema: public
|
|
22
|
+
write_method: upsert
|
|
23
|
+
primary_key: id # or ["col1", "col2"] for composite key
|
|
24
|
+
batch_size: 1000
|
|
25
|
+
|
|
26
|
+
# Database connection
|
|
27
|
+
database:
|
|
28
|
+
url: ${DATABASE_URL:?DATABASE_URL is required}
|
|
29
|
+
# Format: postgresql://user:password@host:port/database
|
|
30
|
+
# Async: postgresql+asyncpg://user:password@host:port/database
|
|
31
|
+
|
|
32
|
+
# SSH Tunnel (optional)
|
|
33
|
+
# ssh_tunnel:
|
|
34
|
+
# enabled: true
|
|
35
|
+
# host: bastion.example.com
|
|
36
|
+
# port: 22
|
|
37
|
+
# username: ssh_user
|
|
38
|
+
# key_file: ~/.ssh/id_rsa
|
|
39
|
+
# local_port: 5433
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Template: Load to SQLite
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
#
|
|
4
|
+
# Good for: Local development, testing, small datasets
|
|
5
|
+
|
|
6
|
+
title: load_sqlite
|
|
7
|
+
description: Load data into SQLite
|
|
8
|
+
version: "1.0.0"
|
|
9
|
+
|
|
10
|
+
# Target type (required)
|
|
11
|
+
type: sqlite
|
|
12
|
+
|
|
13
|
+
table: my_table
|
|
14
|
+
write_method: upsert
|
|
15
|
+
primary_key: id
|
|
16
|
+
batch_size: 1000
|
|
17
|
+
|
|
18
|
+
database:
|
|
19
|
+
url: sqlite:///./data/my_database.db
|
|
20
|
+
# Or use environment variable:
|
|
21
|
+
# url: sqlite:///${SQLITE_DB_PATH:-./data/local.db}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Template: Truncate and Load
|
|
2
|
+
# Clears the table before loading (full refresh)
|
|
3
|
+
# Copy to your pipeline directory as load.yaml
|
|
4
|
+
|
|
5
|
+
title: load_truncate_and_load
|
|
6
|
+
description: Truncate table then insert all records
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: postgres
|
|
11
|
+
|
|
12
|
+
table: my_table
|
|
13
|
+
schema: public
|
|
14
|
+
write_method: truncate_and_load
|
|
15
|
+
batch_size: 1000
|
|
16
|
+
|
|
17
|
+
database:
|
|
18
|
+
url: ${DATABASE_URL:?DATABASE_URL is required}
|
|
19
|
+
|
|
20
|
+
# Warning: This deletes all existing data before loading!
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Template: Load with Upsert
|
|
2
|
+
# Insert new records, update existing ones based on primary key
|
|
3
|
+
# Copy to your pipeline directory as load.yaml
|
|
4
|
+
|
|
5
|
+
title: load_upsert
|
|
6
|
+
description: Upsert (insert or update) into database
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: postgres
|
|
11
|
+
|
|
12
|
+
table: my_table
|
|
13
|
+
schema: public
|
|
14
|
+
write_method: upsert
|
|
15
|
+
primary_key: id # or ["col1", "col2"] for composite key
|
|
16
|
+
batch_size: 1000
|
|
17
|
+
|
|
18
|
+
database:
|
|
19
|
+
url: ${DATABASE_URL:?DATABASE_URL is required}
|
|
20
|
+
|
|
21
|
+
# Optional: specify which columns to update on conflict
|
|
22
|
+
# update_columns:
|
|
23
|
+
# - name
|
|
24
|
+
# - updated_at
|
|
25
|
+
# - status
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Template: Load with Dead Letter Queue
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
# Failed records are written to a DLQ table/file for later processing
|
|
4
|
+
|
|
5
|
+
title: load_with_dlq
|
|
6
|
+
description: Load with dead letter queue for failed records
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: postgres
|
|
11
|
+
|
|
12
|
+
table: my_table
|
|
13
|
+
schema: public
|
|
14
|
+
write_method: upsert
|
|
15
|
+
primary_key: id
|
|
16
|
+
batch_size: 1000
|
|
17
|
+
|
|
18
|
+
database:
|
|
19
|
+
url: ${DATABASE_URL}
|
|
20
|
+
|
|
21
|
+
# Dead Letter Queue configuration
|
|
22
|
+
# Note: Use 'dead_letter_queue' as the key (not 'dlq')
|
|
23
|
+
dead_letter_queue:
|
|
24
|
+
enabled: true
|
|
25
|
+
|
|
26
|
+
# DLQ backend type: "database", "file", or "memory"
|
|
27
|
+
backend: database
|
|
28
|
+
|
|
29
|
+
# Schema for DLQ table (optional, defaults to 'public')
|
|
30
|
+
schema_name: public
|
|
31
|
+
|
|
32
|
+
# For file-based DLQ, specify storage_path:
|
|
33
|
+
# backend: file
|
|
34
|
+
# storage_path: ./dlq/my_table_failures.jsonl
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Template: Load to Database via SSH Tunnel
|
|
2
|
+
# Copy to your pipeline directory as load.yaml
|
|
3
|
+
# Use when database is only accessible through a bastion/jump host
|
|
4
|
+
|
|
5
|
+
title: load_ssh_tunnel
|
|
6
|
+
description: Load to database through SSH tunnel
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Target type (required)
|
|
10
|
+
type: postgres
|
|
11
|
+
|
|
12
|
+
table: my_table
|
|
13
|
+
schema: public
|
|
14
|
+
write_method: upsert
|
|
15
|
+
primary_key: id
|
|
16
|
+
batch_size: 1000
|
|
17
|
+
|
|
18
|
+
# Database connection (connect to localhost on the tunnel's local_port; the tunnel forwards to remote_host:remote_port)
|
|
19
|
+
database:
|
|
20
|
+
url: postgresql://user:pass@localhost:5433/database
|
|
21
|
+
|
|
22
|
+
# SSH tunnel configuration
|
|
23
|
+
ssh_tunnel:
|
|
24
|
+
enabled: true
|
|
25
|
+
host: bastion.example.com
|
|
26
|
+
port: 22
|
|
27
|
+
username: ${SSH_USER}
|
|
28
|
+
key_file: ~/.ssh/id_rsa
|
|
29
|
+
|
|
30
|
+
# Remote database endpoint
|
|
31
|
+
remote_host: db.internal.example.com
|
|
32
|
+
remote_port: 5432
|
|
33
|
+
|
|
34
|
+
# Local port for tunnel
|
|
35
|
+
local_port: 5433
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Template: Complete HTTP-to-Database Pipeline
|
|
2
|
+
# Single-file format: extract, transform, load in one file
|
|
3
|
+
# Alternative: Use separate extract.yaml, transform.yaml, load.yaml files
|
|
4
|
+
|
|
5
|
+
name: http_to_db_pipeline
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
description: Complete ETL pipeline example
|
|
8
|
+
|
|
9
|
+
# =============================================================================
|
|
10
|
+
# EXTRACT: HTTP API
|
|
11
|
+
# =============================================================================
|
|
12
|
+
extract:
|
|
13
|
+
type: http # Required: http | file | database | cloud_storage
|
|
14
|
+
url: https://api.example.com/v1/users
|
|
15
|
+
method: GET
|
|
16
|
+
params:
|
|
17
|
+
api_key: ${API_KEY}
|
|
18
|
+
limit: 100
|
|
19
|
+
headers:
|
|
20
|
+
Accept: application/json
|
|
21
|
+
batch_size: 1000
|
|
22
|
+
response_path: data.users
|
|
23
|
+
pagination:
|
|
24
|
+
enabled: true
|
|
25
|
+
strategy: page
|
|
26
|
+
page:
|
|
27
|
+
param_name: page
|
|
28
|
+
start: 1
|
|
29
|
+
stop_conditions:
|
|
30
|
+
- type: fewer_records
|
|
31
|
+
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# TRANSFORM: Ordered list of operations (new format)
|
|
34
|
+
# =============================================================================
|
|
35
|
+
transform:
|
|
36
|
+
# List format for explicit ordering
|
|
37
|
+
- rename:
|
|
38
|
+
userId: user_id
|
|
39
|
+
firstName: first_name
|
|
40
|
+
lastName: last_name
|
|
41
|
+
createdAt: created_at
|
|
42
|
+
|
|
43
|
+
- convert:
|
|
44
|
+
user_id: int
|
|
45
|
+
created_at: datetime
|
|
46
|
+
|
|
47
|
+
- defaults:
|
|
48
|
+
status: "active"
|
|
49
|
+
|
|
50
|
+
- add:
|
|
51
|
+
full_name: "${first_name} ${last_name}"
|
|
52
|
+
loaded_at: now()
|
|
53
|
+
|
|
54
|
+
- select:
|
|
55
|
+
- user_id
|
|
56
|
+
- first_name
|
|
57
|
+
- last_name
|
|
58
|
+
- full_name
|
|
59
|
+
- email
|
|
60
|
+
- status
|
|
61
|
+
- created_at
|
|
62
|
+
- loaded_at
|
|
63
|
+
|
|
64
|
+
# =============================================================================
|
|
65
|
+
# LOAD: PostgreSQL
|
|
66
|
+
# =============================================================================
|
|
67
|
+
load:
|
|
68
|
+
type: postgres # Required: postgres | sqlite | file | cloud_storage
|
|
69
|
+
table: users
|
|
70
|
+
schema: public
|
|
71
|
+
write_method: upsert
|
|
72
|
+
primary_key: user_id
|
|
73
|
+
batch_size: 1000
|
|
74
|
+
database:
|
|
75
|
+
url: ${DATABASE_URL}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Template: Combined Transformations
|
|
2
|
+
# Apply simple ops, then JSONata, then custom function
|
|
3
|
+
# Copy to your pipeline directory as transform.yaml
|
|
4
|
+
|
|
5
|
+
title: combined_transformation
|
|
6
|
+
description: Simple operations + JSONata + custom function
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Step 1: Simple operations (applied first)
|
|
10
|
+
transform:
|
|
11
|
+
rename:
|
|
12
|
+
userId: user_id
|
|
13
|
+
fullName: full_name
|
|
14
|
+
|
|
15
|
+
convert:
|
|
16
|
+
score: float
|
|
17
|
+
count: int
|
|
18
|
+
|
|
19
|
+
defaults:
|
|
20
|
+
source: "api"
|
|
21
|
+
|
|
22
|
+
add:
|
|
23
|
+
loaded_at: "now()"
|
|
24
|
+
|
|
25
|
+
select:
|
|
26
|
+
- user_id
|
|
27
|
+
- full_name
|
|
28
|
+
- score
|
|
29
|
+
- source
|
|
30
|
+
- loaded_at
|
|
31
|
+
|
|
32
|
+
# Step 2: JSONata expressions (applied after simple ops)
|
|
33
|
+
# Requires: pip install pyjsonata
|
|
34
|
+
jsonata:
|
|
35
|
+
# Mode: record (per-record) or batch (all records)
|
|
36
|
+
mode: record
|
|
37
|
+
expression: |
|
|
38
|
+
$ ~> |$|{
|
|
39
|
+
"score_pct": score * 100,
|
|
40
|
+
"name_upper": $uppercase(full_name)
|
|
41
|
+
}|
|
|
42
|
+
|
|
43
|
+
# Step 3: Custom function (applied last)
|
|
44
|
+
# custom_function:
|
|
45
|
+
# module: mypackage.transforms
|
|
46
|
+
# function: enrich_records
|
|
47
|
+
# kwargs:
|
|
48
|
+
# add_metadata: true
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Template: Custom Function Transformation
|
|
2
|
+
# Copy to your pipeline directory as transform.yaml
|
|
3
|
+
#
|
|
4
|
+
# Calls a Python function for complex transformations
|
|
5
|
+
#
|
|
6
|
+
# Your function signature:
|
|
7
|
+
# def my_transform(records: List[Dict], **kwargs) -> List[Dict]:
|
|
8
|
+
# return [transform_record(r) for r in records]
|
|
9
|
+
|
|
10
|
+
title: custom_function_transformation
|
|
11
|
+
description: Call custom Python function for transformation
|
|
12
|
+
version: "1.0.0"
|
|
13
|
+
|
|
14
|
+
transform:
|
|
15
|
+
# Simple operations applied first (optional)
|
|
16
|
+
rename:
|
|
17
|
+
userId: user_id
|
|
18
|
+
|
|
19
|
+
# Custom function called after simple operations
|
|
20
|
+
custom_function:
|
|
21
|
+
# Python module path (relative to working directory or installed package)
|
|
22
|
+
module: mypackage.transforms
|
|
23
|
+
# Function name to call
|
|
24
|
+
function: transform_records
|
|
25
|
+
# Optional keyword arguments passed to function
|
|
26
|
+
kwargs:
|
|
27
|
+
validate: true
|
|
28
|
+
enrich: true
|
|
29
|
+
default_region: "US"
|
|
30
|
+
|
|
31
|
+
# Example custom function implementation:
|
|
32
|
+
#
|
|
33
|
+
# # mypackage/transforms.py
|
|
34
|
+
# from typing import Any, Dict, List
|
|
35
|
+
#
|
|
36
|
+
# def transform_records(
|
|
37
|
+
# records: List[Dict[str, Any]],
|
|
38
|
+
# validate: bool = False,
|
|
39
|
+
# enrich: bool = False,
|
|
40
|
+
# default_region: str = "US",
|
|
41
|
+
# ) -> List[Dict[str, Any]]:
|
|
42
|
+
# """Transform a batch of records."""
|
|
43
|
+
# result = []
|
|
44
|
+
# for record in records:
|
|
45
|
+
# # Add region if missing
|
|
46
|
+
# if "region" not in record:
|
|
47
|
+
# record["region"] = default_region
|
|
48
|
+
#
|
|
49
|
+
# # Enrich with computed fields
|
|
50
|
+
# if enrich:
|
|
51
|
+
# record["full_name"] = f"{record.get('first', '')} {record.get('last', '')}".strip()
|
|
52
|
+
#
|
|
53
|
+
# # Validate and skip invalid records
|
|
54
|
+
# if validate and not record.get("id"):
|
|
55
|
+
# continue
|
|
56
|
+
#
|
|
57
|
+
# result.append(record)
|
|
58
|
+
# return result
|