pycharter 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/main.py +27 -1
- api/models/docs.py +68 -0
- api/models/evolution.py +117 -0
- api/models/tracking.py +111 -0
- api/models/validation.py +46 -6
- api/routes/v1/__init__.py +14 -1
- api/routes/v1/docs.py +187 -0
- api/routes/v1/evolution.py +337 -0
- api/routes/v1/templates.py +211 -27
- api/routes/v1/tracking.py +301 -0
- api/routes/v1/validation.py +68 -31
- pycharter/__init__.py +268 -58
- pycharter/data/templates/contract/template_coercion_rules.yaml +57 -0
- pycharter/data/templates/contract/template_contract.yaml +122 -0
- pycharter/data/templates/contract/template_metadata.yaml +68 -0
- pycharter/data/templates/contract/template_schema.yaml +100 -0
- pycharter/data/templates/contract/template_validation_rules.yaml +75 -0
- pycharter/data/templates/etl/README.md +224 -0
- pycharter/data/templates/etl/extract_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/extract_cloud_gcs.yaml +25 -0
- pycharter/data/templates/etl/extract_cloud_s3.yaml +30 -0
- pycharter/data/templates/etl/extract_database.yaml +34 -0
- pycharter/data/templates/etl/extract_database_ssh.yaml +40 -0
- pycharter/data/templates/etl/extract_file_csv.yaml +21 -0
- pycharter/data/templates/etl/extract_file_glob.yaml +25 -0
- pycharter/data/templates/etl/extract_file_json.yaml +24 -0
- pycharter/data/templates/etl/extract_file_parquet.yaml +20 -0
- pycharter/data/templates/etl/extract_http_paginated.yaml +79 -0
- pycharter/data/templates/etl/extract_http_path_params.yaml +38 -0
- pycharter/data/templates/etl/extract_http_simple.yaml +62 -0
- pycharter/data/templates/etl/load_cloud_azure.yaml +24 -0
- pycharter/data/templates/etl/load_cloud_gcs.yaml +22 -0
- pycharter/data/templates/etl/load_cloud_s3.yaml +27 -0
- pycharter/data/templates/etl/load_file.yaml +34 -0
- pycharter/data/templates/etl/load_insert.yaml +18 -0
- pycharter/data/templates/etl/load_postgresql.yaml +39 -0
- pycharter/data/templates/etl/load_sqlite.yaml +21 -0
- pycharter/data/templates/etl/load_truncate_and_load.yaml +20 -0
- pycharter/data/templates/etl/load_upsert.yaml +25 -0
- pycharter/data/templates/etl/load_with_dlq.yaml +34 -0
- pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +35 -0
- pycharter/data/templates/etl/pipeline_http_to_db.yaml +75 -0
- pycharter/data/templates/etl/transform_combined.yaml +48 -0
- pycharter/data/templates/etl/transform_custom_function.yaml +58 -0
- pycharter/data/templates/etl/transform_jsonata.yaml +51 -0
- pycharter/data/templates/etl/transform_simple.yaml +59 -0
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
- pycharter/docs_generator/__init__.py +43 -0
- pycharter/docs_generator/generator.py +465 -0
- pycharter/docs_generator/renderers.py +247 -0
- pycharter/etl_generator/__init__.py +168 -80
- pycharter/etl_generator/builder.py +121 -0
- pycharter/etl_generator/config_loader.py +394 -0
- pycharter/etl_generator/config_validator.py +418 -0
- pycharter/etl_generator/context.py +132 -0
- pycharter/etl_generator/expression.py +499 -0
- pycharter/etl_generator/extractors/__init__.py +30 -0
- pycharter/etl_generator/extractors/base.py +70 -0
- pycharter/etl_generator/extractors/cloud_storage.py +530 -0
- pycharter/etl_generator/extractors/database.py +221 -0
- pycharter/etl_generator/extractors/factory.py +185 -0
- pycharter/etl_generator/extractors/file.py +475 -0
- pycharter/etl_generator/extractors/http.py +895 -0
- pycharter/etl_generator/extractors/streaming.py +57 -0
- pycharter/etl_generator/loaders/__init__.py +41 -0
- pycharter/etl_generator/loaders/base.py +35 -0
- pycharter/etl_generator/loaders/cloud.py +87 -0
- pycharter/etl_generator/loaders/cloud_storage_loader.py +275 -0
- pycharter/etl_generator/loaders/database.py +274 -0
- pycharter/etl_generator/loaders/factory.py +180 -0
- pycharter/etl_generator/loaders/file.py +72 -0
- pycharter/etl_generator/loaders/file_loader.py +130 -0
- pycharter/etl_generator/pipeline.py +743 -0
- pycharter/etl_generator/protocols.py +54 -0
- pycharter/etl_generator/result.py +63 -0
- pycharter/etl_generator/schemas/__init__.py +49 -0
- pycharter/etl_generator/transformers/__init__.py +49 -0
- pycharter/etl_generator/transformers/base.py +63 -0
- pycharter/etl_generator/transformers/config.py +45 -0
- pycharter/etl_generator/transformers/custom_function.py +101 -0
- pycharter/etl_generator/transformers/jsonata_transformer.py +56 -0
- pycharter/etl_generator/transformers/operations.py +218 -0
- pycharter/etl_generator/transformers/pipeline.py +54 -0
- pycharter/etl_generator/transformers/simple_operations.py +131 -0
- pycharter/quality/__init__.py +25 -0
- pycharter/quality/tracking/__init__.py +64 -0
- pycharter/quality/tracking/collector.py +318 -0
- pycharter/quality/tracking/exporters.py +238 -0
- pycharter/quality/tracking/models.py +194 -0
- pycharter/quality/tracking/store.py +385 -0
- pycharter/runtime_validator/__init__.py +20 -7
- pycharter/runtime_validator/builder.py +328 -0
- pycharter/runtime_validator/validator.py +311 -7
- pycharter/runtime_validator/validator_core.py +61 -0
- pycharter/schema_evolution/__init__.py +61 -0
- pycharter/schema_evolution/compatibility.py +270 -0
- pycharter/schema_evolution/diff.py +496 -0
- pycharter/schema_evolution/models.py +201 -0
- pycharter/shared/__init__.py +56 -0
- pycharter/shared/errors.py +296 -0
- pycharter/shared/protocols.py +234 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/METADATA +146 -26
- pycharter-0.0.24.dist-info/RECORD +543 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/WHEEL +1 -1
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +1 -1
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +1 -1
- ui/static/__next._tree.txt +1 -1
- ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
- ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
- ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
- ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
- ui/static/_not-found/__next._full.txt +1 -1
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +1 -1
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +1 -1
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +1 -1
- ui/static/contracts/__next._full.txt +2 -2
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +1 -1
- ui/static/contracts/__next._tree.txt +1 -1
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +2 -2
- ui/static/documentation/__next._full.txt +1 -1
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +1 -1
- ui/static/documentation/__next._tree.txt +1 -1
- ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +1 -1
- ui/static/index.html +1 -1
- ui/static/index.txt +1 -1
- ui/static/metadata/__next._full.txt +1 -1
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +1 -1
- ui/static/metadata/__next._tree.txt +1 -1
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +1 -1
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +1 -1
- ui/static/quality/__next._tree.txt +1 -1
- ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +1 -1
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +1 -1
- ui/static/rules/__next._tree.txt +1 -1
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +1 -1
- ui/static/schemas/__next._full.txt +1 -1
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +1 -1
- ui/static/schemas/__next._tree.txt +1 -1
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +1 -1
- ui/static/settings/__next._full.txt +1 -1
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +1 -1
- ui/static/settings/__next._tree.txt +1 -1
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +1 -1
- ui/static/static/404/index.html +1 -1
- ui/static/static/404.html +1 -1
- ui/static/static/__next.__PAGE__.txt +1 -1
- ui/static/static/__next._full.txt +2 -2
- ui/static/static/__next._head.txt +1 -1
- ui/static/static/__next._index.txt +2 -2
- ui/static/static/__next._tree.txt +2 -2
- ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/static/_not-found/__next._full.txt +2 -2
- ui/static/static/_not-found/__next._head.txt +1 -1
- ui/static/static/_not-found/__next._index.txt +2 -2
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/static/_not-found/__next._not-found.txt +1 -1
- ui/static/static/_not-found/__next._tree.txt +2 -2
- ui/static/static/_not-found/index.html +1 -1
- ui/static/static/_not-found/index.txt +2 -2
- ui/static/static/contracts/__next._full.txt +3 -3
- ui/static/static/contracts/__next._head.txt +1 -1
- ui/static/static/contracts/__next._index.txt +2 -2
- ui/static/static/contracts/__next._tree.txt +2 -2
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/static/contracts/__next.contracts.txt +1 -1
- ui/static/static/contracts/index.html +1 -1
- ui/static/static/contracts/index.txt +3 -3
- ui/static/static/documentation/__next._full.txt +3 -3
- ui/static/static/documentation/__next._head.txt +1 -1
- ui/static/static/documentation/__next._index.txt +2 -2
- ui/static/static/documentation/__next._tree.txt +2 -2
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/static/documentation/__next.documentation.txt +1 -1
- ui/static/static/documentation/index.html +2 -2
- ui/static/static/documentation/index.txt +3 -3
- ui/static/static/index.html +1 -1
- ui/static/static/index.txt +2 -2
- ui/static/static/metadata/__next._full.txt +2 -2
- ui/static/static/metadata/__next._head.txt +1 -1
- ui/static/static/metadata/__next._index.txt +2 -2
- ui/static/static/metadata/__next._tree.txt +2 -2
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/static/metadata/__next.metadata.txt +1 -1
- ui/static/static/metadata/index.html +1 -1
- ui/static/static/metadata/index.txt +2 -2
- ui/static/static/quality/__next._full.txt +2 -2
- ui/static/static/quality/__next._head.txt +1 -1
- ui/static/static/quality/__next._index.txt +2 -2
- ui/static/static/quality/__next._tree.txt +2 -2
- ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/static/quality/__next.quality.txt +1 -1
- ui/static/static/quality/index.html +2 -2
- ui/static/static/quality/index.txt +2 -2
- ui/static/static/rules/__next._full.txt +2 -2
- ui/static/static/rules/__next._head.txt +1 -1
- ui/static/static/rules/__next._index.txt +2 -2
- ui/static/static/rules/__next._tree.txt +2 -2
- ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/static/rules/__next.rules.txt +1 -1
- ui/static/static/rules/index.html +1 -1
- ui/static/static/rules/index.txt +2 -2
- ui/static/static/schemas/__next._full.txt +2 -2
- ui/static/static/schemas/__next._head.txt +1 -1
- ui/static/static/schemas/__next._index.txt +2 -2
- ui/static/static/schemas/__next._tree.txt +2 -2
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/static/schemas/__next.schemas.txt +1 -1
- ui/static/static/schemas/index.html +1 -1
- ui/static/static/schemas/index.txt +2 -2
- ui/static/static/settings/__next._full.txt +2 -2
- ui/static/static/settings/__next._head.txt +1 -1
- ui/static/static/settings/__next._index.txt +2 -2
- ui/static/static/settings/__next._tree.txt +2 -2
- ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/static/settings/__next.settings.txt +1 -1
- ui/static/static/settings/index.html +1 -1
- ui/static/static/settings/index.txt +2 -2
- ui/static/static/static/.gitkeep +0 -0
- ui/static/static/static/404/index.html +1 -0
- ui/static/static/static/404.html +1 -0
- ui/static/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/static/__next._full.txt +30 -0
- ui/static/static/static/__next._head.txt +7 -0
- ui/static/static/static/__next._index.txt +9 -0
- ui/static/static/static/__next._tree.txt +2 -0
- ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/static/_not-found/index.html +1 -0
- ui/static/static/static/_not-found/index.txt +17 -0
- ui/static/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/static/contracts/index.html +1 -0
- ui/static/static/static/contracts/index.txt +21 -0
- ui/static/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/static/documentation/index.html +93 -0
- ui/static/static/static/documentation/index.txt +21 -0
- ui/static/static/static/index.html +1 -0
- ui/static/static/static/index.txt +30 -0
- ui/static/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/static/metadata/index.html +1 -0
- ui/static/static/static/metadata/index.txt +21 -0
- ui/static/static/static/quality/__next._full.txt +21 -0
- ui/static/static/static/quality/__next._head.txt +7 -0
- ui/static/static/static/quality/__next._index.txt +9 -0
- ui/static/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/static/quality/index.html +2 -0
- ui/static/static/static/quality/index.txt +21 -0
- ui/static/static/static/rules/__next._full.txt +21 -0
- ui/static/static/static/rules/__next._head.txt +7 -0
- ui/static/static/static/rules/__next._index.txt +9 -0
- ui/static/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/static/rules/index.html +1 -0
- ui/static/static/static/rules/index.txt +21 -0
- ui/static/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/static/schemas/index.html +1 -0
- ui/static/static/static/schemas/index.txt +21 -0
- ui/static/static/static/settings/__next._full.txt +21 -0
- ui/static/static/static/settings/__next._head.txt +7 -0
- ui/static/static/static/settings/__next._index.txt +9 -0
- ui/static/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/static/settings/index.html +1 -0
- ui/static/static/static/settings/index.txt +21 -0
- ui/static/static/static/validation/__next._full.txt +21 -0
- ui/static/static/static/validation/__next._head.txt +7 -0
- ui/static/static/static/validation/__next._index.txt +9 -0
- ui/static/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/static/validation/index.html +1 -0
- ui/static/static/static/validation/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +2 -2
- ui/static/static/validation/__next._head.txt +1 -1
- ui/static/static/validation/__next._index.txt +2 -2
- ui/static/static/validation/__next._tree.txt +2 -2
- ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/static/validation/__next.validation.txt +1 -1
- ui/static/static/validation/index.html +1 -1
- ui/static/static/validation/index.txt +2 -2
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +1 -1
- ui/static/validation/__next._tree.txt +1 -1
- ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/data/templates/template_coercion_rules.yaml +0 -15
- pycharter/data/templates/template_contract.yaml +0 -587
- pycharter/data/templates/template_metadata.yaml +0 -38
- pycharter/data/templates/template_schema.yaml +0 -22
- pycharter/data/templates/template_transform_advanced.yaml +0 -50
- pycharter/data/templates/template_transform_simple.yaml +0 -59
- pycharter/data/templates/template_validation_rules.yaml +0 -29
- pycharter/etl_generator/extraction.py +0 -916
- pycharter/etl_generator/factory.py +0 -174
- pycharter/etl_generator/orchestrator.py +0 -1650
- pycharter/integrations/__init__.py +0 -19
- pycharter/integrations/kafka.py +0 -178
- pycharter/integrations/streaming.py +0 -100
- pycharter-0.0.22.dist-info/RECORD +0 -358
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.24.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# JSON Schema Template
|
|
2
|
+
# Defines the structure and constraints of your data
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# from pycharter import from_dict
|
|
6
|
+
# Model = from_dict(schema, "MyModel")
|
|
7
|
+
|
|
8
|
+
type: object
|
|
9
|
+
title: my_entity
|
|
10
|
+
version: "1.0.0"
|
|
11
|
+
|
|
12
|
+
properties:
|
|
13
|
+
# Required string field
|
|
14
|
+
id:
|
|
15
|
+
type: string
|
|
16
|
+
description: Unique identifier
|
|
17
|
+
title: ID
|
|
18
|
+
minLength: 1
|
|
19
|
+
|
|
20
|
+
# Required string with max length
|
|
21
|
+
name:
|
|
22
|
+
type: string
|
|
23
|
+
description: Entity name
|
|
24
|
+
title: Name
|
|
25
|
+
minLength: 1
|
|
26
|
+
maxLength: 100
|
|
27
|
+
|
|
28
|
+
# Enum field
|
|
29
|
+
status:
|
|
30
|
+
type: string
|
|
31
|
+
description: Current status
|
|
32
|
+
enum: [active, inactive, pending]
|
|
33
|
+
|
|
34
|
+
# Integer with constraints
|
|
35
|
+
count:
|
|
36
|
+
type: integer
|
|
37
|
+
description: Item count
|
|
38
|
+
minimum: 0
|
|
39
|
+
|
|
40
|
+
# Number (float) with constraints
|
|
41
|
+
amount:
|
|
42
|
+
type: number
|
|
43
|
+
description: Monetary amount
|
|
44
|
+
minimum: 0
|
|
45
|
+
|
|
46
|
+
# Boolean field
|
|
47
|
+
is_active:
|
|
48
|
+
type: boolean
|
|
49
|
+
description: Active flag
|
|
50
|
+
default: true
|
|
51
|
+
|
|
52
|
+
# Date-time field
|
|
53
|
+
created_at:
|
|
54
|
+
type: string
|
|
55
|
+
format: date-time
|
|
56
|
+
description: Creation timestamp
|
|
57
|
+
|
|
58
|
+
# Email field
|
|
59
|
+
email:
|
|
60
|
+
type: string
|
|
61
|
+
format: email
|
|
62
|
+
description: Contact email
|
|
63
|
+
|
|
64
|
+
# UUID field
|
|
65
|
+
uuid:
|
|
66
|
+
type: string
|
|
67
|
+
format: uuid
|
|
68
|
+
description: Unique UUID
|
|
69
|
+
|
|
70
|
+
# Optional/nullable field (use anyOf for nullable)
|
|
71
|
+
notes:
|
|
72
|
+
anyOf:
|
|
73
|
+
- type: string
|
|
74
|
+
maxLength: 500
|
|
75
|
+
- type: "null"
|
|
76
|
+
default: null
|
|
77
|
+
description: Optional notes
|
|
78
|
+
|
|
79
|
+
# Nested object
|
|
80
|
+
metadata:
|
|
81
|
+
type: object
|
|
82
|
+
properties:
|
|
83
|
+
source:
|
|
84
|
+
type: string
|
|
85
|
+
version:
|
|
86
|
+
type: string
|
|
87
|
+
required: [source]
|
|
88
|
+
|
|
89
|
+
# Array of strings
|
|
90
|
+
tags:
|
|
91
|
+
type: array
|
|
92
|
+
items:
|
|
93
|
+
type: string
|
|
94
|
+
description: Tags list
|
|
95
|
+
|
|
96
|
+
required:
|
|
97
|
+
- id
|
|
98
|
+
- name
|
|
99
|
+
- status
|
|
100
|
+
- created_at
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Validation Rules Template
|
|
2
|
+
# Post-validation business rule checks
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# Validation rules check data AFTER Pydantic validation.
|
|
6
|
+
# Useful for business rules that go beyond type checking.
|
|
7
|
+
#
|
|
8
|
+
# Available validations:
|
|
9
|
+
# - min_length: {threshold: N}
|
|
10
|
+
# - max_length: {threshold: N}
|
|
11
|
+
# - greater_than_or_equal_to: {threshold: N}
|
|
12
|
+
# - less_than_or_equal_to: {threshold: N}
|
|
13
|
+
# - only_allow: {allowed_values: [a, b, c]}
|
|
14
|
+
# - only_allow_if: {condition_field: x, condition_value: y, allowed_values: [...]}
|
|
15
|
+
# - non_empty_string: null (no params needed)
|
|
16
|
+
# - is_positive: null
|
|
17
|
+
# - is_email: null
|
|
18
|
+
# - is_url: null
|
|
19
|
+
# - is_alphanumeric: null
|
|
20
|
+
# - is_numeric_string: null
|
|
21
|
+
# - no_capital_characters: null
|
|
22
|
+
# - no_special_characters: null
|
|
23
|
+
# - matches_regex: {pattern: "^[A-Z]{2}[0-9]{4}$"}
|
|
24
|
+
|
|
25
|
+
title: my_validation_rules
|
|
26
|
+
description: Business validation rules
|
|
27
|
+
version: "1.0.0"
|
|
28
|
+
|
|
29
|
+
rules:
|
|
30
|
+
# String length validation
|
|
31
|
+
id:
|
|
32
|
+
min_length:
|
|
33
|
+
threshold: 1
|
|
34
|
+
max_length:
|
|
35
|
+
threshold: 50
|
|
36
|
+
|
|
37
|
+
name:
|
|
38
|
+
non_empty_string: null
|
|
39
|
+
max_length:
|
|
40
|
+
threshold: 100
|
|
41
|
+
|
|
42
|
+
# Enum-like validation
|
|
43
|
+
status:
|
|
44
|
+
only_allow:
|
|
45
|
+
allowed_values:
|
|
46
|
+
- active
|
|
47
|
+
- inactive
|
|
48
|
+
- pending
|
|
49
|
+
|
|
50
|
+
# Numeric range validation
|
|
51
|
+
count:
|
|
52
|
+
greater_than_or_equal_to:
|
|
53
|
+
threshold: 0
|
|
54
|
+
|
|
55
|
+
amount:
|
|
56
|
+
greater_than_or_equal_to:
|
|
57
|
+
threshold: 0
|
|
58
|
+
less_than_or_equal_to:
|
|
59
|
+
threshold: 1000000
|
|
60
|
+
|
|
61
|
+
# Email validation
|
|
62
|
+
email:
|
|
63
|
+
is_email: null
|
|
64
|
+
|
|
65
|
+
# Regex pattern validation
|
|
66
|
+
# code:
|
|
67
|
+
# matches_regex:
|
|
68
|
+
# pattern: "^[A-Z]{2}[0-9]{4}$"
|
|
69
|
+
|
|
70
|
+
# Conditional validation
|
|
71
|
+
# discount_rate:
|
|
72
|
+
# only_allow_if:
|
|
73
|
+
# condition_field: customer_type
|
|
74
|
+
# condition_value: premium
|
|
75
|
+
# allowed_values: [0.1, 0.15, 0.2]
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# ETL Config Templates
|
|
2
|
+
|
|
3
|
+
Templates for ETL pipeline configuration. Two formats are supported:
|
|
4
|
+
|
|
5
|
+
1. **Single-file format**: `pipeline.yaml` with extract, transform, load sections
|
|
6
|
+
2. **Multi-file format**: Separate `extract.yaml`, `transform.yaml`, `load.yaml` files
|
|
7
|
+
|
|
8
|
+
## Quick Start
|
|
9
|
+
|
|
10
|
+
### Single-File Format (Recommended)
|
|
11
|
+
|
|
12
|
+
```yaml
|
|
13
|
+
# pipelines/users/pipeline.yaml
|
|
14
|
+
name: users_pipeline
|
|
15
|
+
version: "1.0.0"
|
|
16
|
+
|
|
17
|
+
extract:
|
|
18
|
+
type: http # Required: http | file | database | cloud_storage
|
|
19
|
+
url: https://api.example.com/users
|
|
20
|
+
|
|
21
|
+
transform:
|
|
22
|
+
- rename:
|
|
23
|
+
userId: user_id
|
|
24
|
+
- convert:
|
|
25
|
+
user_id: int
|
|
26
|
+
- add:
|
|
27
|
+
full_name: "${first_name} ${last_name}"
|
|
28
|
+
loaded_at: now()
|
|
29
|
+
|
|
30
|
+
load:
|
|
31
|
+
type: postgres # Required: postgres | sqlite | file | cloud_storage
|
|
32
|
+
table: users
|
|
33
|
+
database:
|
|
34
|
+
url: ${DATABASE_URL}
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from pycharter import Pipeline
|
|
39
|
+
import asyncio
|
|
40
|
+
|
|
41
|
+
async def main():
|
|
42
|
+
pipeline = Pipeline.from_config("pipelines/users/pipeline.yaml")
|
|
43
|
+
result = await pipeline.run()
|
|
44
|
+
print(f"Loaded {result.rows_loaded} rows")
|
|
45
|
+
|
|
46
|
+
asyncio.run(main())
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Multi-File Format
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
mkdir -p pipelines/users
|
|
53
|
+
cp pycharter/data/templates/etl/extract_http_simple.yaml pipelines/users/extract.yaml
|
|
54
|
+
cp pycharter/data/templates/etl/transform_simple.yaml pipelines/users/transform.yaml
|
|
55
|
+
cp pycharter/data/templates/etl/load_postgresql.yaml pipelines/users/load.yaml
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
pipeline = Pipeline.from_config("pipelines/users/")
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Type Field (Required)
|
|
63
|
+
|
|
64
|
+
All extract and load configs require an explicit `type` field:
|
|
65
|
+
|
|
66
|
+
**Extract types:**
|
|
67
|
+
- `http` - HTTP/API extraction
|
|
68
|
+
- `file` - Local file extraction (CSV, JSON, Parquet, etc.)
|
|
69
|
+
- `database` - SQL database extraction
|
|
70
|
+
- `cloud_storage` - Cloud storage (S3, GCS, Azure)
|
|
71
|
+
|
|
72
|
+
**Load types:**
|
|
73
|
+
- `postgres` / `postgresql` - PostgreSQL
|
|
74
|
+
- `sqlite` - SQLite
|
|
75
|
+
- `file` - Local file (JSON, CSV, Parquet)
|
|
76
|
+
- `cloud_storage` - Cloud storage (S3, GCS, Azure)
|
|
77
|
+
|
|
78
|
+
## Extract Templates
|
|
79
|
+
|
|
80
|
+
| Template | Type | Description |
|
|
81
|
+
|----------|------|-------------|
|
|
82
|
+
| `extract_http_simple.yaml` | http | Single HTTP request (no pagination) |
|
|
83
|
+
| `extract_http_paginated.yaml` | http | HTTP with pagination (page/offset/cursor) |
|
|
84
|
+
| `extract_http_path_params.yaml` | http | HTTP with `{param}` path substitution |
|
|
85
|
+
| `extract_file_csv.yaml` | file | CSV file extraction |
|
|
86
|
+
| `extract_file_json.yaml` | file | JSON file extraction |
|
|
87
|
+
| `extract_file_parquet.yaml` | file | Parquet file extraction |
|
|
88
|
+
| `extract_file_glob.yaml` | file | Multiple files via glob pattern |
|
|
89
|
+
| `extract_database.yaml` | database | SQL database extraction |
|
|
90
|
+
| `extract_database_ssh.yaml` | database | Database via SSH tunnel |
|
|
91
|
+
| `extract_cloud_s3.yaml` | cloud_storage | AWS S3 extraction |
|
|
92
|
+
| `extract_cloud_gcs.yaml` | cloud_storage | Google Cloud Storage |
|
|
93
|
+
| `extract_cloud_azure.yaml` | cloud_storage | Azure Blob Storage |
|
|
94
|
+
|
|
95
|
+
## Transform Templates
|
|
96
|
+
|
|
97
|
+
| Template | Description |
|
|
98
|
+
|----------|-------------|
|
|
99
|
+
| `transform_simple.yaml` | rename, convert, defaults, add, select, drop |
|
|
100
|
+
| `transform_custom_function.yaml` | Call Python function |
|
|
101
|
+
| `transform_jsonata.yaml` | JSONata expressions |
|
|
102
|
+
| `transform_combined.yaml` | Simple + JSONata + custom |
|
|
103
|
+
|
|
104
|
+
### Transform Formats
|
|
105
|
+
|
|
106
|
+
**List format (ordered)** - Transforms applied in specified order:
|
|
107
|
+
|
|
108
|
+
```yaml
|
|
109
|
+
transform:
|
|
110
|
+
- rename: {old_field: new_field}
|
|
111
|
+
- convert: {field: int}
|
|
112
|
+
- add:
|
|
113
|
+
full_name: "${first_name} ${last_name}"
|
|
114
|
+
timestamp: now()
|
|
115
|
+
- select: [id, name, email]
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**Dict format (legacy)** - Fixed order: rename → convert → defaults → add → select → drop
|
|
119
|
+
|
|
120
|
+
```yaml
|
|
121
|
+
transform:
|
|
122
|
+
rename:
|
|
123
|
+
old_field: new_field
|
|
124
|
+
convert:
|
|
125
|
+
field: int
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Expression Syntax
|
|
129
|
+
|
|
130
|
+
In `add` fields, you can use expressions:
|
|
131
|
+
|
|
132
|
+
- `${field_name}` - Reference field value
|
|
133
|
+
- `${field_name:-default}` - Field with default if missing
|
|
134
|
+
- `now()` - Current timestamp (ISO format)
|
|
135
|
+
- `uuid()` - Generate UUID
|
|
136
|
+
- `concat(${a}, " ", ${b})` - Concatenate values
|
|
137
|
+
- `lower(${field})` / `upper(${field})` - Case conversion
|
|
138
|
+
|
|
139
|
+
## Load Templates
|
|
140
|
+
|
|
141
|
+
| Template | Type | Description |
|
|
142
|
+
|----------|------|-------------|
|
|
143
|
+
| `load_postgresql.yaml` | postgres | PostgreSQL (upsert, insert, etc.) |
|
|
144
|
+
| `load_sqlite.yaml` | sqlite | SQLite database |
|
|
145
|
+
| `load_file.yaml` | file | JSON, CSV, Parquet file |
|
|
146
|
+
| `load_upsert.yaml` | postgres | Upsert by primary key |
|
|
147
|
+
| `load_insert.yaml` | postgres | Insert only |
|
|
148
|
+
| `load_truncate_and_load.yaml` | postgres | Truncate then insert |
|
|
149
|
+
| `load_with_dlq.yaml` | postgres | With dead letter queue |
|
|
150
|
+
| `load_with_ssh_tunnel.yaml` | postgres | Via SSH tunnel |
|
|
151
|
+
| `load_cloud_s3.yaml` | cloud_storage | AWS S3 |
|
|
152
|
+
| `load_cloud_gcs.yaml` | cloud_storage | Google Cloud Storage |
|
|
153
|
+
| `load_cloud_azure.yaml` | cloud_storage | Azure Blob Storage |
|
|
154
|
+
|
|
155
|
+
## Complete Pipeline Template
|
|
156
|
+
|
|
157
|
+
See `pipeline_http_to_db.yaml` for a complete single-file pipeline example.
|
|
158
|
+
|
|
159
|
+
## Variable Substitution
|
|
160
|
+
|
|
161
|
+
Configs support `${VAR}` syntax for flexible configuration:
|
|
162
|
+
|
|
163
|
+
```yaml
|
|
164
|
+
path: ${DATA_DIR}/input.json # From variables
|
|
165
|
+
params:
|
|
166
|
+
api_key: ${API_KEY:?API_KEY is required} # Required - error if missing
|
|
167
|
+
limit: ${BATCH_LIMIT:-100} # With default value
|
|
168
|
+
database:
|
|
169
|
+
url: ${DATABASE_URL} # From environment
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Provide values via the `variables` parameter (recommended) or environment variables:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# Recommended: explicit variables - no assumptions about structure
|
|
176
|
+
pipeline = Pipeline.from_config_files(
|
|
177
|
+
extract="my_extract.yaml",
|
|
178
|
+
load="my_load.yaml",
|
|
179
|
+
variables={
|
|
180
|
+
"DATA_DIR": "./data",
|
|
181
|
+
"OUTPUT_DIR": "./output",
|
|
182
|
+
"API_KEY": "xxx",
|
|
183
|
+
"DATABASE_URL": "postgresql://..."
|
|
184
|
+
}
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
# Or with from_config() for directory-based loading
|
|
188
|
+
pipeline = Pipeline.from_config(
|
|
189
|
+
"pipelines/users",
|
|
190
|
+
variables={"API_KEY": "xxx"}
|
|
191
|
+
)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Programmatic API
|
|
195
|
+
|
|
196
|
+
Instead of config files, use the Pipeline API directly:
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from pycharter import (
|
|
200
|
+
Pipeline, HTTPExtractor, PostgresLoader,
|
|
201
|
+
Rename, Select, Filter, Convert, AddField
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
pipeline = (
|
|
205
|
+
Pipeline(HTTPExtractor(url="https://api.example.com/users"))
|
|
206
|
+
| Rename({"userName": "user_name"})
|
|
207
|
+
| AddField("full_name", "${first_name} ${last_name}") # Expression support!
|
|
208
|
+
| Select(["id", "user_name", "email", "full_name"])
|
|
209
|
+
| Convert({"id": int})
|
|
210
|
+
| Filter(lambda r: r.get("email"))
|
|
211
|
+
| PostgresLoader(
|
|
212
|
+
connection_string="postgresql://localhost/db",
|
|
213
|
+
table="users",
|
|
214
|
+
write_method="upsert",
|
|
215
|
+
primary_key="id"
|
|
216
|
+
)
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
result = await pipeline.run()  # must run inside an async function (e.g. driven by asyncio.run)
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Examples
|
|
223
|
+
|
|
224
|
+
See `examples/etl_config_example/` for working examples with config files.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Template: Extract from Azure Blob Storage
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
# Requires: pip install azure-storage-blob
|
|
4
|
+
|
|
5
|
+
title: azure_extraction
|
|
6
|
+
description: Extract data from Azure Blob Storage
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Source type (required)
|
|
10
|
+
type: cloud_storage
|
|
11
|
+
|
|
12
|
+
storage:
|
|
13
|
+
provider: azure
|
|
14
|
+
container: my-container
|
|
15
|
+
path: data/input.json
|
|
16
|
+
|
|
17
|
+
# Or prefix for multiple blobs
|
|
18
|
+
# prefix: data/inputs/
|
|
19
|
+
|
|
20
|
+
credentials:
|
|
21
|
+
connection_string: ${AZURE_STORAGE_CONNECTION_STRING}
|
|
22
|
+
|
|
23
|
+
format: json
|
|
24
|
+
batch_size: 1000
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Template: Extract from Google Cloud Storage
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
# Requires: pip install google-cloud-storage
|
|
4
|
+
|
|
5
|
+
title: gcs_extraction
|
|
6
|
+
description: Extract data from Google Cloud Storage
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Source type (required)
|
|
10
|
+
type: cloud_storage
|
|
11
|
+
|
|
12
|
+
storage:
|
|
13
|
+
provider: gcs
|
|
14
|
+
bucket: my-bucket-name
|
|
15
|
+
path: data/input.json
|
|
16
|
+
|
|
17
|
+
# Or prefix for multiple objects
|
|
18
|
+
# prefix: data/inputs/
|
|
19
|
+
|
|
20
|
+
# Credentials (optional - uses default credentials if omitted)
|
|
21
|
+
# credentials:
|
|
22
|
+
# service_account_file: /path/to/service-account.json
|
|
23
|
+
|
|
24
|
+
format: json
|
|
25
|
+
batch_size: 1000
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Template: Extract from AWS S3
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
# Requires: pip install boto3
|
|
4
|
+
|
|
5
|
+
title: s3_extraction
|
|
6
|
+
description: Extract data from AWS S3
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Source type (required)
|
|
10
|
+
type: cloud_storage
|
|
11
|
+
|
|
12
|
+
storage:
|
|
13
|
+
provider: s3
|
|
14
|
+
bucket: my-bucket-name
|
|
15
|
+
|
|
16
|
+
# Single object
|
|
17
|
+
path: data/input.json
|
|
18
|
+
|
|
19
|
+
# Or prefix for multiple objects (uncomment)
|
|
20
|
+
# prefix: data/inputs/
|
|
21
|
+
# pattern: "*.json" # Filter by pattern
|
|
22
|
+
|
|
23
|
+
# Credentials (optional - uses AWS credential chain if omitted)
|
|
24
|
+
# credentials:
|
|
25
|
+
# aws_access_key_id: ${AWS_ACCESS_KEY_ID}
|
|
26
|
+
# aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY}
|
|
27
|
+
# region: us-east-1
|
|
28
|
+
|
|
29
|
+
format: json
|
|
30
|
+
batch_size: 1000
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Template: Extract from Database
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
#
|
|
4
|
+
# Supported databases: PostgreSQL, MySQL, SQLite, MSSQL, Oracle
|
|
5
|
+
|
|
6
|
+
title: database_extraction
|
|
7
|
+
description: Extract data from SQL database
|
|
8
|
+
version: "1.0.0"
|
|
9
|
+
|
|
10
|
+
# Source type (required)
|
|
11
|
+
type: database
|
|
12
|
+
|
|
13
|
+
# SQL query to execute
|
|
14
|
+
query: |
|
|
15
|
+
SELECT id, name, email, created_at
|
|
16
|
+
FROM users
|
|
17
|
+
WHERE status = :status
|
|
18
|
+
ORDER BY created_at DESC
|
|
19
|
+
LIMIT :limit
|
|
20
|
+
|
|
21
|
+
# Query parameters (substituted into :param placeholders)
|
|
22
|
+
params:
|
|
23
|
+
status: active
|
|
24
|
+
limit: 10000
|
|
25
|
+
|
|
26
|
+
# Database connection
|
|
27
|
+
connection_string: ${DATABASE_URL:?DATABASE_URL is required}
|
|
28
|
+
# Formats:
|
|
29
|
+
# PostgreSQL: postgresql://user:pass@host:5432/db
|
|
30
|
+
# MySQL: mysql://user:pass@host:3306/db
|
|
31
|
+
# SQLite: sqlite:///./data/local.db
|
|
32
|
+
# MSSQL: mssql+pyodbc://user:pass@host/db?driver=ODBC+Driver+17+for+SQL+Server
|
|
33
|
+
|
|
34
|
+
batch_size: 1000
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Template: Extract from Database via SSH Tunnel
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
# Use when database is only accessible through a bastion/jump host
|
|
4
|
+
|
|
5
|
+
title: database_ssh_extraction
|
|
6
|
+
description: Extract from database through SSH tunnel
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Source type (required)
|
|
10
|
+
type: database
|
|
11
|
+
|
|
12
|
+
query: |
|
|
13
|
+
SELECT id, name, email, created_at
|
|
14
|
+
FROM users
|
|
15
|
+
WHERE status = :status
|
|
16
|
+
|
|
17
|
+
params:
|
|
18
|
+
status: active
|
|
19
|
+
|
|
20
|
+
# Database connection (point at localhost and the tunnel's local_port; the tunnel forwards to the remote database)
|
|
21
|
+
connection_string: postgresql://user:pass@localhost:5433/database
|
|
22
|
+
|
|
23
|
+
# SSH tunnel configuration
|
|
24
|
+
ssh_tunnel:
|
|
25
|
+
enabled: true
|
|
26
|
+
host: bastion.example.com
|
|
27
|
+
port: 22
|
|
28
|
+
username: ${SSH_USER}
|
|
29
|
+
key_file: ~/.ssh/id_rsa
|
|
30
|
+
# Or use password:
|
|
31
|
+
# password: ${SSH_PASSWORD}
|
|
32
|
+
|
|
33
|
+
# Remote database endpoint (the actual database host)
|
|
34
|
+
remote_host: db.internal.example.com
|
|
35
|
+
remote_port: 5432
|
|
36
|
+
|
|
37
|
+
# Local port for tunnel (connect to this)
|
|
38
|
+
local_port: 5433
|
|
39
|
+
|
|
40
|
+
batch_size: 1000
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Template: Extract from CSV File
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
|
|
4
|
+
title: csv_file_extraction
|
|
5
|
+
description: Extract data from CSV file
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
|
|
8
|
+
# Source type (required)
|
|
9
|
+
type: file
|
|
10
|
+
|
|
11
|
+
path: ./data/input.csv
|
|
12
|
+
format: csv
|
|
13
|
+
batch_size: 1000
|
|
14
|
+
|
|
15
|
+
# CSV-specific options
|
|
16
|
+
csv_options:
|
|
17
|
+
delimiter: ","
|
|
18
|
+
quotechar: '"'
|
|
19
|
+
has_header: true
|
|
20
|
+
# encoding: utf-8
|
|
21
|
+
# skip_rows: 0
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Template: Extract from Multiple Files (Glob Pattern)
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
|
|
4
|
+
title: glob_file_extraction
|
|
5
|
+
description: Extract from multiple files matching pattern
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
|
|
8
|
+
# Source type (required)
|
|
9
|
+
type: file
|
|
10
|
+
|
|
11
|
+
# Glob pattern to match files
|
|
12
|
+
path: ./data/inputs/*.json
|
|
13
|
+
# Examples:
|
|
14
|
+
# ./data/*.csv - All CSV files in data/
|
|
15
|
+
# ./data/**/*.json - All JSON files recursively
|
|
16
|
+
# ./data/2024-*.parquet - Parquet files starting with 2024-
|
|
17
|
+
|
|
18
|
+
# Format (auto-detected from first file if omitted)
|
|
19
|
+
format: json
|
|
20
|
+
|
|
21
|
+
batch_size: 1000
|
|
22
|
+
|
|
23
|
+
# Processing options
|
|
24
|
+
# sort_files: true # Process files in sorted order
|
|
25
|
+
# fail_on_empty: false # Don't fail if no files match
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Template: Extract from JSON File
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
|
|
4
|
+
title: json_file_extraction
|
|
5
|
+
description: Extract data from JSON file
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
|
|
8
|
+
# Source type (required)
|
|
9
|
+
type: file
|
|
10
|
+
|
|
11
|
+
# File path
|
|
12
|
+
path: ./data/input.json
|
|
13
|
+
# Or use an environment variable with a fallback default:
|
|
14
|
+
# path: ${INPUT_DIR:-./data}/input.json
|
|
15
|
+
|
|
16
|
+
# Format (auto-detected from extension if omitted)
|
|
17
|
+
format: json
|
|
18
|
+
|
|
19
|
+
# Batch size for yielding records
|
|
20
|
+
batch_size: 1000
|
|
21
|
+
|
|
22
|
+
# For JSON files containing an object with data array:
|
|
23
|
+
# response_path: data.items
|
|
24
|
+
# response_path: results
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Template: Extract from Parquet File
|
|
2
|
+
# Copy to your pipeline directory as extract.yaml
|
|
3
|
+
# Requires one Parquet engine: `pip install pyarrow` (or `pip install fastparquet`)
|
|
4
|
+
|
|
5
|
+
title: parquet_file_extraction
|
|
6
|
+
description: Extract data from Parquet file
|
|
7
|
+
version: "1.0.0"
|
|
8
|
+
|
|
9
|
+
# Source type (required)
|
|
10
|
+
type: file
|
|
11
|
+
|
|
12
|
+
path: ./data/input.parquet
|
|
13
|
+
format: parquet
|
|
14
|
+
batch_size: 1000
|
|
15
|
+
|
|
16
|
+
# Optional: select specific columns
|
|
17
|
+
# columns:
|
|
18
|
+
# - id
|
|
19
|
+
# - name
|
|
20
|
+
# - created_at
|