pycharter 0.0.22__py3-none-any.whl → 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/routes/v1/templates.py +43 -24
- pycharter/data/templates/etl/README.md +91 -0
- pycharter/data/templates/etl/extract_cloud_azure.yaml +23 -0
- pycharter/data/templates/etl/extract_cloud_gcs.yaml +22 -0
- pycharter/data/templates/etl/extract_cloud_s3.yaml +24 -0
- pycharter/data/templates/etl/extract_database.yaml +28 -0
- pycharter/data/templates/etl/extract_database_ssh.yaml +27 -0
- pycharter/data/templates/etl/extract_file_csv.yaml +17 -0
- pycharter/data/templates/etl/extract_file_glob.yaml +17 -0
- pycharter/data/templates/etl/extract_file_json.yaml +14 -0
- pycharter/data/templates/etl/extract_file_parquet.yaml +13 -0
- pycharter/data/templates/etl/extract_http_paginated.yaml +75 -0
- pycharter/data/templates/etl/extract_http_path_params.yaml +45 -0
- pycharter/data/templates/etl/extract_http_simple.yaml +52 -0
- pycharter/data/templates/etl/load_insert.yaml +17 -0
- pycharter/data/templates/etl/load_postgresql.yaml +17 -0
- pycharter/data/templates/etl/load_sqlite.yaml +16 -0
- pycharter/data/templates/etl/load_truncate_and_load.yaml +18 -0
- pycharter/data/templates/etl/load_upsert.yaml +28 -0
- pycharter/data/templates/etl/load_with_dlq.yaml +24 -0
- pycharter/data/templates/etl/load_with_ssh_tunnel.yaml +28 -0
- pycharter/data/templates/etl/pipeline_http_to_db.yaml +38 -0
- pycharter/data/templates/etl/transform_combined.yaml +38 -0
- pycharter/data/templates/etl/transform_custom_function.yaml +18 -0
- pycharter/data/templates/etl/transform_jsonata.yaml +20 -0
- pycharter/data/templates/etl/transform_simple.yaml +41 -0
- pycharter/db/schemas/.ipynb_checkpoints/data_contract-checkpoint.py +160 -0
- pycharter/etl_generator/extraction.py +47 -262
- pycharter/etl_generator/extractors/__init__.py +26 -0
- pycharter/etl_generator/extractors/base.py +70 -0
- pycharter/etl_generator/extractors/cloud_storage.py +454 -0
- pycharter/etl_generator/extractors/database.py +151 -0
- pycharter/etl_generator/extractors/factory.py +141 -0
- pycharter/etl_generator/extractors/file.py +418 -0
- pycharter/etl_generator/extractors/http.py +816 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.23.dist-info}/METADATA +6 -1
- pycharter-0.0.23.dist-info/RECORD +498 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.23.dist-info}/WHEEL +1 -1
- ui/static/404/index.html +1 -1
- ui/static/404.html +1 -1
- ui/static/__next.__PAGE__.txt +1 -1
- ui/static/__next._full.txt +1 -1
- ui/static/__next._head.txt +1 -1
- ui/static/__next._index.txt +1 -1
- ui/static/__next._tree.txt +1 -1
- ui/static/_next/static/chunks/26dfc590f7714c03.js +1 -0
- ui/static/_next/static/chunks/34d289e6db2ef551.js +1 -0
- ui/static/_next/static/chunks/99508d9d5869cc27.js +1 -0
- ui/static/_next/static/chunks/b313c35a6ba76574.js +1 -0
- ui/static/_not-found/__next._full.txt +1 -1
- ui/static/_not-found/__next._head.txt +1 -1
- ui/static/_not-found/__next._index.txt +1 -1
- ui/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/_not-found/__next._not-found.txt +1 -1
- ui/static/_not-found/__next._tree.txt +1 -1
- ui/static/_not-found/index.html +1 -1
- ui/static/_not-found/index.txt +1 -1
- ui/static/contracts/__next._full.txt +2 -2
- ui/static/contracts/__next._head.txt +1 -1
- ui/static/contracts/__next._index.txt +1 -1
- ui/static/contracts/__next._tree.txt +1 -1
- ui/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/contracts/__next.contracts.txt +1 -1
- ui/static/contracts/index.html +1 -1
- ui/static/contracts/index.txt +2 -2
- ui/static/documentation/__next._full.txt +1 -1
- ui/static/documentation/__next._head.txt +1 -1
- ui/static/documentation/__next._index.txt +1 -1
- ui/static/documentation/__next._tree.txt +1 -1
- ui/static/documentation/__next.documentation.__PAGE__.txt +1 -1
- ui/static/documentation/__next.documentation.txt +1 -1
- ui/static/documentation/index.html +2 -2
- ui/static/documentation/index.txt +1 -1
- ui/static/index.html +1 -1
- ui/static/index.txt +1 -1
- ui/static/metadata/__next._full.txt +1 -1
- ui/static/metadata/__next._head.txt +1 -1
- ui/static/metadata/__next._index.txt +1 -1
- ui/static/metadata/__next._tree.txt +1 -1
- ui/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/metadata/__next.metadata.txt +1 -1
- ui/static/metadata/index.html +1 -1
- ui/static/metadata/index.txt +1 -1
- ui/static/quality/__next._full.txt +2 -2
- ui/static/quality/__next._head.txt +1 -1
- ui/static/quality/__next._index.txt +1 -1
- ui/static/quality/__next._tree.txt +1 -1
- ui/static/quality/__next.quality.__PAGE__.txt +2 -2
- ui/static/quality/__next.quality.txt +1 -1
- ui/static/quality/index.html +2 -2
- ui/static/quality/index.txt +2 -2
- ui/static/rules/__next._full.txt +1 -1
- ui/static/rules/__next._head.txt +1 -1
- ui/static/rules/__next._index.txt +1 -1
- ui/static/rules/__next._tree.txt +1 -1
- ui/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/rules/__next.rules.txt +1 -1
- ui/static/rules/index.html +1 -1
- ui/static/rules/index.txt +1 -1
- ui/static/schemas/__next._full.txt +1 -1
- ui/static/schemas/__next._head.txt +1 -1
- ui/static/schemas/__next._index.txt +1 -1
- ui/static/schemas/__next._tree.txt +1 -1
- ui/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/schemas/__next.schemas.txt +1 -1
- ui/static/schemas/index.html +1 -1
- ui/static/schemas/index.txt +1 -1
- ui/static/settings/__next._full.txt +1 -1
- ui/static/settings/__next._head.txt +1 -1
- ui/static/settings/__next._index.txt +1 -1
- ui/static/settings/__next._tree.txt +1 -1
- ui/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/settings/__next.settings.txt +1 -1
- ui/static/settings/index.html +1 -1
- ui/static/settings/index.txt +1 -1
- ui/static/static/404/index.html +1 -1
- ui/static/static/404.html +1 -1
- ui/static/static/__next.__PAGE__.txt +1 -1
- ui/static/static/__next._full.txt +2 -2
- ui/static/static/__next._head.txt +1 -1
- ui/static/static/__next._index.txt +2 -2
- ui/static/static/__next._tree.txt +2 -2
- ui/static/static/_next/static/chunks/13d4a0fbd74c1ee4.js +1 -0
- ui/static/static/_next/static/chunks/2edb43b48432ac04.js +441 -0
- ui/static/static/_next/static/chunks/d2363397e1b2bcab.css +1 -0
- ui/static/static/_next/static/chunks/f7d1a90dd75d2572.js +1 -0
- ui/static/static/_not-found/__next._full.txt +2 -2
- ui/static/static/_not-found/__next._head.txt +1 -1
- ui/static/static/_not-found/__next._index.txt +2 -2
- ui/static/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
- ui/static/static/_not-found/__next._not-found.txt +1 -1
- ui/static/static/_not-found/__next._tree.txt +2 -2
- ui/static/static/_not-found/index.html +1 -1
- ui/static/static/_not-found/index.txt +2 -2
- ui/static/static/contracts/__next._full.txt +3 -3
- ui/static/static/contracts/__next._head.txt +1 -1
- ui/static/static/contracts/__next._index.txt +2 -2
- ui/static/static/contracts/__next._tree.txt +2 -2
- ui/static/static/contracts/__next.contracts.__PAGE__.txt +2 -2
- ui/static/static/contracts/__next.contracts.txt +1 -1
- ui/static/static/contracts/index.html +1 -1
- ui/static/static/contracts/index.txt +3 -3
- ui/static/static/documentation/__next._full.txt +3 -3
- ui/static/static/documentation/__next._head.txt +1 -1
- ui/static/static/documentation/__next._index.txt +2 -2
- ui/static/static/documentation/__next._tree.txt +2 -2
- ui/static/static/documentation/__next.documentation.__PAGE__.txt +2 -2
- ui/static/static/documentation/__next.documentation.txt +1 -1
- ui/static/static/documentation/index.html +2 -2
- ui/static/static/documentation/index.txt +3 -3
- ui/static/static/index.html +1 -1
- ui/static/static/index.txt +2 -2
- ui/static/static/metadata/__next._full.txt +2 -2
- ui/static/static/metadata/__next._head.txt +1 -1
- ui/static/static/metadata/__next._index.txt +2 -2
- ui/static/static/metadata/__next._tree.txt +2 -2
- ui/static/static/metadata/__next.metadata.__PAGE__.txt +1 -1
- ui/static/static/metadata/__next.metadata.txt +1 -1
- ui/static/static/metadata/index.html +1 -1
- ui/static/static/metadata/index.txt +2 -2
- ui/static/static/quality/__next._full.txt +2 -2
- ui/static/static/quality/__next._head.txt +1 -1
- ui/static/static/quality/__next._index.txt +2 -2
- ui/static/static/quality/__next._tree.txt +2 -2
- ui/static/static/quality/__next.quality.__PAGE__.txt +1 -1
- ui/static/static/quality/__next.quality.txt +1 -1
- ui/static/static/quality/index.html +2 -2
- ui/static/static/quality/index.txt +2 -2
- ui/static/static/rules/__next._full.txt +2 -2
- ui/static/static/rules/__next._head.txt +1 -1
- ui/static/static/rules/__next._index.txt +2 -2
- ui/static/static/rules/__next._tree.txt +2 -2
- ui/static/static/rules/__next.rules.__PAGE__.txt +1 -1
- ui/static/static/rules/__next.rules.txt +1 -1
- ui/static/static/rules/index.html +1 -1
- ui/static/static/rules/index.txt +2 -2
- ui/static/static/schemas/__next._full.txt +2 -2
- ui/static/static/schemas/__next._head.txt +1 -1
- ui/static/static/schemas/__next._index.txt +2 -2
- ui/static/static/schemas/__next._tree.txt +2 -2
- ui/static/static/schemas/__next.schemas.__PAGE__.txt +1 -1
- ui/static/static/schemas/__next.schemas.txt +1 -1
- ui/static/static/schemas/index.html +1 -1
- ui/static/static/schemas/index.txt +2 -2
- ui/static/static/settings/__next._full.txt +2 -2
- ui/static/static/settings/__next._head.txt +1 -1
- ui/static/static/settings/__next._index.txt +2 -2
- ui/static/static/settings/__next._tree.txt +2 -2
- ui/static/static/settings/__next.settings.__PAGE__.txt +1 -1
- ui/static/static/settings/__next.settings.txt +1 -1
- ui/static/static/settings/index.html +1 -1
- ui/static/static/settings/index.txt +2 -2
- ui/static/static/static/.gitkeep +0 -0
- ui/static/static/static/404/index.html +1 -0
- ui/static/static/static/404.html +1 -0
- ui/static/static/static/__next.__PAGE__.txt +10 -0
- ui/static/static/static/__next._full.txt +30 -0
- ui/static/static/static/__next._head.txt +7 -0
- ui/static/static/static/__next._index.txt +9 -0
- ui/static/static/static/__next._tree.txt +2 -0
- ui/static/static/static/_next/static/chunks/222442f6da32302a.js +1 -0
- ui/static/static/static/_next/static/chunks/247eb132b7f7b574.js +1 -0
- ui/static/static/static/_next/static/chunks/297d55555b71baba.js +1 -0
- ui/static/static/static/_next/static/chunks/2ab439ce003cd691.js +1 -0
- ui/static/static/static/_next/static/chunks/414e77373f8ff61c.js +1 -0
- ui/static/static/static/_next/static/chunks/49ca65abd26ae49e.js +1 -0
- ui/static/static/static/_next/static/chunks/652ad0aa26265c47.js +2 -0
- ui/static/static/static/_next/static/chunks/9667e7a3d359eb39.js +1 -0
- ui/static/static/static/_next/static/chunks/9c23f44fff36548a.js +1 -0
- ui/static/static/static/_next/static/chunks/a6dad97d9634a72d.js +1 -0
- ui/static/static/static/_next/static/chunks/b32a0963684b9933.js +4 -0
- ui/static/static/static/_next/static/chunks/c69f6cba366bd988.js +1 -0
- ui/static/static/static/_next/static/chunks/db913959c675cea6.js +1 -0
- ui/static/static/static/_next/static/chunks/f061a4be97bfc3b3.js +1 -0
- ui/static/static/static/_next/static/chunks/f2e7afeab1178138.js +1 -0
- ui/static/static/static/_next/static/chunks/ff1a16fafef87110.js +1 -0
- ui/static/static/static/_next/static/chunks/turbopack-ffcb7ab6794027ef.js +3 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_buildManifest.js +11 -0
- ui/static/static/static/_next/static/tNTkVW6puVXC4bAm4WrHl/_ssgManifest.js +1 -0
- ui/static/static/static/_not-found/__next._full.txt +17 -0
- ui/static/static/static/_not-found/__next._head.txt +7 -0
- ui/static/static/static/_not-found/__next._index.txt +9 -0
- ui/static/static/static/_not-found/__next._not-found.__PAGE__.txt +5 -0
- ui/static/static/static/_not-found/__next._not-found.txt +4 -0
- ui/static/static/static/_not-found/__next._tree.txt +2 -0
- ui/static/static/static/_not-found/index.html +1 -0
- ui/static/static/static/_not-found/index.txt +17 -0
- ui/static/static/static/contracts/__next._full.txt +21 -0
- ui/static/static/static/contracts/__next._head.txt +7 -0
- ui/static/static/static/contracts/__next._index.txt +9 -0
- ui/static/static/static/contracts/__next._tree.txt +2 -0
- ui/static/static/static/contracts/__next.contracts.__PAGE__.txt +9 -0
- ui/static/static/static/contracts/__next.contracts.txt +4 -0
- ui/static/static/static/contracts/index.html +1 -0
- ui/static/static/static/contracts/index.txt +21 -0
- ui/static/static/static/documentation/__next._full.txt +21 -0
- ui/static/static/static/documentation/__next._head.txt +7 -0
- ui/static/static/static/documentation/__next._index.txt +9 -0
- ui/static/static/static/documentation/__next._tree.txt +2 -0
- ui/static/static/static/documentation/__next.documentation.__PAGE__.txt +9 -0
- ui/static/static/static/documentation/__next.documentation.txt +4 -0
- ui/static/static/static/documentation/index.html +93 -0
- ui/static/static/static/documentation/index.txt +21 -0
- ui/static/static/static/index.html +1 -0
- ui/static/static/static/index.txt +30 -0
- ui/static/static/static/metadata/__next._full.txt +21 -0
- ui/static/static/static/metadata/__next._head.txt +7 -0
- ui/static/static/static/metadata/__next._index.txt +9 -0
- ui/static/static/static/metadata/__next._tree.txt +2 -0
- ui/static/static/static/metadata/__next.metadata.__PAGE__.txt +9 -0
- ui/static/static/static/metadata/__next.metadata.txt +4 -0
- ui/static/static/static/metadata/index.html +1 -0
- ui/static/static/static/metadata/index.txt +21 -0
- ui/static/static/static/quality/__next._full.txt +21 -0
- ui/static/static/static/quality/__next._head.txt +7 -0
- ui/static/static/static/quality/__next._index.txt +9 -0
- ui/static/static/static/quality/__next._tree.txt +2 -0
- ui/static/static/static/quality/__next.quality.__PAGE__.txt +9 -0
- ui/static/static/static/quality/__next.quality.txt +4 -0
- ui/static/static/static/quality/index.html +2 -0
- ui/static/static/static/quality/index.txt +21 -0
- ui/static/static/static/rules/__next._full.txt +21 -0
- ui/static/static/static/rules/__next._head.txt +7 -0
- ui/static/static/static/rules/__next._index.txt +9 -0
- ui/static/static/static/rules/__next._tree.txt +2 -0
- ui/static/static/static/rules/__next.rules.__PAGE__.txt +9 -0
- ui/static/static/static/rules/__next.rules.txt +4 -0
- ui/static/static/static/rules/index.html +1 -0
- ui/static/static/static/rules/index.txt +21 -0
- ui/static/static/static/schemas/__next._full.txt +21 -0
- ui/static/static/static/schemas/__next._head.txt +7 -0
- ui/static/static/static/schemas/__next._index.txt +9 -0
- ui/static/static/static/schemas/__next._tree.txt +2 -0
- ui/static/static/static/schemas/__next.schemas.__PAGE__.txt +9 -0
- ui/static/static/static/schemas/__next.schemas.txt +4 -0
- ui/static/static/static/schemas/index.html +1 -0
- ui/static/static/static/schemas/index.txt +21 -0
- ui/static/static/static/settings/__next._full.txt +21 -0
- ui/static/static/static/settings/__next._head.txt +7 -0
- ui/static/static/static/settings/__next._index.txt +9 -0
- ui/static/static/static/settings/__next._tree.txt +2 -0
- ui/static/static/static/settings/__next.settings.__PAGE__.txt +9 -0
- ui/static/static/static/settings/__next.settings.txt +4 -0
- ui/static/static/static/settings/index.html +1 -0
- ui/static/static/static/settings/index.txt +21 -0
- ui/static/static/static/validation/__next._full.txt +21 -0
- ui/static/static/static/validation/__next._head.txt +7 -0
- ui/static/static/static/validation/__next._index.txt +9 -0
- ui/static/static/static/validation/__next._tree.txt +2 -0
- ui/static/static/static/validation/__next.validation.__PAGE__.txt +9 -0
- ui/static/static/static/validation/__next.validation.txt +4 -0
- ui/static/static/static/validation/index.html +1 -0
- ui/static/static/static/validation/index.txt +21 -0
- ui/static/static/validation/__next._full.txt +2 -2
- ui/static/static/validation/__next._head.txt +1 -1
- ui/static/static/validation/__next._index.txt +2 -2
- ui/static/static/validation/__next._tree.txt +2 -2
- ui/static/static/validation/__next.validation.__PAGE__.txt +1 -1
- ui/static/static/validation/__next.validation.txt +1 -1
- ui/static/static/validation/index.html +1 -1
- ui/static/static/validation/index.txt +2 -2
- ui/static/validation/__next._full.txt +2 -2
- ui/static/validation/__next._head.txt +1 -1
- ui/static/validation/__next._index.txt +1 -1
- ui/static/validation/__next._tree.txt +1 -1
- ui/static/validation/__next.validation.__PAGE__.txt +2 -2
- ui/static/validation/__next.validation.txt +1 -1
- ui/static/validation/index.html +1 -1
- ui/static/validation/index.txt +2 -2
- pycharter/data/templates/template_transform_advanced.yaml +0 -50
- pycharter/data/templates/template_transform_simple.yaml +0 -59
- pycharter-0.0.22.dist-info/RECORD +0 -358
- /pycharter/data/templates/{template_coercion_rules.yaml → contract/template_coercion_rules.yaml} +0 -0
- /pycharter/data/templates/{template_contract.yaml → contract/template_contract.yaml} +0 -0
- /pycharter/data/templates/{template_metadata.yaml → contract/template_metadata.yaml} +0 -0
- /pycharter/data/templates/{template_schema.yaml → contract/template_schema.yaml} +0 -0
- /pycharter/data/templates/{template_validation_rules.yaml → contract/template_validation_rules.yaml} +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.23.dist-info}/entry_points.txt +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.23.dist-info}/licenses/LICENSE +0 -0
- {pycharter-0.0.22.dist-info → pycharter-0.0.23.dist-info}/top_level.txt +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_buildManifest.js +0 -0
- /ui/static/_next/static/{0rYA78L88aUyD2Uh38hhX → 2gKjNv6YvE6BcIdFthBLs}/_ssgManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_buildManifest.js +0 -0
- /ui/static/static/_next/static/{tNTkVW6puVXC4bAm4WrHl → 0rYA78L88aUyD2Uh38hhX}/_ssgManifest.js +0 -0
- /ui/static/{_next → static/_next}/static/chunks/c4fa4f4114b7c352.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/4e310fe5005770a3.css +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/5e04d10c4a7b58a3.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/5fc14c00a2779dc5.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/75d88a058d8ffaa6.js +0 -0
- /ui/static/{_next → static/static/_next}/static/chunks/8c89634cf6bad76f.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/b584574fdc8ab13e.js +0 -0
- /ui/static/static/{_next → static/_next}/static/chunks/d5989c94d3614b3a.js +0 -0
api/routes/v1/templates.py
CHANGED
|
@@ -18,40 +18,59 @@ router = APIRouter()
|
|
|
18
18
|
|
|
19
19
|
def _find_template_dir() -> Path:
|
|
20
20
|
"""
|
|
21
|
-
Find template directory in multiple locations (package, source, etc.).
|
|
22
|
-
|
|
21
|
+
Find contract template directory in multiple locations (package, source, etc.).
|
|
22
|
+
|
|
23
|
+
Contract templates (schema, metadata, coercion/validation rules, contract)
|
|
24
|
+
live under templates/contract/.
|
|
25
|
+
|
|
23
26
|
Priority:
|
|
24
|
-
1. Installed package location (pycharter/data/templates/)
|
|
27
|
+
1. Installed package location (pycharter/data/templates/contract/)
|
|
25
28
|
2. Source location (relative to api/routes/v1/)
|
|
26
29
|
3. Legacy location (data/aviation_examples/template/)
|
|
27
|
-
|
|
30
|
+
|
|
28
31
|
Returns:
|
|
29
|
-
Path to template directory
|
|
32
|
+
Path to contract template directory (templates/contract/)
|
|
30
33
|
"""
|
|
31
34
|
# Try installed package location (when package is installed)
|
|
32
35
|
try:
|
|
33
36
|
import pycharter
|
|
37
|
+
|
|
34
38
|
pycharter_path = Path(pycharter.__file__).parent
|
|
35
|
-
|
|
36
|
-
if
|
|
37
|
-
|
|
39
|
+
package_contract = pycharter_path / "data" / "templates" / "contract"
|
|
40
|
+
if (
|
|
41
|
+
package_contract.exists()
|
|
42
|
+
and (package_contract / "template_schema.yaml").exists()
|
|
43
|
+
):
|
|
44
|
+
return package_contract
|
|
38
45
|
except (ImportError, AttributeError):
|
|
39
46
|
pass
|
|
40
|
-
|
|
47
|
+
|
|
41
48
|
# Try source locations (development)
|
|
42
49
|
possible_paths = [
|
|
43
|
-
Path(__file__).parent.parent.parent.parent
|
|
44
|
-
|
|
45
|
-
|
|
50
|
+
Path(__file__).parent.parent.parent.parent
|
|
51
|
+
/ "pycharter"
|
|
52
|
+
/ "data"
|
|
53
|
+
/ "templates"
|
|
54
|
+
/ "contract",
|
|
55
|
+
Path(__file__).parent.parent.parent.parent
|
|
56
|
+
/ "data"
|
|
57
|
+
/ "aviation_examples"
|
|
58
|
+
/ "template",
|
|
59
|
+
Path.cwd() / "pycharter" / "data" / "templates" / "contract",
|
|
46
60
|
Path.cwd() / "data" / "aviation_examples" / "template",
|
|
47
61
|
]
|
|
48
|
-
|
|
62
|
+
|
|
49
63
|
for template_path in possible_paths:
|
|
50
64
|
if template_path.exists() and (template_path / "template_schema.yaml").exists():
|
|
51
65
|
return template_path
|
|
52
|
-
|
|
66
|
+
|
|
53
67
|
# Fallback to original location
|
|
54
|
-
fallback =
|
|
68
|
+
fallback = (
|
|
69
|
+
Path(__file__).parent.parent.parent.parent
|
|
70
|
+
/ "data"
|
|
71
|
+
/ "aviation_examples"
|
|
72
|
+
/ "template"
|
|
73
|
+
)
|
|
55
74
|
return fallback
|
|
56
75
|
|
|
57
76
|
|
|
@@ -77,7 +96,7 @@ def _get_template_path(filename: str) -> Path:
|
|
|
77
96
|
async def download_schema_template() -> FileResponse:
|
|
78
97
|
"""
|
|
79
98
|
Download the schema template file.
|
|
80
|
-
|
|
99
|
+
|
|
81
100
|
Returns:
|
|
82
101
|
YAML file containing a template schema structure
|
|
83
102
|
"""
|
|
@@ -111,7 +130,7 @@ async def download_schema_template() -> FileResponse:
|
|
|
111
130
|
async def download_metadata_template() -> FileResponse:
|
|
112
131
|
"""
|
|
113
132
|
Download the metadata template file.
|
|
114
|
-
|
|
133
|
+
|
|
115
134
|
Returns:
|
|
116
135
|
YAML file containing a template metadata structure
|
|
117
136
|
"""
|
|
@@ -145,7 +164,7 @@ async def download_metadata_template() -> FileResponse:
|
|
|
145
164
|
async def download_coercion_rules_template() -> FileResponse:
|
|
146
165
|
"""
|
|
147
166
|
Download the coercion rules template file.
|
|
148
|
-
|
|
167
|
+
|
|
149
168
|
Returns:
|
|
150
169
|
YAML file containing a template coercion rules structure
|
|
151
170
|
"""
|
|
@@ -179,7 +198,7 @@ async def download_coercion_rules_template() -> FileResponse:
|
|
|
179
198
|
async def download_validation_rules_template() -> FileResponse:
|
|
180
199
|
"""
|
|
181
200
|
Download the validation rules template file.
|
|
182
|
-
|
|
201
|
+
|
|
183
202
|
Returns:
|
|
184
203
|
YAML file containing a template validation rules structure
|
|
185
204
|
"""
|
|
@@ -213,14 +232,14 @@ async def download_validation_rules_template() -> FileResponse:
|
|
|
213
232
|
async def download_contract_artifacts() -> Response:
|
|
214
233
|
"""
|
|
215
234
|
Download all contract artifact templates as a ZIP archive.
|
|
216
|
-
|
|
235
|
+
|
|
217
236
|
The ZIP file contains:
|
|
218
237
|
- template_schema.yaml
|
|
219
238
|
- template_metadata.yaml
|
|
220
239
|
- template_coercion_rules.yaml
|
|
221
240
|
- template_validation_rules.yaml
|
|
222
241
|
- template_contract.yaml
|
|
223
|
-
|
|
242
|
+
|
|
224
243
|
Returns:
|
|
225
244
|
ZIP file containing all template files
|
|
226
245
|
"""
|
|
@@ -233,7 +252,7 @@ async def download_contract_artifacts() -> Response:
|
|
|
233
252
|
"template_validation_rules.yaml",
|
|
234
253
|
"template_contract.yaml",
|
|
235
254
|
]
|
|
236
|
-
|
|
255
|
+
|
|
237
256
|
# Create ZIP archive in memory
|
|
238
257
|
zip_buffer = io.BytesIO()
|
|
239
258
|
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
|
|
@@ -241,12 +260,12 @@ async def download_contract_artifacts() -> Response:
|
|
|
241
260
|
template_path = _get_template_path(template_file)
|
|
242
261
|
# Read file content and add to ZIP
|
|
243
262
|
zip_file.write(template_path, arcname=template_file)
|
|
244
|
-
|
|
263
|
+
|
|
245
264
|
# Get ZIP content
|
|
246
265
|
zip_buffer.seek(0)
|
|
247
266
|
zip_content = zip_buffer.read()
|
|
248
267
|
zip_buffer.close()
|
|
249
|
-
|
|
268
|
+
|
|
250
269
|
return Response(
|
|
251
270
|
content=zip_content,
|
|
252
271
|
media_type="application/zip",
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# ETL Config Templates
|
|
2
|
+
|
|
3
|
+
Templates for ETL pipeline configuration files. Copy into your contract directory as `extract.yaml`, `transform.yaml`, or `load.yaml`, and adjust for your environment.
|
|
4
|
+
|
|
5
|
+
## Directory layout
|
|
6
|
+
|
|
7
|
+
Use a contract directory that contains at least:
|
|
8
|
+
|
|
9
|
+
- `schema.json` or `schema.yaml` (required)
|
|
10
|
+
- `extract.yaml` (required) – pick one of the extract templates below
|
|
11
|
+
- `transform.yaml` (optional) – pick one of the transform templates
|
|
12
|
+
- `load.yaml` (required) – pick one of the load templates
|
|
13
|
+
|
|
14
|
+
## Extract templates
|
|
15
|
+
|
|
16
|
+
| Template | Description |
|
|
17
|
+
|----------|-------------|
|
|
18
|
+
| `extract_http_simple.yaml` | HTTP API, single request, no pagination |
|
|
19
|
+
| `extract_http_paginated.yaml` | HTTP API with pagination (page / offset / cursor / next_url / link_header) |
|
|
20
|
+
| `extract_http_path_params.yaml` | HTTP API with `{param}` path substitution |
|
|
21
|
+
| `extract_file_csv.yaml` | Local CSV file |
|
|
22
|
+
| `extract_file_json.yaml` | Local JSON file (array or object with data/results/items) |
|
|
23
|
+
| `extract_file_parquet.yaml` | Local Parquet file |
|
|
24
|
+
| `extract_file_glob.yaml` | Multiple files via glob (e.g. `*.csv`, `data/*.json`) |
|
|
25
|
+
| `extract_database.yaml` | SQL query against PostgreSQL/MySQL/SQLite/MSSQL/Oracle |
|
|
26
|
+
| `extract_database_ssh.yaml` | Database extraction through an SSH tunnel |
|
|
27
|
+
| `extract_cloud_s3.yaml` | AWS S3 (single object or prefix) |
|
|
28
|
+
| `extract_cloud_gcs.yaml` | Google Cloud Storage |
|
|
29
|
+
| `extract_cloud_azure.yaml` | Azure Blob Storage |
|
|
30
|
+
|
|
31
|
+
The source type is set explicitly with `source_type`, or auto-detected from which of these keys is present in the config:
|
|
32
|
+
|
|
33
|
+
- `base_url` / `api_endpoint` → `http`
|
|
34
|
+
- `file_path` → `file`
|
|
35
|
+
- `database` → `database`
|
|
36
|
+
- `storage` → `cloud_storage`
|
|
37
|
+
|
|
38
|
+
## Transform templates
|
|
39
|
+
|
|
40
|
+
| Template | Description |
|
|
41
|
+
|----------|-------------|
|
|
42
|
+
| `transform_simple.yaml` | Rename, convert, defaults, add, select, drop |
|
|
43
|
+
| `transform_jsonata.yaml` | JSONata expressions (record or batch) |
|
|
44
|
+
| `transform_custom_function.yaml` | Call a Python function (module.function) |
|
|
45
|
+
| `transform_combined.yaml` | Simple ops + JSONata + optional custom function |
|
|
46
|
+
|
|
47
|
+
Order in the pipeline: **Simple operations → JSONata → Custom function**.
|
|
48
|
+
|
|
49
|
+
## Load templates
|
|
50
|
+
|
|
51
|
+
| Template | Description |
|
|
52
|
+
|----------|-------------|
|
|
53
|
+
| `load_upsert.yaml` | Upsert by primary key (default) |
|
|
54
|
+
| `load_insert.yaml` | Insert only |
|
|
55
|
+
| `load_truncate_and_load.yaml` | Truncate table then load (full refresh) |
|
|
56
|
+
| `load_with_dlq.yaml` | Load with Dead Letter Queue for failed rows |
|
|
57
|
+
| `load_postgresql.yaml` | Load into PostgreSQL |
|
|
58
|
+
| `load_sqlite.yaml` | Load into SQLite |
|
|
59
|
+
| `load_with_ssh_tunnel.yaml` | Load into a DB reachable only via SSH tunnel |
|
|
60
|
+
|
|
61
|
+
Write methods: `insert`, `upsert`, `replace`, `update`, `delete`, `append`, `truncate_and_load`.
|
|
62
|
+
|
|
63
|
+
## Variable injection
|
|
64
|
+
|
|
65
|
+
Configs support `${VAR}`, `${VAR:-default}` (fallback value), and `${VAR:?message}` (required, with an error message). Provide values via:
|
|
66
|
+
|
|
67
|
+
- Environment variables
|
|
68
|
+
- `config_context` when creating `ETLOrchestrator(...)`
|
|
69
|
+
- Optional `source_file` for resolution (e.g. path to `extract.yaml`)
|
|
70
|
+
|
|
71
|
+
Examples:
|
|
72
|
+
|
|
73
|
+
- `url: ${TARGET_DATABASE_URL:?TARGET_DATABASE_URL is required}`
|
|
74
|
+
- `api_key: ${API_KEY}`
|
|
75
|
+
- `rate_limit_delay: ${RATE_LIMIT_DELAY:-0.2}`
|
|
76
|
+
|
|
77
|
+
## Full pipeline reference
|
|
78
|
+
|
|
79
|
+
See `pipeline_http_to_db.yaml` for a combined extract/transform/load example. Real pipelines use separate `extract.yaml`, `transform.yaml`, and `load.yaml` in the contract directory.
|
|
80
|
+
|
|
81
|
+
## Usage
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from pycharter.etl_generator import ETLOrchestrator
|
|
85
|
+
|
|
86
|
+
# From contract dir (contains schema + extract/transform/load from these templates)
|
|
87
|
+
orchestrator = ETLOrchestrator(contract_dir="path/to/contract_dir")
|
|
88
|
+
result = await orchestrator.run(**input_params)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
For more examples, see `examples/10_basic_etl.py` through `examples/14_etl_with_transforms.py`.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Template: Azure Blob Storage extraction
|
|
2
|
+
# source_type: cloud_storage. Requires azure-storage-blob.
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: azure_blob_extraction
|
|
6
|
+
description: Extract data from Azure Blob Storage
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: cloud_storage
|
|
10
|
+
storage:
|
|
11
|
+
provider: azure
|
|
12
|
+
bucket: my-container # Azure container name
|
|
13
|
+
path: data/records.json
|
|
14
|
+
credentials:
|
|
15
|
+
connection_string: ${AZURE_STORAGE_CONNECTION_STRING}
|
|
16
|
+
# Or: account_name + account_key
|
|
17
|
+
|
|
18
|
+
format: json
|
|
19
|
+
batch_size: 1000
|
|
20
|
+
max_records: null
|
|
21
|
+
|
|
22
|
+
# Prefix for multiple blobs:
|
|
23
|
+
# path: data/daily/
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Template: Google Cloud Storage extraction
|
|
2
|
+
# source_type: cloud_storage. Requires google-cloud-storage.
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: gcs_extraction
|
|
6
|
+
description: Extract data from Google Cloud Storage
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: cloud_storage
|
|
10
|
+
storage:
|
|
11
|
+
provider: gcs
|
|
12
|
+
bucket: my-gcs-bucket
|
|
13
|
+
path: exports/data.csv
|
|
14
|
+
credentials: /path/to/service-account-key.json
|
|
15
|
+
# Or inline: credentials: { "type": "service_account", ... }
|
|
16
|
+
|
|
17
|
+
format: csv
|
|
18
|
+
batch_size: 1000
|
|
19
|
+
max_records: null
|
|
20
|
+
|
|
21
|
+
# Prefix for multiple files:
|
|
22
|
+
# path: exports/2024/
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Template: AWS S3 extraction
|
|
2
|
+
# source_type: cloud_storage. Requires boto3 (pip install boto3 or pycharter[etl]).
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: s3_extraction
|
|
6
|
+
description: Extract data from AWS S3
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: cloud_storage
|
|
10
|
+
storage:
|
|
11
|
+
provider: s3
|
|
12
|
+
bucket: my-bucket-name
|
|
13
|
+
path: data/records.parquet # Single file
|
|
14
|
+
credentials:
|
|
15
|
+
aws_access_key_id: ${AWS_ACCESS_KEY_ID}
|
|
16
|
+
aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY}
|
|
17
|
+
region: us-east-1
|
|
18
|
+
|
|
19
|
+
format: parquet # csv | json | parquet | excel | xml
|
|
20
|
+
batch_size: 1000
|
|
21
|
+
max_records: null
|
|
22
|
+
|
|
23
|
+
# Multiple files: use prefix (trailing slash or path without filename)
|
|
24
|
+
# path: data/daily/ # All objects with this prefix
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Template: Database extraction
|
|
2
|
+
# source_type: database. Supports PostgreSQL, MySQL, SQLite, MSSQL, Oracle.
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: database_extraction
|
|
6
|
+
description: Extract data from database via SQL query
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: database
|
|
10
|
+
database:
|
|
11
|
+
url: postgresql://user:pass@host:5432/dbname
|
|
12
|
+
# type: postgresql # Optional; auto-detected from URL
|
|
13
|
+
|
|
14
|
+
query: |
|
|
15
|
+
SELECT id, name, email, created_at
|
|
16
|
+
FROM users
|
|
17
|
+
WHERE created_at > :start_date
|
|
18
|
+
ORDER BY id
|
|
19
|
+
|
|
20
|
+
query_params:
|
|
21
|
+
start_date: "2024-01-01"
|
|
22
|
+
|
|
23
|
+
batch_size: 1000
|
|
24
|
+
max_records: null
|
|
25
|
+
|
|
26
|
+
# Use variable injection for credentials:
|
|
27
|
+
# database:
|
|
28
|
+
# url: ${SOURCE_DATABASE_URL}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Template: Database extraction with SSH tunnel
|
|
2
|
+
# For databases not directly reachable. Requires sshtunnel (pip install pycharter[etl]).
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: database_ssh_extraction
|
|
6
|
+
description: Extract from database via SSH tunnel
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: database
|
|
10
|
+
database:
|
|
11
|
+
url: postgresql://user:pass@localhost:5433/remote_db
|
|
12
|
+
ssh_tunnel:
|
|
13
|
+
enabled: true
|
|
14
|
+
host: jump.example.com
|
|
15
|
+
port: 22
|
|
16
|
+
username: ${SSH_USER}
|
|
17
|
+
password: ${SSH_PASSWORD}
|
|
18
|
+
# key_file: ~/.ssh/id_rsa # Alternative to password
|
|
19
|
+
remote_host: db.internal.example.com
|
|
20
|
+
remote_port: 5432
|
|
21
|
+
local_port: 5433
|
|
22
|
+
|
|
23
|
+
query: "SELECT id, name, value FROM data_table"
|
|
24
|
+
query_params: {}
|
|
25
|
+
|
|
26
|
+
batch_size: 1000
|
|
27
|
+
max_records: null
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Template: File-based extraction - CSV
|
|
2
|
+
# source_type: file. Supports CSV, TSV, JSON, JSONL, Parquet, Excel, XML.
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: file_csv_extraction
|
|
6
|
+
description: Extract data from CSV file
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: file
|
|
10
|
+
file_path: /path/to/data.csv
|
|
11
|
+
format: csv # Optional; auto-detected from extension if omitted
|
|
12
|
+
|
|
13
|
+
batch_size: 1000
|
|
14
|
+
max_records: null
|
|
15
|
+
|
|
16
|
+
# Variable injection supported:
|
|
17
|
+
# file_path: ${DATA_DIR}/exports/records.csv
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Template: File-based extraction - multiple files via glob pattern
|
|
2
|
+
# Processes all files matching the pattern. Copy to your contract dir as extract.yaml and customize.
|
|
3
|
+
|
|
4
|
+
title: file_glob_extraction
|
|
5
|
+
description: Extract from multiple files using glob pattern
|
|
6
|
+
version: 1.0.0
|
|
7
|
+
|
|
8
|
+
source_type: file
|
|
9
|
+
file_path: /path/to/daily/exports/*.csv
|
|
10
|
+
format: csv # Same format for all matched files
|
|
11
|
+
|
|
12
|
+
batch_size: 1000
|
|
13
|
+
max_records: null
|
|
14
|
+
|
|
15
|
+
# Examples:
|
|
16
|
+
# file_path: /data/2024/*/sales_*.json
|
|
17
|
+
# file_path: ${EXPORTS_DIR}/**/*.parquet
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Template: File-based extraction - JSON
|
|
2
|
+
# The JSON file must contain an array, or an object with one of these keys: data, results, items, records, values.
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: file_json_extraction
|
|
6
|
+
description: Extract data from JSON file
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: file
|
|
10
|
+
file_path: /path/to/data.json
|
|
11
|
+
format: json
|
|
12
|
+
|
|
13
|
+
batch_size: 1000
|
|
14
|
+
max_records: null
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Template: File-based extraction - Parquet
|
|
2
|
+
# Requires pandas. Copy to your contract dir as extract.yaml and customize.
|
|
3
|
+
|
|
4
|
+
title: file_parquet_extraction
|
|
5
|
+
description: Extract data from Parquet file
|
|
6
|
+
version: 1.0.0
|
|
7
|
+
|
|
8
|
+
source_type: file
|
|
9
|
+
file_path: /path/to/data.parquet
|
|
10
|
+
format: parquet
|
|
11
|
+
|
|
12
|
+
batch_size: 1000
|
|
13
|
+
max_records: null
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Template: HTTP/API extraction with pagination
|
|
2
|
+
# Strategies: page | offset | cursor | next_url | link_header
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: http_paginated_extraction
|
|
6
|
+
description: Extract data from HTTP API with pagination
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: http
|
|
10
|
+
base_url: https://api.example.com
|
|
11
|
+
api_endpoint: /v1/records
|
|
12
|
+
method: GET
|
|
13
|
+
|
|
14
|
+
params:
|
|
15
|
+
api_key: ${API_KEY}
|
|
16
|
+
limit: 100
|
|
17
|
+
page: 0 # Updated by pagination; the key must match param_name in the page config
|
|
18
|
+
|
|
19
|
+
input_params: {}
|
|
20
|
+
headers:
|
|
21
|
+
Accept: application/json
|
|
22
|
+
|
|
23
|
+
batch_size: 1000
|
|
24
|
+
max_records: null
|
|
25
|
+
|
|
26
|
+
response_format: json
|
|
27
|
+
response_path: data # Path to array in response, e.g. "data.results"
|
|
28
|
+
|
|
29
|
+
retry:
|
|
30
|
+
max_attempts: 3
|
|
31
|
+
backoff_factor: 2
|
|
32
|
+
retry_on_status: [429, 500, 502, 503, 504]
|
|
33
|
+
|
|
34
|
+
timeout:
|
|
35
|
+
connect: 10
|
|
36
|
+
read: 60
|
|
37
|
+
write: 10
|
|
38
|
+
pool: 10
|
|
39
|
+
|
|
40
|
+
rate_limit_delay: 0.2
|
|
41
|
+
|
|
42
|
+
# Pagination config
|
|
43
|
+
pagination:
|
|
44
|
+
enabled: true
|
|
45
|
+
strategy: page # page | offset | cursor | next_url | link_header
|
|
46
|
+
page_delay: 0.1
|
|
47
|
+
|
|
48
|
+
# Strategy: page
|
|
49
|
+
page:
|
|
50
|
+
param_name: page
|
|
51
|
+
start: 0
|
|
52
|
+
increment: 1
|
|
53
|
+
|
|
54
|
+
# Strategy: offset (alternative)
|
|
55
|
+
# offset:
|
|
56
|
+
# param_name: offset
|
|
57
|
+
# start: 0
|
|
58
|
+
# increment_by: limit
|
|
59
|
+
|
|
60
|
+
# Strategy: cursor (alternative)
|
|
61
|
+
# cursor:
|
|
62
|
+
# param_name: cursor
|
|
63
|
+
# response_path: next_cursor
|
|
64
|
+
|
|
65
|
+
# Strategy: next_url (alternative) - next URL in response
|
|
66
|
+
# next_url:
|
|
67
|
+
# response_path: links.next
|
|
68
|
+
|
|
69
|
+
stop_conditions:
|
|
70
|
+
- type: empty_response
|
|
71
|
+
- type: fewer_records
|
|
72
|
+
- type: max_pages
|
|
73
|
+
value: 1000
|
|
74
|
+
# - type: max_records
|
|
75
|
+
# value: 50000
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Template: HTTP extraction with path parameters in URL
|
|
2
|
+
# Use {param_name} in api_endpoint; values come from params or input_params.
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: http_path_params_extraction
|
|
6
|
+
description: HTTP API with path parameter substitution
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
source_type: http
|
|
10
|
+
base_url: https://api.example.com
|
|
11
|
+
api_endpoint: /v1/symbols/{symbol}/prices/{start_date}/{end_date}
|
|
12
|
+
method: GET
|
|
13
|
+
|
|
14
|
+
params:
|
|
15
|
+
api_key: ${API_KEY}
|
|
16
|
+
symbol: AAPL # Or from input_params at runtime
|
|
17
|
+
start_date: "2024-01-01"
|
|
18
|
+
end_date: "2024-12-31"
|
|
19
|
+
|
|
20
|
+
input_params:
|
|
21
|
+
symbol:
|
|
22
|
+
type: string
|
|
23
|
+
required: true
|
|
24
|
+
description: Stock or symbol code
|
|
25
|
+
start_date:
|
|
26
|
+
type: string
|
|
27
|
+
required: false
|
|
28
|
+
end_date:
|
|
29
|
+
type: string
|
|
30
|
+
required: false
|
|
31
|
+
|
|
32
|
+
headers:
|
|
33
|
+
Accept: application/json
|
|
34
|
+
|
|
35
|
+
batch_size: 1000
|
|
36
|
+
response_format: json
|
|
37
|
+
response_path: null
|
|
38
|
+
|
|
39
|
+
retry:
|
|
40
|
+
max_attempts: 3
|
|
41
|
+
backoff_factor: 2
|
|
42
|
+
retry_on_status: [429, 500, 502, 503, 504]
|
|
43
|
+
|
|
44
|
+
pagination:
|
|
45
|
+
enabled: false
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Template: HTTP/API extraction (single request, no pagination)
|
|
2
|
+
# Use with source_type: http (or omit it; http is the default when base_url/api_endpoint is present).
|
|
3
|
+
# Copy to your contract dir as extract.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: http_api_extraction
|
|
6
|
+
description: Extract data from HTTP API - single request
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
# Optional: set explicitly. Auto-detected from base_url/api_endpoint if omitted.
|
|
10
|
+
# source_type: http
|
|
11
|
+
|
|
12
|
+
base_url: https://api.example.com
|
|
13
|
+
api_endpoint: /v1/records
|
|
14
|
+
method: GET
|
|
15
|
+
|
|
16
|
+
params:
|
|
17
|
+
api_key: ${API_KEY:?API_KEY is required}
|
|
18
|
+
# Add query params as needed
|
|
19
|
+
|
|
20
|
+
# Input parameters (passed at runtime via orchestrator.run(**kwargs))
|
|
21
|
+
input_params:
|
|
22
|
+
id:
|
|
23
|
+
type: string
|
|
24
|
+
required: false
|
|
25
|
+
description: Optional filter by ID
|
|
26
|
+
|
|
27
|
+
headers:
|
|
28
|
+
Accept: application/json
|
|
29
|
+
User-Agent: PyCharter-ETL/1.0
|
|
30
|
+
|
|
31
|
+
batch_size: 1000
|
|
32
|
+
max_records: null
|
|
33
|
+
|
|
34
|
+
# Response handling
|
|
35
|
+
response_format: json
|
|
36
|
+
response_path: null # e.g. "data.items" to extract nested array
|
|
37
|
+
|
|
38
|
+
retry:
|
|
39
|
+
max_attempts: 3
|
|
40
|
+
backoff_factor: 2
|
|
41
|
+
retry_on_status: [429, 500, 502, 503, 504]
|
|
42
|
+
|
|
43
|
+
timeout:
|
|
44
|
+
connect: 10
|
|
45
|
+
read: 30
|
|
46
|
+
write: 10
|
|
47
|
+
pool: 10
|
|
48
|
+
|
|
49
|
+
rate_limit_delay: 0.2
|
|
50
|
+
|
|
51
|
+
pagination:
|
|
52
|
+
enabled: false
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Template: Load with insert only (no conflict handling)
|
|
2
|
+
# Use for append-only tables or when duplicates are impossible.
|
|
3
|
+
# Copy to your contract dir as load.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: load_insert
|
|
6
|
+
description: Insert new rows only
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
target_table: events
|
|
10
|
+
schema_name: public
|
|
11
|
+
write_method: insert
|
|
12
|
+
# primary_key not required for insert
|
|
13
|
+
|
|
14
|
+
batch_size: 1000
|
|
15
|
+
|
|
16
|
+
database:
|
|
17
|
+
url: ${TARGET_DATABASE_URL}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Template: Load to PostgreSQL
|
|
2
|
+
# Copy to your contract dir as load.yaml and customize.
|
|
3
|
+
|
|
4
|
+
title: load_postgresql
|
|
5
|
+
description: Load into PostgreSQL
|
|
6
|
+
version: 1.0.0
|
|
7
|
+
|
|
8
|
+
target_table: my_table
|
|
9
|
+
schema_name: public
|
|
10
|
+
write_method: upsert
|
|
11
|
+
primary_key: id
|
|
12
|
+
batch_size: 1000
|
|
13
|
+
|
|
14
|
+
database:
|
|
15
|
+
type: postgresql
|
|
16
|
+
url: postgresql://user:pass@host:5432/dbname
|
|
17
|
+
# With async (if supported): postgresql+asyncpg://...
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Template: Load to SQLite
|
|
2
|
+
# Copy to your contract dir as load.yaml and customize.
|
|
3
|
+
|
|
4
|
+
title: load_sqlite
|
|
5
|
+
description: Load into SQLite
|
|
6
|
+
version: 1.0.0
|
|
7
|
+
|
|
8
|
+
target_table: my_table
|
|
9
|
+
schema_name: main # SQLite default schema
|
|
10
|
+
write_method: upsert
|
|
11
|
+
primary_key: id
|
|
12
|
+
batch_size: 1000
|
|
13
|
+
|
|
14
|
+
database:
|
|
15
|
+
type: sqlite
|
|
16
|
+
url: sqlite:///./data.db
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Template: Truncate table then load (full refresh)
|
|
2
|
+
# Destructive: removes all existing rows. Use for dim tables or full snapshots.
|
|
3
|
+
# Copy to your contract dir as load.yaml and customize.
|
|
4
|
+
|
|
5
|
+
title: load_truncate_and_load
|
|
6
|
+
description: Full table refresh - truncate then insert all
|
|
7
|
+
version: 1.0.0
|
|
8
|
+
|
|
9
|
+
target_table: dimension_customers
|
|
10
|
+
schema_name: public
|
|
11
|
+
write_method: truncate_and_load
|
|
12
|
+
primary_key: id # Optional for truncate_and_load; useful for schema/docs
|
|
13
|
+
|
|
14
|
+
batch_size: 1000
|
|
15
|
+
|
|
16
|
+
database:
|
|
17
|
+
type: postgresql
|
|
18
|
+
url: ${TARGET_DATABASE_URL}
|