etlplus 0.3.14__tar.gz → 0.11.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus-0.11.8/.coveragerc +21 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/.github/workflows/ci.yml +19 -2
- {etlplus-0.3.14 → etlplus-0.11.8}/.pre-commit-config.yaml +5 -1
- {etlplus-0.3.14 → etlplus-0.11.8}/DEMO.md +27 -25
- etlplus-0.11.8/MANIFEST.in +12 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/Makefile +1 -1
- {etlplus-0.3.14/etlplus.egg-info → etlplus-0.11.8}/PKG-INFO +155 -49
- {etlplus-0.3.14 → etlplus-0.11.8}/README.md +150 -48
- etlplus-0.11.8/docs/README.md +18 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/docs/pipeline-guide.md +33 -13
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/__main__.py +1 -2
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/README.md +24 -26
- etlplus-0.11.8/etlplus/cli/__init__.py +15 -0
- etlplus-0.11.8/etlplus/cli/commands.py +924 -0
- etlplus-0.11.8/etlplus/cli/constants.py +71 -0
- etlplus-0.11.8/etlplus/cli/handlers.py +656 -0
- etlplus-0.11.8/etlplus/cli/io.py +336 -0
- etlplus-0.11.8/etlplus/cli/main.py +214 -0
- etlplus-0.11.8/etlplus/cli/options.py +49 -0
- etlplus-0.11.8/etlplus/cli/state.py +336 -0
- etlplus-0.11.8/etlplus/cli/types.py +33 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/config/pipeline.py +13 -2
- etlplus-0.11.8/etlplus/database/__init__.py +44 -0
- etlplus-0.11.8/etlplus/database/ddl.py +319 -0
- etlplus-0.11.8/etlplus/database/engine.py +151 -0
- etlplus-0.11.8/etlplus/database/orm.py +354 -0
- etlplus-0.11.8/etlplus/database/schema.py +274 -0
- etlplus-0.11.8/etlplus/database/types.py +33 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/enums.py +3 -77
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/extract.py +5 -7
- etlplus-0.11.8/etlplus/file/__init__.py +25 -0
- etlplus-0.11.8/etlplus/file/core.py +228 -0
- etlplus-0.11.8/etlplus/file/csv.py +82 -0
- etlplus-0.11.8/etlplus/file/enums.py +238 -0
- etlplus-0.11.8/etlplus/file/json.py +87 -0
- etlplus-0.11.8/etlplus/file/xml.py +165 -0
- etlplus-0.11.8/etlplus/file/yaml.py +125 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/load.py +10 -13
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/run.py +8 -13
- etlplus-0.11.8/etlplus/templates/__init__.py +5 -0
- etlplus-0.11.8/etlplus/templates/ddl.sql.j2 +128 -0
- etlplus-0.11.8/etlplus/templates/view.sql.j2 +69 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/transform.py +12 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/types.py +5 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/utils.py +1 -32
- {etlplus-0.3.14 → etlplus-0.11.8/etlplus.egg-info}/PKG-INFO +155 -49
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus.egg-info/SOURCES.txt +50 -5
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus.egg-info/requires.txt +4 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/README.md +8 -7
- etlplus-0.11.8/examples/configs/ddl_spec.yml +67 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/quickstart_python.py +1 -1
- {etlplus-0.3.14 → etlplus-0.11.8}/pyproject.toml +5 -1
- {etlplus-0.3.14 → etlplus-0.11.8}/setup.py +9 -1
- etlplus-0.11.8/tests/conftest.py +210 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/integration/conftest.py +105 -16
- etlplus-0.11.8/tests/integration/test_i_cli.py +299 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/integration/test_i_examples_data_parity.py +7 -2
- etlplus-0.11.8/tests/integration/test_i_pagination_strategy.py +556 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/integration/test_i_pipeline_smoke.py +46 -40
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/integration/test_i_pipeline_yaml_load.py +6 -0
- etlplus-0.11.8/tests/integration/test_i_run.py +69 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/integration/test_i_run_profile_pagination_defaults.py +11 -7
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/integration/test_i_run_profile_rate_limit_defaults.py +6 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/conftest.py +42 -15
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_auth.py +114 -124
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_config.py +60 -16
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_endpoint_client.py +456 -276
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_pagination_client.py +6 -1
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_pagination_config.py +5 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_paginator.py +6 -1
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_rate_limit_config.py +5 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_rate_limiter.py +6 -1
- etlplus-0.11.8/tests/unit/api/test_u_request_manager.py +349 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_retry_manager.py +6 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_transport.py +53 -1
- etlplus-0.11.8/tests/unit/api/test_u_types.py +135 -0
- etlplus-0.11.8/tests/unit/cli/conftest.py +284 -0
- etlplus-0.11.8/tests/unit/cli/test_u_cli_handlers.py +884 -0
- etlplus-0.11.8/tests/unit/cli/test_u_cli_io.py +326 -0
- etlplus-0.11.8/tests/unit/cli/test_u_cli_main.py +216 -0
- etlplus-0.11.8/tests/unit/cli/test_u_cli_state.py +347 -0
- etlplus-0.11.8/tests/unit/config/test_u_config_utils.py +129 -0
- etlplus-0.11.8/tests/unit/config/test_u_connector.py +119 -0
- etlplus-0.11.8/tests/unit/config/test_u_jobs.py +131 -0
- etlplus-0.11.8/tests/unit/config/test_u_pipeline.py +315 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/conftest.py +30 -30
- etlplus-0.11.8/tests/unit/database/test_u_database_ddl.py +268 -0
- etlplus-0.11.8/tests/unit/database/test_u_database_engine.py +199 -0
- etlplus-0.11.8/tests/unit/database/test_u_database_orm.py +308 -0
- etlplus-0.11.8/tests/unit/database/test_u_database_schema.py +241 -0
- etlplus-0.11.8/tests/unit/file/test_u_file_core.py +261 -0
- etlplus-0.11.8/tests/unit/file/test_u_file_enums.py +90 -0
- etlplus-0.11.8/tests/unit/file/test_u_file_yaml.py +110 -0
- etlplus-0.11.8/tests/unit/test_u_enums.py +102 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/test_u_extract.py +213 -1
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/test_u_load.py +206 -5
- etlplus-0.11.8/tests/unit/test_u_main.py +58 -0
- etlplus-0.11.8/tests/unit/test_u_mixins.py +47 -0
- etlplus-0.11.8/tests/unit/test_u_run.py +602 -0
- etlplus-0.11.8/tests/unit/test_u_run_helpers.py +385 -0
- etlplus-0.11.8/tests/unit/test_u_transform.py +860 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/test_u_utils.py +84 -5
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/test_u_validate.py +42 -2
- etlplus-0.11.8/tests/unit/test_u_version.py +53 -0
- etlplus-0.3.14/etlplus/cli.py +0 -868
- etlplus-0.3.14/etlplus/file.py +0 -657
- etlplus-0.3.14/tests/conftest.py +0 -11
- etlplus-0.3.14/tests/integration/test_i_cli.py +0 -348
- etlplus-0.3.14/tests/integration/test_i_pagination_strategy.py +0 -452
- etlplus-0.3.14/tests/integration/test_i_run.py +0 -133
- etlplus-0.3.14/tests/unit/api/test_u_request_manager.py +0 -134
- etlplus-0.3.14/tests/unit/config/test_u_connector.py +0 -54
- etlplus-0.3.14/tests/unit/config/test_u_pipeline.py +0 -194
- etlplus-0.3.14/tests/unit/test_u_cli.py +0 -124
- etlplus-0.3.14/tests/unit/test_u_file.py +0 -100
- etlplus-0.3.14/tests/unit/test_u_transform.py +0 -483
- etlplus-0.3.14/tools/run_pipeline.py +0 -561
- {etlplus-0.3.14 → etlplus-0.11.8}/.editorconfig +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/.gitattributes +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/.gitignore +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/.ruff.toml +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/CONTRIBUTING.md +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/LICENSE +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/REFERENCES.md +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/__init__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/__version__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/__init__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/auth.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/config.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/errors.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/transport.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/api/types.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/config/__init__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/config/connector.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/config/jobs.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/config/profile.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/config/types.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/config/utils.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/mixins.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/py.typed +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/run_helpers.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/validate.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus/validation/utils.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/data/sample.csv +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/data/sample.json +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/data/sample.xml +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/data/sample.xsd +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/examples/data/sample.yaml +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/pytest.ini +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/setup.cfg +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/__init__.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.3.14 → etlplus-0.11.8}/tools/update_demo_snippets.py +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# .coveragerc
|
|
2
|
+
# ETLPlus
|
|
3
|
+
#
|
|
4
|
+
# Copyright © 2025 Dagitali LLC. All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# An optional pytest-cov configuration file. Limits coverage measurement to the
|
|
7
|
+
# ETLPlus package and ignores test modules.
|
|
8
|
+
#
|
|
9
|
+
# See:
|
|
10
|
+
# 1. https://pytest-cov.readthedocs.io/en/latest/config.html
|
|
11
|
+
|
|
12
|
+
[run]
|
|
13
|
+
source = etlplus
|
|
14
|
+
branch = true
|
|
15
|
+
omit =
|
|
16
|
+
tests/*
|
|
17
|
+
*/tests/*
|
|
18
|
+
|
|
19
|
+
[report]
|
|
20
|
+
skip_covered = true
|
|
21
|
+
show_missing = true
|
|
@@ -74,9 +74,26 @@ jobs:
|
|
|
74
74
|
with:
|
|
75
75
|
python-version: ${{ matrix.python-version }}
|
|
76
76
|
python-bootstrap: "-e .[dev,yaml]"
|
|
77
|
-
- name: Run tests
|
|
77
|
+
- name: Run tests (with coverage)
|
|
78
78
|
run: |
|
|
79
|
-
pytest -q
|
|
79
|
+
pytest -q \
|
|
80
|
+
--cov \
|
|
81
|
+
--cov-branch \
|
|
82
|
+
--cov-config=.coveragerc \
|
|
83
|
+
--cov-report=term-missing \
|
|
84
|
+
--cov-report=xml \
|
|
85
|
+
tests/
|
|
86
|
+
|
|
87
|
+
- name: Upload coverage reports to Codecov
|
|
88
|
+
if: matrix.python-version == '3.13'
|
|
89
|
+
uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # Pinned v5.5.2
|
|
90
|
+
with:
|
|
91
|
+
fail_ci_if_error: true
|
|
92
|
+
files: coverage.xml
|
|
93
|
+
flags: unit
|
|
94
|
+
name: etlplus
|
|
95
|
+
token: ${{ secrets.CODECOV_TOKEN }} # Omit for public repo
|
|
96
|
+
verbose: true
|
|
80
97
|
|
|
81
98
|
build:
|
|
82
99
|
name: Build distributions
|
|
@@ -159,7 +159,11 @@ repos:
|
|
|
159
159
|
rev: v1.19.0
|
|
160
160
|
hooks:
|
|
161
161
|
- id: mypy
|
|
162
|
-
args:
|
|
162
|
+
args:
|
|
163
|
+
- --cache-dir=.mypy_cache/pre-commit
|
|
164
|
+
- --ignore-missing-imports
|
|
165
|
+
- --install-types
|
|
166
|
+
- --non-interactive
|
|
163
167
|
|
|
164
168
|
- repo: https://github.com/pycqa/flake8
|
|
165
169
|
rev: 7.3.0
|
|
@@ -58,7 +58,7 @@ John Doe,30,New York
|
|
|
58
58
|
Jane Smith,25,Los Angeles
|
|
59
59
|
CSVDATA
|
|
60
60
|
|
|
61
|
-
$ etlplus extract
|
|
61
|
+
$ etlplus extract users.csv
|
|
62
62
|
[
|
|
63
63
|
{
|
|
64
64
|
"name": "John Doe",
|
|
@@ -89,14 +89,14 @@ $ etlplus validate '{"email": "user@example.com", "age": 25}' \
|
|
|
89
89
|
|
|
90
90
|
### Filter and Select
|
|
91
91
|
```bash
|
|
92
|
-
$ etlplus transform '
|
|
92
|
+
$ etlplus transform --operations '{
|
|
93
|
+
"filter": {"field": "age", "op": "gt", "value": 26},
|
|
94
|
+
"select": ["name", "age"]
|
|
95
|
+
}' '[
|
|
93
96
|
{"name": "John", "age": 30, "city": "NYC"},
|
|
94
97
|
{"name": "Jane", "age": 25, "city": "LA"},
|
|
95
98
|
{"name": "Bob", "age": 35, "city": "Chicago"}
|
|
96
|
-
]'
|
|
97
|
-
"filter": {"field": "age", "op": "gt", "value": 26},
|
|
98
|
-
"select": ["name", "age"]
|
|
99
|
-
}'
|
|
99
|
+
]'
|
|
100
100
|
[
|
|
101
101
|
{
|
|
102
102
|
"name": "John",
|
|
@@ -111,24 +111,19 @@ $ etlplus transform '[
|
|
|
111
111
|
|
|
112
112
|
### Sort Data
|
|
113
113
|
```bash
|
|
114
|
-
$ etlplus transform
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
{"name": "Bob", "score": 90}
|
|
118
|
-
]' --operations '{
|
|
119
|
-
"sort": {"field": "score", "reverse": true}
|
|
120
|
-
}'
|
|
114
|
+
$ etlplus transform \
|
|
115
|
+
--operations '{"sort": {"field": "score", "reverse": true}}' \
|
|
116
|
+
'[{"name": "Charlie", "score": 85}, {"name": "Alice", "score": 95}, {"name": "Bob", "score": 90}]'
|
|
121
117
|
```
|
|
122
118
|
|
|
123
119
|
### Aggregate Data
|
|
124
120
|
```bash
|
|
125
|
-
$ etlplus transform '
|
|
121
|
+
$ etlplus transform --operations '{"aggregate": {"field": "sales", "func": "sum"}}' \
|
|
122
|
+
'[
|
|
126
123
|
{"product": "A", "sales": 100},
|
|
127
124
|
{"product": "B", "sales": 150},
|
|
128
125
|
{"product": "C", "sales": 200}
|
|
129
|
-
]'
|
|
130
|
-
"aggregate": {"field": "sales", "func": "sum"}
|
|
131
|
-
}'
|
|
126
|
+
]'
|
|
132
127
|
{
|
|
133
128
|
"sum_sales": 450
|
|
134
129
|
}
|
|
@@ -138,7 +133,9 @@ $ etlplus transform '[
|
|
|
138
133
|
|
|
139
134
|
### Load to JSON File
|
|
140
135
|
```bash
|
|
141
|
-
$ etlplus load
|
|
136
|
+
$ etlplus load \
|
|
137
|
+
'{"name": "John", "status": "active"}' \
|
|
138
|
+
output.json --target-type file
|
|
142
139
|
{
|
|
143
140
|
"status": "success",
|
|
144
141
|
"message": "Data loaded to output.json",
|
|
@@ -148,10 +145,12 @@ $ etlplus load '{"name": "John", "status": "active"}' file output.json
|
|
|
148
145
|
|
|
149
146
|
### Load to CSV File
|
|
150
147
|
```bash
|
|
151
|
-
$ etlplus load
|
|
148
|
+
$ etlplus load \
|
|
149
|
+
'[
|
|
152
150
|
{"name": "John", "email": "john@example.com"},
|
|
153
151
|
{"name": "Jane", "email": "jane@example.com"}
|
|
154
|
-
]'
|
|
152
|
+
]' \
|
|
153
|
+
users.csv --target-type file
|
|
155
154
|
{
|
|
156
155
|
"status": "success",
|
|
157
156
|
"message": "Data loaded to users.csv",
|
|
@@ -170,22 +169,25 @@ This example shows a complete ETL workflow:
|
|
|
170
169
|
|
|
171
170
|
```bash
|
|
172
171
|
# Step 1: Extract
|
|
173
|
-
$ etlplus extract
|
|
172
|
+
$ etlplus extract raw_data.csv > extracted.json
|
|
174
173
|
|
|
175
174
|
# Step 2: Transform
|
|
176
|
-
$ etlplus transform
|
|
175
|
+
$ etlplus transform \
|
|
177
176
|
--operations '{
|
|
178
177
|
"filter": {"field": "age", "op": "gte", "value": 18},
|
|
179
178
|
"select": ["name", "email", "age"]
|
|
180
|
-
}'
|
|
179
|
+
}' \
|
|
180
|
+
extracted.json \
|
|
181
|
+
transformed.json
|
|
181
182
|
|
|
182
183
|
# Step 3: Validate
|
|
183
|
-
$ etlplus validate
|
|
184
|
+
$ etlplus validate \
|
|
184
185
|
--rules '{
|
|
185
186
|
"name": {"type": "string", "required": true},
|
|
186
187
|
"email": {"type": "string", "required": true, "pattern": "^[\\w.-]+@[\\w.-]+\\.\\w+$"},
|
|
187
188
|
"age": {"type": "number", "min": 18, "max": 120}
|
|
188
|
-
}'
|
|
189
|
+
}' \
|
|
190
|
+
transformed.json
|
|
189
191
|
|
|
190
192
|
# Step 4: Load
|
|
191
193
|
$ etlplus load transformed.json final_output.csv --target-type file
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# MANIFEST.in
|
|
2
|
+
# ETLPlus
|
|
3
|
+
#
|
|
4
|
+
# Copyright © 2026 Dagitali LLC. All rights reserved.
|
|
5
|
+
#
|
|
6
|
+
# Contains commands that allow lists of files to be discovered and manipulated.
|
|
7
|
+
#
|
|
8
|
+
# See:
|
|
9
|
+
# 1. https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html
|
|
10
|
+
|
|
11
|
+
# Include Jinja template files in the etlplus package
|
|
12
|
+
recursive-include etlplus/templates *.j2
|
|
@@ -253,7 +253,7 @@ venv: ## Create the virtual environment (at $(VENV_DIR))
|
|
|
253
253
|
else \
|
|
254
254
|
$(call ECHO_INFO, "Using existing venv: $(VENV_DIR)"); \
|
|
255
255
|
fi
|
|
256
|
-
@$(PYTHON) -m pip install --upgrade pip
|
|
256
|
+
@$(PYTHON) -m pip install --upgrade pip setuptools wheel >/dev/null
|
|
257
257
|
@$(call ECHO_OK,"venv ready")
|
|
258
258
|
|
|
259
259
|
##@ CI
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.11.8
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -21,7 +21,11 @@ Requires-Dist: jinja2>=3.1.6
|
|
|
21
21
|
Requires-Dist: pyodbc>=5.3.0
|
|
22
22
|
Requires-Dist: python-dotenv>=1.2.1
|
|
23
23
|
Requires-Dist: pandas>=2.3.3
|
|
24
|
+
Requires-Dist: pydantic>=2.12.5
|
|
25
|
+
Requires-Dist: PyYAML>=6.0.3
|
|
24
26
|
Requires-Dist: requests>=2.32.5
|
|
27
|
+
Requires-Dist: SQLAlchemy>=2.0.45
|
|
28
|
+
Requires-Dist: typer>=0.21.0
|
|
25
29
|
Provides-Extra: dev
|
|
26
30
|
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
27
31
|
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
@@ -60,13 +64,16 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
60
64
|
- [Quickstart](#quickstart)
|
|
61
65
|
- [Usage](#usage)
|
|
62
66
|
- [Command Line Interface](#command-line-interface)
|
|
67
|
+
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
68
|
+
- [Check Pipelines](#check-pipelines)
|
|
69
|
+
- [Render SQL DDL](#render-sql-ddl)
|
|
63
70
|
- [Extract Data](#extract-data)
|
|
64
71
|
- [Validate Data](#validate-data)
|
|
65
72
|
- [Transform Data](#transform-data)
|
|
66
73
|
- [Load Data](#load-data)
|
|
67
74
|
- [Python API](#python-api)
|
|
68
75
|
- [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
|
|
69
|
-
- [
|
|
76
|
+
- [Format Overrides](#format-overrides)
|
|
70
77
|
- [Transformation Operations](#transformation-operations)
|
|
71
78
|
- [Filter Operations](#filter-operations)
|
|
72
79
|
- [Aggregation Functions](#aggregation-functions)
|
|
@@ -78,6 +85,8 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
78
85
|
- [Test Layers](#test-layers)
|
|
79
86
|
- [Code Coverage](#code-coverage)
|
|
80
87
|
- [Linting](#linting)
|
|
88
|
+
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
89
|
+
- [Releasing to PyPI](#releasing-to-pypi)
|
|
81
90
|
- [Links](#links)
|
|
82
91
|
- [License](#license)
|
|
83
92
|
- [Contributing](#contributing)
|
|
@@ -85,6 +94,14 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
85
94
|
|
|
86
95
|
## Features
|
|
87
96
|
|
|
97
|
+
- **Check** data pipeline definitions before running them:
|
|
98
|
+
- Summarize jobs, sources, targets, and transforms
|
|
99
|
+
- Confirm configuration changes by printing focused sections on demand
|
|
100
|
+
|
|
101
|
+
- **Render** SQL DDL from shared table specs:
|
|
102
|
+
- Generate CREATE TABLE or view statements
|
|
103
|
+
- Swap templates or direct output to files for database migrations
|
|
104
|
+
|
|
88
105
|
- **Extract** data from multiple sources:
|
|
89
106
|
- Files (CSV, JSON, XML, YAML)
|
|
90
107
|
- Databases (connection string support)
|
|
@@ -135,8 +152,8 @@ etlplus --version
|
|
|
135
152
|
|
|
136
153
|
# One-liner: extract CSV, filter, select, and write JSON
|
|
137
154
|
etlplus extract file examples/data/sample.csv \
|
|
138
|
-
| etlplus transform
|
|
139
|
-
-
|
|
155
|
+
| etlplus transform --operations '{"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}' \
|
|
156
|
+
- temp/sample_output.json
|
|
140
157
|
```
|
|
141
158
|
|
|
142
159
|
[Python API](#python-api):
|
|
@@ -166,11 +183,73 @@ etlplus --help
|
|
|
166
183
|
etlplus --version
|
|
167
184
|
```
|
|
168
185
|
|
|
186
|
+
The CLI is implemented with Typer (Click-based). There is no argparse compatibility layer, so rely
|
|
187
|
+
on the documented commands/flags and run `etlplus <command> --help` for current options.
|
|
188
|
+
|
|
189
|
+
**Example error messages:**
|
|
190
|
+
|
|
191
|
+
- If you omit a required argument: `Error: Missing required argument 'SOURCE'.`
|
|
192
|
+
- If you place an option before its argument: `Error: Option '--source-format' must follow the 'SOURCE' argument.`
|
|
193
|
+
|
|
194
|
+
#### Argument Order and Required Options
|
|
195
|
+
|
|
196
|
+
For each command, positional arguments must precede options. Required options must follow their
|
|
197
|
+
associated argument:
|
|
198
|
+
|
|
199
|
+
- **extract**: `etlplus extract SOURCE [--source-format ...] [--source-type ...]`
|
|
200
|
+
- `SOURCE` is required. `--source-format` and `--source-type` must follow `SOURCE`.
|
|
201
|
+
- **transform**: `etlplus transform [--operations ...] SOURCE [--source-format ...] [--source-type ...] TARGET [--target-format ...] [--target-type ...]`
|
|
202
|
+
- `SOURCE` and `TARGET` are required. Format/type options must follow their respective argument.
|
|
203
|
+
- **load**: `etlplus load TARGET [--target-format ...] [--target-type ...] [--source-format ...]`
|
|
204
|
+
- `TARGET` is required. `--target-format` and `--target-type` must follow `TARGET`.
|
|
205
|
+
- **validate**: `etlplus validate SOURCE [--rules ...] [--source-format ...] [--source-type ...]`
|
|
206
|
+
- `SOURCE` is required. `--rules` and format/type options must follow `SOURCE`.
|
|
207
|
+
|
|
208
|
+
If required arguments or options are missing, or if options are placed before their associated argument, the CLI will display a clear error message.
|
|
209
|
+
|
|
210
|
+
#### Check Pipelines
|
|
211
|
+
|
|
212
|
+
Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
|
|
213
|
+
job names, summarize configured sources and targets, or drill into specific sections.
|
|
214
|
+
|
|
215
|
+
List jobs and show a pipeline summary:
|
|
216
|
+
```bash
|
|
217
|
+
etlplus check --config examples/configs/pipeline.yml --jobs
|
|
218
|
+
etlplus check --config examples/configs/pipeline.yml --summary
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Show sources or transforms for troubleshooting:
|
|
222
|
+
```bash
|
|
223
|
+
etlplus check --config examples/configs/pipeline.yml --sources
|
|
224
|
+
etlplus check --config examples/configs/pipeline.yml --transforms
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
#### Render SQL DDL
|
|
228
|
+
|
|
229
|
+
Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
|
|
230
|
+
or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
|
|
231
|
+
own).
|
|
232
|
+
|
|
233
|
+
Render all tables defined in a pipeline:
|
|
234
|
+
```bash
|
|
235
|
+
etlplus render --config examples/configs/pipeline.yml --template ddl
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Render a single table in that pipeline:
|
|
239
|
+
```bash
|
|
240
|
+
etlplus render --config examples/configs/pipeline.yml --table customers --template view
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Render from a standalone table spec to a file:
|
|
244
|
+
```bash
|
|
245
|
+
etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
|
|
246
|
+
```
|
|
247
|
+
|
|
169
248
|
#### Extract Data
|
|
170
249
|
|
|
171
|
-
Note: For file sources, the format is inferred from the filename extension
|
|
172
|
-
|
|
173
|
-
|
|
250
|
+
Note: For file sources, the format is normally inferred from the filename extension. Use
|
|
251
|
+
`--source-format` to override inference when a file lacks an extension or when you want to force a
|
|
252
|
+
specific parser.
|
|
174
253
|
|
|
175
254
|
Extract from JSON file:
|
|
176
255
|
```bash
|
|
@@ -194,7 +273,7 @@ etlplus extract api https://api.example.com/data
|
|
|
194
273
|
|
|
195
274
|
Save extracted data to file:
|
|
196
275
|
```bash
|
|
197
|
-
etlplus extract file examples/data/sample.csv
|
|
276
|
+
etlplus extract file examples/data/sample.csv > temp/sample_output.json
|
|
198
277
|
```
|
|
199
278
|
|
|
200
279
|
#### Validate Data
|
|
@@ -211,42 +290,69 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
|
|
|
211
290
|
|
|
212
291
|
#### Transform Data
|
|
213
292
|
|
|
293
|
+
When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
|
|
294
|
+
`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
|
|
295
|
+
control the emitted format for STDOUT or other non-file outputs, just like `etlplus load`. File
|
|
296
|
+
paths continue to infer formats from their extensions. Use `--source-type` to override the inferred
|
|
297
|
+
source connector type and `--target-type` to override the inferred target connector type, matching
|
|
298
|
+
the `etlplus extract`/`etlplus load` behavior.
|
|
299
|
+
|
|
300
|
+
Transform file inputs while overriding connector types:
|
|
301
|
+
```bash
|
|
302
|
+
etlplus transform \
|
|
303
|
+
--operations '{"select": ["name", "email"]}' \
|
|
304
|
+
examples/data/sample.json --source-type file \
|
|
305
|
+
temp/selected_output.json --target-type file
|
|
306
|
+
```
|
|
307
|
+
|
|
214
308
|
Filter and select fields:
|
|
215
309
|
```bash
|
|
216
|
-
etlplus transform
|
|
217
|
-
--operations '{"filter": {"field": "age", "op": "gt", "value": 26}, "select": ["name"]}'
|
|
310
|
+
etlplus transform \
|
|
311
|
+
--operations '{"filter": {"field": "age", "op": "gt", "value": 26}, "select": ["name"]}' \
|
|
312
|
+
'[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]'
|
|
218
313
|
```
|
|
219
314
|
|
|
220
315
|
Sort data:
|
|
221
316
|
```bash
|
|
222
|
-
etlplus transform
|
|
317
|
+
etlplus transform \
|
|
318
|
+
--operations '{"sort": {"field": "age", "reverse": true}}' \
|
|
319
|
+
examples/data/sample.json
|
|
223
320
|
```
|
|
224
321
|
|
|
225
322
|
Aggregate data:
|
|
226
323
|
```bash
|
|
227
|
-
etlplus transform
|
|
324
|
+
etlplus transform \
|
|
325
|
+
--operations '{"aggregate": {"field": "age", "func": "sum"}}' \
|
|
326
|
+
examples/data/sample.json
|
|
228
327
|
```
|
|
229
328
|
|
|
230
329
|
Map/rename fields:
|
|
231
330
|
```bash
|
|
232
|
-
etlplus transform
|
|
331
|
+
etlplus transform \
|
|
332
|
+
--operations '{"map": {"name": "new_name"}}' \
|
|
333
|
+
examples/data/sample.json
|
|
233
334
|
```
|
|
234
335
|
|
|
235
336
|
#### Load Data
|
|
236
337
|
|
|
338
|
+
`etlplus load` consumes JSON from STDIN; provide only the target argument plus optional flags.
|
|
339
|
+
|
|
237
340
|
Load to JSON file:
|
|
238
341
|
```bash
|
|
239
|
-
etlplus
|
|
342
|
+
etlplus extract file examples/data/sample.json \
|
|
343
|
+
| etlplus load temp/sample_output.json --target-type file
|
|
240
344
|
```
|
|
241
345
|
|
|
242
346
|
Load to CSV file:
|
|
243
347
|
```bash
|
|
244
|
-
etlplus
|
|
348
|
+
etlplus extract file examples/data/sample.csv \
|
|
349
|
+
| etlplus load temp/sample_output.csv --target-type file
|
|
245
350
|
```
|
|
246
351
|
|
|
247
352
|
Load to REST API:
|
|
248
353
|
```bash
|
|
249
|
-
|
|
354
|
+
cat examples/data/sample.json \
|
|
355
|
+
| etlplus load https://api.example.com/endpoint --target-type api
|
|
250
356
|
```
|
|
251
357
|
|
|
252
358
|
### Python API
|
|
@@ -284,57 +390,57 @@ For YAML-driven pipelines executed end-to-end (extract → validate → transfor
|
|
|
284
390
|
- Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
285
391
|
- Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
|
|
286
392
|
|
|
393
|
+
CLI quick reference for pipelines:
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
# List jobs or show a pipeline summary
|
|
397
|
+
etlplus check --config examples/configs/pipeline.yml --jobs
|
|
398
|
+
etlplus check --config examples/configs/pipeline.yml --summary
|
|
399
|
+
|
|
400
|
+
# Run a job
|
|
401
|
+
etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
|
|
402
|
+
```
|
|
403
|
+
|
|
287
404
|
### Complete ETL Pipeline Example
|
|
288
405
|
|
|
289
406
|
```bash
|
|
290
407
|
# 1. Extract from CSV
|
|
291
|
-
etlplus extract file examples/data/sample.csv
|
|
408
|
+
etlplus extract file examples/data/sample.csv > temp/sample_extracted.json
|
|
292
409
|
|
|
293
410
|
# 2. Transform (filter and select fields)
|
|
294
|
-
etlplus transform
|
|
411
|
+
etlplus transform \
|
|
295
412
|
--operations '{"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}' \
|
|
296
|
-
|
|
413
|
+
temp/sample_extracted.json \
|
|
414
|
+
temp/sample_transformed.json
|
|
297
415
|
|
|
298
416
|
# 3. Validate transformed data
|
|
299
|
-
etlplus validate
|
|
300
|
-
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
|
|
417
|
+
etlplus validate \
|
|
418
|
+
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
419
|
+
temp/sample_transformed.json
|
|
301
420
|
|
|
302
421
|
# 4. Load to CSV
|
|
303
|
-
|
|
422
|
+
cat temp/sample_transformed.json \
|
|
423
|
+
| etlplus load temp/sample_output.csv
|
|
304
424
|
```
|
|
305
425
|
|
|
306
|
-
###
|
|
307
|
-
|
|
308
|
-
ETLPlus honors a small number of environment toggles to refine CLI behavior:
|
|
426
|
+
### Format Overrides
|
|
309
427
|
|
|
310
|
-
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
- `error|fail|strict`: treat as error (non-zero exit)
|
|
314
|
-
- `warn` (default): print a warning to stderr
|
|
315
|
-
- `ignore|silent`: no message
|
|
316
|
-
- Precedence: the CLI flag `--strict-format` overrides the environment.
|
|
428
|
+
`--source-format` and `--target-format` override whichever format would normally be inferred from a
|
|
429
|
+
file extension. This is useful when an input lacks an extension (for example, `records.txt` that
|
|
430
|
+
actually contains CSV) or when you intentionally want to treat a file as another format.
|
|
317
431
|
|
|
318
432
|
Examples (zsh):
|
|
319
433
|
|
|
320
434
|
```zsh
|
|
321
|
-
#
|
|
322
|
-
etlplus extract
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
# Equivalent strict behavior via flag (overrides environment)
|
|
332
|
-
etlplus extract file data.csv --format csv --strict-format
|
|
333
|
-
etlplus load data.json file out.csv --format csv --strict-format
|
|
334
|
-
|
|
335
|
-
# Recommended: rely on extension, no --format needed for files
|
|
336
|
-
etlplus extract file data.csv
|
|
337
|
-
etlplus load data.json file out.csv
|
|
435
|
+
# Force CSV parsing for an extension-less file
|
|
436
|
+
etlplus extract data.txt --source-type file --source-format csv
|
|
437
|
+
|
|
438
|
+
# Write CSV to a file without the .csv suffix
|
|
439
|
+
etlplus load output.bin --target-type file --target-format csv < data.json
|
|
440
|
+
|
|
441
|
+
# Leave the flags off when extensions already match the desired format
|
|
442
|
+
etlplus extract data.csv --source-type file
|
|
443
|
+
etlplus load data.json --target-type file < data.json
|
|
338
444
|
```
|
|
339
445
|
|
|
340
446
|
## Transformation Operations
|