etlplus 0.4.1__tar.gz → 0.5.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.4.1 → etlplus-0.5.5}/.pre-commit-config.yaml +5 -1
- {etlplus-0.4.1 → etlplus-0.5.5}/DEMO.md +5 -5
- etlplus-0.5.5/MANIFEST.in +12 -0
- {etlplus-0.4.1/etlplus.egg-info → etlplus-0.5.5}/PKG-INFO +105 -36
- {etlplus-0.4.1 → etlplus-0.5.5}/README.md +104 -35
- {etlplus-0.4.1 → etlplus-0.5.5}/docs/pipeline-guide.md +14 -13
- etlplus-0.5.5/etlplus/cli/app.py +1367 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/cli/handlers.py +340 -252
- etlplus-0.5.5/etlplus/cli/main.py +616 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/pipeline.py +11 -0
- etlplus-0.5.5/etlplus/ddl.py +197 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/run.py +2 -4
- etlplus-0.5.5/etlplus/templates/__init__.py +5 -0
- etlplus-0.5.5/etlplus/templates/ddl.sql.j2 +128 -0
- etlplus-0.5.5/etlplus/templates/view.sql.j2 +69 -0
- {etlplus-0.4.1 → etlplus-0.5.5/etlplus.egg-info}/PKG-INFO +105 -36
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/SOURCES.txt +10 -1
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/README.md +4 -4
- etlplus-0.5.5/examples/configs/ddl_spec.yml +67 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/pyproject.toml +1 -1
- {etlplus-0.4.1 → etlplus-0.5.5}/setup.py +4 -0
- etlplus-0.5.5/tests/integration/test_i_cli.py +172 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_pagination_strategy.py +31 -31
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_pipeline_smoke.py +5 -4
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_pagination_client.py +1 -1
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_paginator.py +1 -1
- etlplus-0.5.5/tests/unit/cli/conftest.py +29 -0
- etlplus-0.5.5/tests/unit/cli/test_u_cli_app.py +582 -0
- etlplus-0.5.5/tests/unit/cli/test_u_cli_handlers.py +947 -0
- etlplus-0.5.5/tests/unit/cli/test_u_cli_main.py +293 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_connector.py +1 -1
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_pipeline.py +31 -1
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_extract.py +1 -1
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_load.py +1 -1
- etlplus-0.4.1/etlplus/cli/app.py +0 -1000
- etlplus-0.4.1/etlplus/cli/main.py +0 -404
- etlplus-0.4.1/tests/integration/test_i_cli.py +0 -244
- etlplus-0.4.1/tests/unit/test_u_cli.py +0 -576
- {etlplus-0.4.1 → etlplus-0.5.5}/.coveragerc +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/.editorconfig +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/.gitattributes +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/.github/workflows/ci.yml +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/.gitignore +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/.ruff.toml +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/CONTRIBUTING.md +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/LICENSE +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/Makefile +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/REFERENCES.md +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/__main__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/__version__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/README.md +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/auth.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/errors.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/transport.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/connector.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/jobs.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/profile.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/enums.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/extract.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/file.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/load.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/mixins.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/py.typed +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/run_helpers.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/transform.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/validate.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/validation/utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/requires.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.csv +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.json +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.xml +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.xsd +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.yaml +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/examples/quickstart_python.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/pytest.ini +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/setup.cfg +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_examples_data_parity.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_run.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_config_utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_jobs.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_enums.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_file.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_run.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_run_helpers.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_transform.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_validate.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tools/run_pipeline.py +0 -0
- {etlplus-0.4.1 → etlplus-0.5.5}/tools/update_demo_snippets.py +0 -0
.pre-commit-config.yaml

@@ -159,7 +159,11 @@ repos:
     rev: v1.19.0
     hooks:
       - id: mypy
-        args:
+        args:
+          - --cache-dir=.mypy_cache/pre-commit
+          - --ignore-missing-imports
+          - --install-types
+          - --non-interactive
 
   - repo: https://github.com/pycqa/flake8
     rev: 7.3.0
DEMO.md

@@ -58,7 +58,7 @@ John Doe,30,New York
 Jane Smith,25,Los Angeles
 CSVDATA
 
-$ etlplus extract
+$ etlplus extract users.csv
 [
   {
     "name": "John Doe",
@@ -151,7 +151,7 @@ $ etlplus load '{"name": "John", "status": "active"}' file output.json
 $ etlplus load '[
   {"name": "John", "email": "john@example.com"},
   {"name": "Jane", "email": "jane@example.com"}
-]'
+]' --to users.csv
 {
   "status": "success",
   "message": "Data loaded to users.csv",
@@ -170,14 +170,14 @@ This example shows a complete ETL workflow:
 
 ```bash
 # Step 1: Extract
-$ etlplus extract
+$ etlplus extract raw_data.csv > extracted.json
 
 # Step 2: Transform
-$ etlplus transform extracted.json \
+$ etlplus transform --from extracted.json \
     --operations '{
       "filter": {"field": "age", "op": "gte", "value": 18},
       "select": ["name", "email", "age"]
-    }'
+    }' --to transformed.json
 
 # Step 3: Validate
 $ etlplus validate transformed.json \
MANIFEST.in

@@ -0,0 +1,12 @@
+# MANIFEST.in
+# ETLPlus
+#
+# Copyright © 2026 Dagitali LLC. All rights reserved.
+#
+# Contains commands that allow lists of files to be discovered and manipulated.
+#
+# See:
+# 1. https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html
+
+# Include Jinja template files in the etlplus package
+recursive-include etlplus/templates *.j2
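The `recursive-include` above ships the new `*.j2` templates inside the sdist/wheel so that `etlplus render` can find them at runtime. As a rough illustration only (the package's actual loader in `etlplus/templates/__init__.py` is not shown in this diff), packaged templates are typically read via `importlib.resources` and handed to Jinja2:

```python
# Hypothetical sketch: how packaged Jinja2 templates under etlplus/templates
# might be loaded at runtime. The real etlplus loader may differ.
from importlib.resources import files

from jinja2 import Environment


def load_template_source(name: str) -> str:
    """Read a bundled *.j2 template (e.g. 'ddl.sql.j2') as text."""
    return files("etlplus.templates").joinpath(name).read_text(encoding="utf-8")


env = Environment(autoescape=False)  # rendering SQL, so no HTML escaping
template = env.from_string(load_template_source("ddl.sql.j2"))
# Rendering would still require the table-spec context the real template expects.
```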
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: etlplus
-Version: 0.4.1
+Version: 0.5.5
 Summary: A Swiss Army knife for simple ETL operations
 Home-page: https://github.com/Dagitali/ETLPlus
 Author: ETLPlus Team
@@ -61,13 +61,15 @@ package and command-line interface for data extraction, validation, transformati
 - [Quickstart](#quickstart)
 - [Usage](#usage)
 - [Command Line Interface](#command-line-interface)
+- [Inspect Pipelines](#inspect-pipelines)
+- [Render SQL DDL](#render-sql-ddl)
 - [Extract Data](#extract-data)
 - [Validate Data](#validate-data)
 - [Transform Data](#transform-data)
 - [Load Data](#load-data)
 - [Python API](#python-api)
 - [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
-- [
+- [Format Overrides](#format-overrides)
 - [Transformation Operations](#transformation-operations)
 - [Filter Operations](#filter-operations)
 - [Aggregation Functions](#aggregation-functions)
@@ -79,6 +81,8 @@ package and command-line interface for data extraction, validation, transformati
 - [Test Layers](#test-layers)
 - [Code Coverage](#code-coverage)
 - [Linting](#linting)
+- [Updating Demo Snippets](#updating-demo-snippets)
+- [Releasing to PyPI](#releasing-to-pypi)
 - [Links](#links)
 - [License](#license)
 - [Contributing](#contributing)
@@ -86,6 +90,14 @@ package and command-line interface for data extraction, validation, transformati
 
 ## Features
 
+- **Check** data pipeline definitions before running them:
+  - Summarize jobs, sources, targets, and transforms
+  - Confirm configuration changes by printing focused sections on demand
+
+- **Render** SQL DDL from shared table specs:
+  - Generate CREATE TABLE or view statements
+  - Swap templates or direct output to files for database migrations
+
 - **Extract** data from multiple sources:
   - Files (CSV, JSON, XML, YAML)
   - Databases (connection string support)
@@ -167,11 +179,49 @@ etlplus --help
 etlplus --version
 ```
 
+#### Check Pipelines
+
+Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
+job names, summarize configured sources and targets, or drill into specific sections.
+
+List jobs and show a pipeline summary:
+```bash
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+```
+
+Show sources or transforms for troubleshooting:
+```bash
+etlplus check --config examples/configs/pipeline.yml --sources
+etlplus check --config examples/configs/pipeline.yml --transforms
+```
+
+#### Render SQL DDL
+
+Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
+or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
+own).
+
+Render all tables defined in a pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --template ddl
+```
+
+Render a single table in that pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --table customers --template view
+```
+
+Render from a standalone table spec to a file:
+```bash
+etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
+```
+
 #### Extract Data
 
-Note: For file sources, the format is inferred from the filename extension
-
-
+Note: For file sources, the format is normally inferred from the filename extension. Use
+`--source-format` to override inference when a file lacks an extension or when you want to force a
+specific parser.
 
 Extract from JSON file:
 ```bash
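The new `etlplus/ddl.py` and `templates/ddl.sql.j2` are not included in this excerpt, so the following is only a minimal sketch of the general spec-to-DDL idea behind `etlplus render`: a table spec rendered through a Jinja2 template into a `CREATE TABLE` statement. The spec shape (`name`/`columns`/`type`/`nullable`) and the template text are assumptions, not the shipped template.

```python
# Hypothetical spec-to-DDL rendering, independent of etlplus's real ddl.py and
# bundled templates. The spec shape below is assumed, not taken from
# examples/configs/ddl_spec.yml.
from jinja2 import Template

DDL_TEMPLATE = Template(
    """\
CREATE TABLE {{ table.name }} (
{%- for col in table.columns %}
    {{ col.name }} {{ col.type }}{{ " NOT NULL" if not col.get("nullable", True) }}
    {{- "," if not loop.last }}
{%- endfor %}
);
"""
)

spec = {
    "name": "customers",
    "columns": [
        {"name": "id", "type": "INTEGER", "nullable": False},
        {"name": "email", "type": "TEXT"},
    ],
}

print(DDL_TEMPLATE.render(table=spec))
# CREATE TABLE customers (
#     id INTEGER NOT NULL,
#     email TEXT
# );
```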
@@ -212,6 +262,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
 
 #### Transform Data
 
+When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
+`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
+control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
+paths continue to infer formats from their extensions. Use `--from` to override the inferred source
+connector type and `--to` to override the inferred target connector type, matching the `etlplus
+extract`/`etlplus load` behavior.
+
+Transform file inputs while overriding connector types:
+```bash
+etlplus transform --from file examples/data/sample.json \
+  --operations '{"select": ["name", "email"]}' \
+  --to file -o temp/selected_output.json
+```
+
 Filter and select fields:
 ```bash
 etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
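The `--operations` JSON used in these examples (`filter`, `select`) has straightforward semantics. The plain-Python sketch below shows what a filter-then-select pass amounts to; it is illustrative only, not the code in `etlplus/transform.py`, and operator names other than the documented `gte` are assumed.

```python
# Plain-Python illustration of the documented operations JSON, e.g.
#   {"filter": {"field": "age", "op": "gte", "value": 18}, "select": ["name", "email"]}
# Mirrors the semantics described above; operators other than "gte" are assumed.
import operator

OPS = {
    "gte": operator.ge,
    "gt": operator.gt,
    "lte": operator.le,
    "lt": operator.lt,
    "eq": operator.eq,
}


def apply_operations(records: list[dict], operations: dict) -> list[dict]:
    rows = records
    if "filter" in operations:
        spec = operations["filter"]
        compare = OPS[spec["op"]]
        rows = [
            row
            for row in rows
            if row.get(spec["field"]) is not None
            and compare(row[spec["field"]], spec["value"])
        ]
    if "select" in operations:
        keep = operations["select"]
        rows = [{key: row.get(key) for key in keep} for row in rows]
    return rows


records = [{"name": "John", "age": 30}, {"name": "Jane", "age": 17}]
ops = {"filter": {"field": "age", "op": "gte", "value": 18}, "select": ["name"]}
print(apply_operations(records, ops))  # [{'name': 'John'}]
```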
@@ -235,19 +299,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
 
 #### Load Data
 
+`etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
+
 Load to JSON file:
 ```bash
-etlplus
+etlplus extract file examples/data/sample.json \
+  | etlplus load --to file temp/sample_output.json
 ```
 
 Load to CSV file:
 ```bash
-etlplus
+etlplus extract file examples/data/sample.csv \
+  | etlplus load --to file temp/sample_output.csv
 ```
 
 Load to REST API:
 ```bash
-
+cat examples/data/sample.json \
+  | etlplus load --to api https://api.example.com/endpoint
 ```
 
 ### Python API
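The pipe-based examples above rely on `etlplus load` reading a JSON array from stdin. A stripped-down sketch of that stdin-to-file plumbing follows; it is illustrative only, not the actual loader in `etlplus/load.py`.

```python
# Minimal sketch of the stdin pattern the piped load examples rely on:
# read a JSON array of flat objects from stdin and write it out as CSV.
# Usage (hypothetical script): cat examples/data/sample.json | python load_sketch.py out.csv
import csv
import json
import sys


def load_stdin_json_to_csv(out_path: str) -> None:
    records = json.load(sys.stdin)  # expects a JSON array of flat objects
    if not records:
        return
    with open(out_path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(records[0].keys()))
        writer.writeheader()
        writer.writerows(records)


if __name__ == "__main__":
    load_stdin_json_to_csv(sys.argv[1])
```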
@@ -285,6 +354,19 @@ For YAML-driven pipelines executed end-to-end (extract → validate → transfor
 - Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
 - Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
 
+CLI quick reference for pipelines:
+
+```bash
+# List jobs or show a pipeline summary
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+
+# Run a job
+etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
+
+# Deprecated shim (will be removed): etlplus pipeline
+```
+
 ### Complete ETL Pipeline Example
 
 ```bash
@@ -301,41 +383,28 @@ etlplus validate temp/sample_transformed.json \
   --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
 
 # 4. Load to CSV
-
+cat temp/sample_transformed.json \
+  | etlplus load --to temp/sample_output.csv
 ```
 
-###
-
-ETLPlus honors a small number of environment toggles to refine CLI behavior:
+### Format Overrides
 
--
-
-
-- `error|fail|strict`: treat as error (non-zero exit)
-- `warn` (default): print a warning to stderr
-- `ignore|silent`: no message
-- Precedence: the CLI flag `--strict-format` overrides the environment.
+`--source-format` and `--target-format` override whichever format would normally be inferred from a
+file extension. This is useful when an input lacks an extension (for example, `records.txt` that
+actually contains CSV) or when you intentionally want to treat a file as another format.
 
 Examples (zsh):
 
 ```zsh
-#
-etlplus extract file data.
-
-
-
-
-
-
-
-# Equivalent strict behavior via flag (overrides environment)
-etlplus extract file data.csv --format csv --strict-format
-etlplus load data.json file out.csv --format csv --strict-format
-
-# Recommended: rely on extension, no --format needed for files
-etlplus extract file data.csv
-etlplus load data.json file out.csv
+# Force CSV parsing for an extension-less file
+etlplus extract --from file data.txt --source-format csv
+
+# Write CSV to a file without the .csv suffix
+etlplus load --to file output.bin --target-format csv < data.json
+
+# Leave the flags off when extensions already match the desired format
+etlplus extract --from file data.csv
+etlplus load --to file data.json < data.json
 ```
 
 ## Transformation Operations
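The precedence rule described here (an explicit `--source-format`/`--target-format` wins; otherwise the file extension decides) can be summarized in a few lines. The helper below is a hypothetical sketch, not an etlplus internal.

```python
# Sketch of the precedence rule described above: an explicit override wins,
# otherwise the format is inferred from the file extension. The helper and the
# extension map are hypothetical, not etlplus internals.
from __future__ import annotations

from pathlib import Path

EXTENSION_FORMATS = {".csv": "csv", ".json": "json", ".xml": "xml", ".yaml": "yaml"}


def resolve_format(path: str, override: str | None = None) -> str:
    if override:  # e.g. --source-format csv / --target-format csv
        return override
    inferred = EXTENSION_FORMATS.get(Path(path).suffix.lower())
    if inferred is None:
        raise ValueError(f"cannot infer a format for {path!r}; pass an explicit override")
    return inferred


print(resolve_format("data.txt", override="csv"))  # csv (forced)
print(resolve_format("data.csv"))  # csv (inferred from the extension)
```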
README.md

@@ -19,13 +19,15 @@ package and command-line interface for data extraction, validation, transformati
 - [Quickstart](#quickstart)
 - [Usage](#usage)
 - [Command Line Interface](#command-line-interface)
+- [Inspect Pipelines](#inspect-pipelines)
+- [Render SQL DDL](#render-sql-ddl)
 - [Extract Data](#extract-data)
 - [Validate Data](#validate-data)
 - [Transform Data](#transform-data)
 - [Load Data](#load-data)
 - [Python API](#python-api)
 - [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
-- [
+- [Format Overrides](#format-overrides)
 - [Transformation Operations](#transformation-operations)
 - [Filter Operations](#filter-operations)
 - [Aggregation Functions](#aggregation-functions)
@@ -37,6 +39,8 @@ package and command-line interface for data extraction, validation, transformati
 - [Test Layers](#test-layers)
 - [Code Coverage](#code-coverage)
 - [Linting](#linting)
+- [Updating Demo Snippets](#updating-demo-snippets)
+- [Releasing to PyPI](#releasing-to-pypi)
 - [Links](#links)
 - [License](#license)
 - [Contributing](#contributing)
@@ -44,6 +48,14 @@ package and command-line interface for data extraction, validation, transformati
 
 ## Features
 
+- **Check** data pipeline definitions before running them:
+  - Summarize jobs, sources, targets, and transforms
+  - Confirm configuration changes by printing focused sections on demand
+
+- **Render** SQL DDL from shared table specs:
+  - Generate CREATE TABLE or view statements
+  - Swap templates or direct output to files for database migrations
+
 - **Extract** data from multiple sources:
   - Files (CSV, JSON, XML, YAML)
   - Databases (connection string support)
@@ -125,11 +137,49 @@ etlplus --help
 etlplus --version
 ```
 
+#### Check Pipelines
+
+Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
+job names, summarize configured sources and targets, or drill into specific sections.
+
+List jobs and show a pipeline summary:
+```bash
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+```
+
+Show sources or transforms for troubleshooting:
+```bash
+etlplus check --config examples/configs/pipeline.yml --sources
+etlplus check --config examples/configs/pipeline.yml --transforms
+```
+
+#### Render SQL DDL
+
+Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
+or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
+own).
+
+Render all tables defined in a pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --template ddl
+```
+
+Render a single table in that pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --table customers --template view
+```
+
+Render from a standalone table spec to a file:
+```bash
+etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
+```
+
 #### Extract Data
 
-Note: For file sources, the format is inferred from the filename extension
-
-
+Note: For file sources, the format is normally inferred from the filename extension. Use
+`--source-format` to override inference when a file lacks an extension or when you want to force a
+specific parser.
 
 Extract from JSON file:
 ```bash
@@ -170,6 +220,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
 
 #### Transform Data
 
+When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
+`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
+control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
+paths continue to infer formats from their extensions. Use `--from` to override the inferred source
+connector type and `--to` to override the inferred target connector type, matching the `etlplus
+extract`/`etlplus load` behavior.
+
+Transform file inputs while overriding connector types:
+```bash
+etlplus transform --from file examples/data/sample.json \
+  --operations '{"select": ["name", "email"]}' \
+  --to file -o temp/selected_output.json
+```
+
 Filter and select fields:
 ```bash
 etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
@@ -193,19 +257,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
 
 #### Load Data
 
+`etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
+
 Load to JSON file:
 ```bash
-etlplus
+etlplus extract file examples/data/sample.json \
+  | etlplus load --to file temp/sample_output.json
 ```
 
 Load to CSV file:
 ```bash
-etlplus
+etlplus extract file examples/data/sample.csv \
+  | etlplus load --to file temp/sample_output.csv
 ```
 
 Load to REST API:
 ```bash
-
+cat examples/data/sample.json \
+  | etlplus load --to api https://api.example.com/endpoint
 ```
 
 ### Python API
@@ -243,6 +312,19 @@ For YAML-driven pipelines executed end-to-end (extract → validate → transfor
 - Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
 - Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
 
+CLI quick reference for pipelines:
+
+```bash
+# List jobs or show a pipeline summary
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+
+# Run a job
+etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
+
+# Deprecated shim (will be removed): etlplus pipeline
+```
+
 ### Complete ETL Pipeline Example
 
 ```bash
@@ -259,41 +341,28 @@ etlplus validate temp/sample_transformed.json \
   --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
 
 # 4. Load to CSV
-
+cat temp/sample_transformed.json \
+  | etlplus load --to temp/sample_output.csv
 ```
 
-###
-
-ETLPlus honors a small number of environment toggles to refine CLI behavior:
+### Format Overrides
 
--
-
-
-- `error|fail|strict`: treat as error (non-zero exit)
-- `warn` (default): print a warning to stderr
-- `ignore|silent`: no message
-- Precedence: the CLI flag `--strict-format` overrides the environment.
+`--source-format` and `--target-format` override whichever format would normally be inferred from a
+file extension. This is useful when an input lacks an extension (for example, `records.txt` that
+actually contains CSV) or when you intentionally want to treat a file as another format.
 
 Examples (zsh):
 
 ```zsh
-#
-etlplus extract file data.
-
-
-
-
-
-
-
-# Equivalent strict behavior via flag (overrides environment)
-etlplus extract file data.csv --format csv --strict-format
-etlplus load data.json file out.csv --format csv --strict-format
-
-# Recommended: rely on extension, no --format needed for files
-etlplus extract file data.csv
-etlplus load data.json file out.csv
+# Force CSV parsing for an extension-less file
+etlplus extract --from file data.txt --source-format csv
+
+# Write CSV to a file without the .csv suffix
+etlplus load --to file output.bin --target-format csv < data.json
+
+# Leave the flags off when extensions already match the desired format
+etlplus extract --from file data.csv
+etlplus load --to file data.json < data.json
 ```
 
 ## Transformation Operations
docs/pipeline-guide.md

@@ -245,13 +245,13 @@ job. Those values are merged into the client configuration and forwarded to
 `EndpointClient.paginate(..., rate_limit_overrides=...)`, ensuring only that job’s paginator is sped
 up or slowed down.
 
-
+Format override note:
 
-When extracting from file sources, ETLPlus infers the format from the filename extension
-`.csv`, `.json`, `.xml`, `.yaml`).
-
-
-
+When extracting from file sources, ETLPlus still infers the format from the filename extension
+(`.csv`, `.json`, `.xml`, `.yaml`). However, `--source-format` and `--target-format` now override
+that inference for both Typer- and argparse-based CLIs. This means you can safely point at files
+without extensions or with misleading suffixes and force the desired parser or writer without having
+to rename the file first.
 
 Note: When using a service + endpoint in a source, URL composition (including `base_path`) is
 handled automatically. See “Runner behavior with base_path (sources and targets)” in the APIs
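The guide states that per-job rate-limit values are merged into the client configuration and forwarded to `EndpointClient.paginate(..., rate_limit_overrides=...)`. Below is a hedged sketch of that merge-and-forward flow; the dictionary shapes and the positional `endpoint` argument are assumptions for illustration, and only the `rate_limit_overrides` keyword comes from the guide.

```python
# Hedged sketch of the merge-and-forward flow described in the guide. The job
# and defaults shapes, and the way the endpoint is passed, are assumptions.
from typing import Any


def paginate_job(client: Any, endpoint: Any, job: dict, profile_defaults: dict) -> list:
    # Per-job values win over profile/endpoint defaults, for this job only.
    overrides = {**profile_defaults.get("rate_limit", {}), **job.get("rate_limit", {})}
    return list(client.paginate(endpoint, rate_limit_overrides=overrides or None))
```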
@@ -378,31 +378,32 @@ jobs:
 Once you have a pipeline YAML, you can run jobs either from the
 command line or directly from Python.
 
-### CLI: `etlplus
+### CLI: `etlplus check` (inspect) and `etlplus run` (execute)
 
-List jobs
+List jobs or show a summary from a pipeline file:
 
 ```bash
-etlplus
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
 ```
 
 Run a specific job end-to-end (extract → validate → transform → load):
 
 ```bash
-etlplus pipeline --config examples/configs/pipeline.yml --run file_to_file_customers
-
-# Equivalent, using the dedicated run command
 etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
 ```
 
 Notes:
 
--
+- These commands read the same YAML schema described in this guide.
 - Environment-variable substitution (e.g. `${GITHUB_TOKEN}`) is applied the same way as when loading
   configs via the Python API.
 - For more details on the orchestration implementation, see
   [Runner internals: etlplus.run](run-module.md).
 
+Deprecated: `etlplus pipeline` is still available as a shim but will be removed in a future release;
+prefer `check` and `run`.
+
 ### Python: `etlplus.run.run`
 
 To trigger a job programmatically, use the high-level runner function exposed by the package:
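The runner's exact signature is not shown in this excerpt, so the snippet below only sketches an assumed call: the function path `etlplus.run.run` comes from the heading, while the keyword names simply mirror the CLI's `--config`/`--job` flags and may not match the real API.

```python
# Assumed usage only: etlplus.run.run is named by the heading above, but its
# signature is not shown here. The keyword names mirror the CLI's --config/--job
# flags and are an assumption.
from etlplus.run import run

result = run(config="examples/configs/pipeline.yml", job="file_to_file_customers")
print(result)
```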
|