etlplus 0.4.1__tar.gz → 0.4.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.4.1 → etlplus-0.4.5}/DEMO.md +5 -5
- {etlplus-0.4.1/etlplus.egg-info → etlplus-0.4.5}/PKG-INFO +44 -36
- {etlplus-0.4.1 → etlplus-0.4.5}/README.md +43 -35
- {etlplus-0.4.1 → etlplus-0.4.5}/docs/pipeline-guide.md +6 -6
- etlplus-0.4.5/etlplus/cli/app.py +1239 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/cli/handlers.py +175 -196
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/cli/main.py +131 -74
- {etlplus-0.4.1 → etlplus-0.4.5/etlplus.egg-info}/PKG-INFO +44 -36
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus.egg-info/SOURCES.txt +4 -1
- etlplus-0.4.5/tests/integration/test_i_cli.py +172 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/test_i_pagination_strategy.py +31 -31
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/test_i_pipeline_smoke.py +1 -1
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_pagination_client.py +1 -1
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_paginator.py +1 -1
- etlplus-0.4.5/tests/unit/cli/conftest.py +29 -0
- etlplus-0.4.5/tests/unit/cli/test_u_cli_app.py +530 -0
- etlplus-0.4.5/tests/unit/cli/test_u_cli_handlers.py +855 -0
- etlplus-0.4.5/tests/unit/cli/test_u_cli_main.py +170 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/config/test_u_connector.py +1 -1
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/config/test_u_pipeline.py +1 -1
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_extract.py +1 -1
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_load.py +1 -1
- etlplus-0.4.1/etlplus/cli/app.py +0 -1000
- etlplus-0.4.1/tests/integration/test_i_cli.py +0 -244
- etlplus-0.4.1/tests/unit/test_u_cli.py +0 -576
- {etlplus-0.4.1 → etlplus-0.4.5}/.coveragerc +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/.editorconfig +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/.gitattributes +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/.github/workflows/ci.yml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/.gitignore +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/.pre-commit-config.yaml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/.ruff.toml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/CONTRIBUTING.md +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/LICENSE +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/Makefile +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/REFERENCES.md +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/__main__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/__version__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/README.md +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/auth.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/errors.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/transport.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/api/types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/config/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/config/connector.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/config/jobs.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/config/pipeline.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/config/profile.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/config/types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/config/utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/enums.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/extract.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/file.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/load.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/mixins.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/py.typed +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/run.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/run_helpers.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/transform.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/validate.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus/validation/utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus.egg-info/requires.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/README.md +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/data/sample.csv +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/data/sample.json +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/data/sample.xml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/data/sample.xsd +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/data/sample.yaml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/examples/quickstart_python.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/pyproject.toml +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/pytest.ini +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/setup.cfg +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/setup.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/__init__.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/test_i_examples_data_parity.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/test_i_run.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/config/test_u_config_utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/config/test_u_jobs.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/conftest.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_enums.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_file.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_run.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_run_helpers.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_transform.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_validate.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tools/run_pipeline.py +0 -0
- {etlplus-0.4.1 → etlplus-0.4.5}/tools/update_demo_snippets.py +0 -0
|
@@ -58,7 +58,7 @@ John Doe,30,New York
|
|
|
58
58
|
Jane Smith,25,Los Angeles
|
|
59
59
|
CSVDATA
|
|
60
60
|
|
|
61
|
-
$ etlplus extract
|
|
61
|
+
$ etlplus extract users.csv
|
|
62
62
|
[
|
|
63
63
|
{
|
|
64
64
|
"name": "John Doe",
|
|
@@ -151,7 +151,7 @@ $ etlplus load '{"name": "John", "status": "active"}' file output.json
|
|
|
151
151
|
$ etlplus load '[
|
|
152
152
|
{"name": "John", "email": "john@example.com"},
|
|
153
153
|
{"name": "Jane", "email": "jane@example.com"}
|
|
154
|
-
]'
|
|
154
|
+
]' --to users.csv
|
|
155
155
|
{
|
|
156
156
|
"status": "success",
|
|
157
157
|
"message": "Data loaded to users.csv",
|
|
@@ -170,14 +170,14 @@ This example shows a complete ETL workflow:
|
|
|
170
170
|
|
|
171
171
|
```bash
|
|
172
172
|
# Step 1: Extract
|
|
173
|
-
$ etlplus extract
|
|
173
|
+
$ etlplus extract raw_data.csv > extracted.json
|
|
174
174
|
|
|
175
175
|
# Step 2: Transform
|
|
176
|
-
$ etlplus transform extracted.json \
|
|
176
|
+
$ etlplus transform --from extracted.json \
|
|
177
177
|
--operations '{
|
|
178
178
|
"filter": {"field": "age", "op": "gte", "value": 18},
|
|
179
179
|
"select": ["name", "email", "age"]
|
|
180
|
-
}'
|
|
180
|
+
}' --to transformed.json
|
|
181
181
|
|
|
182
182
|
# Step 3: Validate
|
|
183
183
|
$ etlplus validate transformed.json \
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.5
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -67,7 +67,7 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
67
67
|
- [Load Data](#load-data)
|
|
68
68
|
- [Python API](#python-api)
|
|
69
69
|
- [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
|
|
70
|
-
- [
|
|
70
|
+
- [Format Overrides](#format-overrides)
|
|
71
71
|
- [Transformation Operations](#transformation-operations)
|
|
72
72
|
- [Filter Operations](#filter-operations)
|
|
73
73
|
- [Aggregation Functions](#aggregation-functions)
|
|
@@ -79,6 +79,8 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
79
79
|
- [Test Layers](#test-layers)
|
|
80
80
|
- [Code Coverage](#code-coverage)
|
|
81
81
|
- [Linting](#linting)
|
|
82
|
+
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
83
|
+
- [Releasing to PyPI](#releasing-to-pypi)
|
|
82
84
|
- [Links](#links)
|
|
83
85
|
- [License](#license)
|
|
84
86
|
- [Contributing](#contributing)
|
|
@@ -169,9 +171,9 @@ etlplus --version
|
|
|
169
171
|
|
|
170
172
|
#### Extract Data
|
|
171
173
|
|
|
172
|
-
Note: For file sources, the format is inferred from the filename extension
|
|
173
|
-
|
|
174
|
-
|
|
174
|
+
Note: For file sources, the format is normally inferred from the filename extension. Use
|
|
175
|
+
`--source-format` to override inference when a file lacks an extension or when you want to force a
|
|
176
|
+
specific parser.
|
|
175
177
|
|
|
176
178
|
Extract from JSON file:
|
|
177
179
|
```bash
|
|
@@ -212,6 +214,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
|
|
|
212
214
|
|
|
213
215
|
#### Transform Data
|
|
214
216
|
|
|
217
|
+
When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
|
|
218
|
+
`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
|
|
219
|
+
control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
|
|
220
|
+
paths continue to infer formats from their extensions. Use `--from` to override the inferred source
|
|
221
|
+
connector type and `--to` to override the inferred target connector type, matching the `etlplus
|
|
222
|
+
extract`/`etlplus load` behavior.
|
|
223
|
+
|
|
224
|
+
Transform file inputs while overriding connector types:
|
|
225
|
+
```bash
|
|
226
|
+
etlplus transform --from file examples/data/sample.json \
|
|
227
|
+
--operations '{"select": ["name", "email"]}' \
|
|
228
|
+
--to file -o temp/selected_output.json
|
|
229
|
+
```
|
|
230
|
+
|
|
215
231
|
Filter and select fields:
|
|
216
232
|
```bash
|
|
217
233
|
etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
|
|
@@ -235,19 +251,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
|
|
|
235
251
|
|
|
236
252
|
#### Load Data
|
|
237
253
|
|
|
254
|
+
`etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
|
|
255
|
+
|
|
238
256
|
Load to JSON file:
|
|
239
257
|
```bash
|
|
240
|
-
etlplus
|
|
258
|
+
etlplus extract file examples/data/sample.json \
|
|
259
|
+
| etlplus load --to file temp/sample_output.json
|
|
241
260
|
```
|
|
242
261
|
|
|
243
262
|
Load to CSV file:
|
|
244
263
|
```bash
|
|
245
|
-
etlplus
|
|
264
|
+
etlplus extract file examples/data/sample.csv \
|
|
265
|
+
| etlplus load --to file temp/sample_output.csv
|
|
246
266
|
```
|
|
247
267
|
|
|
248
268
|
Load to REST API:
|
|
249
269
|
```bash
|
|
250
|
-
|
|
270
|
+
cat examples/data/sample.json \
|
|
271
|
+
| etlplus load --to api https://api.example.com/endpoint
|
|
251
272
|
```
|
|
252
273
|
|
|
253
274
|
### Python API
|
|
@@ -301,41 +322,28 @@ etlplus validate temp/sample_transformed.json \
|
|
|
301
322
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
|
|
302
323
|
|
|
303
324
|
# 4. Load to CSV
|
|
304
|
-
|
|
325
|
+
cat temp/sample_transformed.json \
|
|
326
|
+
| etlplus load --to temp/sample_output.csv
|
|
305
327
|
```
|
|
306
328
|
|
|
307
|
-
###
|
|
308
|
-
|
|
309
|
-
ETLPlus honors a small number of environment toggles to refine CLI behavior:
|
|
329
|
+
### Format Overrides
|
|
310
330
|
|
|
311
|
-
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
- `error|fail|strict`: treat as error (non-zero exit)
|
|
315
|
-
- `warn` (default): print a warning to stderr
|
|
316
|
-
- `ignore|silent`: no message
|
|
317
|
-
- Precedence: the CLI flag `--strict-format` overrides the environment.
|
|
331
|
+
`--source-format` and `--target-format` override whichever format would normally be inferred from a
|
|
332
|
+
file extension. This is useful when an input lacks a recognized extension (for example, `records.txt` that
|
|
333
|
+
actually contains CSV) or when you intentionally want to treat a file as another format.
|
|
318
334
|
|
|
319
335
|
Examples (zsh):
|
|
320
336
|
|
|
321
337
|
```zsh
|
|
322
|
-
#
|
|
323
|
-
etlplus extract file data.
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
# Equivalent strict behavior via flag (overrides environment)
|
|
333
|
-
etlplus extract file data.csv --format csv --strict-format
|
|
334
|
-
etlplus load data.json file out.csv --format csv --strict-format
|
|
335
|
-
|
|
336
|
-
# Recommended: rely on extension, no --format needed for files
|
|
337
|
-
etlplus extract file data.csv
|
|
338
|
-
etlplus load data.json file out.csv
|
|
338
|
+
# Force CSV parsing for a file whose extension does not indicate CSV
|
|
339
|
+
etlplus extract --from file data.txt --source-format csv
|
|
340
|
+
|
|
341
|
+
# Write CSV to a file without the .csv suffix
|
|
342
|
+
etlplus load --to file output.bin --target-format csv < data.json
|
|
343
|
+
|
|
344
|
+
# Leave the flags off when extensions already match the desired format
|
|
345
|
+
etlplus extract --from file data.csv
|
|
346
|
+
etlplus load --to file data.json < data.json
|
|
339
347
|
```
|
|
340
348
|
|
|
341
349
|
## Transformation Operations
|
|
@@ -25,7 +25,7 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
25
25
|
- [Load Data](#load-data)
|
|
26
26
|
- [Python API](#python-api)
|
|
27
27
|
- [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
|
|
28
|
-
- [
|
|
28
|
+
- [Format Overrides](#format-overrides)
|
|
29
29
|
- [Transformation Operations](#transformation-operations)
|
|
30
30
|
- [Filter Operations](#filter-operations)
|
|
31
31
|
- [Aggregation Functions](#aggregation-functions)
|
|
@@ -37,6 +37,8 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
37
37
|
- [Test Layers](#test-layers)
|
|
38
38
|
- [Code Coverage](#code-coverage)
|
|
39
39
|
- [Linting](#linting)
|
|
40
|
+
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
41
|
+
- [Releasing to PyPI](#releasing-to-pypi)
|
|
40
42
|
- [Links](#links)
|
|
41
43
|
- [License](#license)
|
|
42
44
|
- [Contributing](#contributing)
|
|
@@ -127,9 +129,9 @@ etlplus --version
|
|
|
127
129
|
|
|
128
130
|
#### Extract Data
|
|
129
131
|
|
|
130
|
-
Note: For file sources, the format is inferred from the filename extension
|
|
131
|
-
|
|
132
|
-
|
|
132
|
+
Note: For file sources, the format is normally inferred from the filename extension. Use
|
|
133
|
+
`--source-format` to override inference when a file lacks an extension or when you want to force a
|
|
134
|
+
specific parser.
|
|
133
135
|
|
|
134
136
|
Extract from JSON file:
|
|
135
137
|
```bash
|
|
@@ -170,6 +172,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
|
|
|
170
172
|
|
|
171
173
|
#### Transform Data
|
|
172
174
|
|
|
175
|
+
When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
|
|
176
|
+
`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
|
|
177
|
+
control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
|
|
178
|
+
paths continue to infer formats from their extensions. Use `--from` to override the inferred source
|
|
179
|
+
connector type and `--to` to override the inferred target connector type, matching the `etlplus
|
|
180
|
+
extract`/`etlplus load` behavior.
|
|
181
|
+
|
|
182
|
+
Transform file inputs while overriding connector types:
|
|
183
|
+
```bash
|
|
184
|
+
etlplus transform --from file examples/data/sample.json \
|
|
185
|
+
--operations '{"select": ["name", "email"]}' \
|
|
186
|
+
--to file -o temp/selected_output.json
|
|
187
|
+
```
|
|
188
|
+
|
|
173
189
|
Filter and select fields:
|
|
174
190
|
```bash
|
|
175
191
|
etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
|
|
@@ -193,19 +209,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
|
|
|
193
209
|
|
|
194
210
|
#### Load Data
|
|
195
211
|
|
|
212
|
+
`etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
|
|
213
|
+
|
|
196
214
|
Load to JSON file:
|
|
197
215
|
```bash
|
|
198
|
-
etlplus
|
|
216
|
+
etlplus extract file examples/data/sample.json \
|
|
217
|
+
| etlplus load --to file temp/sample_output.json
|
|
199
218
|
```
|
|
200
219
|
|
|
201
220
|
Load to CSV file:
|
|
202
221
|
```bash
|
|
203
|
-
etlplus
|
|
222
|
+
etlplus extract file examples/data/sample.csv \
|
|
223
|
+
| etlplus load --to file temp/sample_output.csv
|
|
204
224
|
```
|
|
205
225
|
|
|
206
226
|
Load to REST API:
|
|
207
227
|
```bash
|
|
208
|
-
|
|
228
|
+
cat examples/data/sample.json \
|
|
229
|
+
| etlplus load --to api https://api.example.com/endpoint
|
|
209
230
|
```
|
|
210
231
|
|
|
211
232
|
### Python API
|
|
@@ -259,41 +280,28 @@ etlplus validate temp/sample_transformed.json \
|
|
|
259
280
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
|
|
260
281
|
|
|
261
282
|
# 4. Load to CSV
|
|
262
|
-
|
|
283
|
+
cat temp/sample_transformed.json \
|
|
284
|
+
| etlplus load --to temp/sample_output.csv
|
|
263
285
|
```
|
|
264
286
|
|
|
265
|
-
###
|
|
266
|
-
|
|
267
|
-
ETLPlus honors a small number of environment toggles to refine CLI behavior:
|
|
287
|
+
### Format Overrides
|
|
268
288
|
|
|
269
|
-
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
- `error|fail|strict`: treat as error (non-zero exit)
|
|
273
|
-
- `warn` (default): print a warning to stderr
|
|
274
|
-
- `ignore|silent`: no message
|
|
275
|
-
- Precedence: the CLI flag `--strict-format` overrides the environment.
|
|
289
|
+
`--source-format` and `--target-format` override whichever format would normally be inferred from a
|
|
290
|
+
file extension. This is useful when an input lacks a recognized extension (for example, `records.txt` that
|
|
291
|
+
actually contains CSV) or when you intentionally want to treat a file as another format.
|
|
276
292
|
|
|
277
293
|
Examples (zsh):
|
|
278
294
|
|
|
279
295
|
```zsh
|
|
280
|
-
#
|
|
281
|
-
etlplus extract file data.
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
# Equivalent strict behavior via flag (overrides environment)
|
|
291
|
-
etlplus extract file data.csv --format csv --strict-format
|
|
292
|
-
etlplus load data.json file out.csv --format csv --strict-format
|
|
293
|
-
|
|
294
|
-
# Recommended: rely on extension, no --format needed for files
|
|
295
|
-
etlplus extract file data.csv
|
|
296
|
-
etlplus load data.json file out.csv
|
|
296
|
+
# Force CSV parsing for a file whose extension does not indicate CSV
|
|
297
|
+
etlplus extract --from file data.txt --source-format csv
|
|
298
|
+
|
|
299
|
+
# Write CSV to a file without the .csv suffix
|
|
300
|
+
etlplus load --to file output.bin --target-format csv < data.json
|
|
301
|
+
|
|
302
|
+
# Leave the flags off when extensions already match the desired format
|
|
303
|
+
etlplus extract --from file data.csv
|
|
304
|
+
etlplus load --to file data.json < data.json
|
|
297
305
|
```
|
|
298
306
|
|
|
299
307
|
## Transformation Operations
|
|
@@ -245,13 +245,13 @@ job. Those values are merged into the client configuration and forwarded to
|
|
|
245
245
|
`EndpointClient.paginate(..., rate_limit_overrides=...)`, ensuring only that job’s paginator is sped
|
|
246
246
|
up or slowed down.
|
|
247
247
|
|
|
248
|
-
|
|
248
|
+
Format override note:
|
|
249
249
|
|
|
250
|
-
When extracting from file sources, ETLPlus infers the format from the filename extension
|
|
251
|
-
`.csv`, `.json`, `.xml`, `.yaml`).
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
250
|
+
When extracting from file sources, ETLPlus still infers the format from the filename extension
|
|
251
|
+
(`.csv`, `.json`, `.xml`, `.yaml`). However, `--source-format` and `--target-format` now override
|
|
252
|
+
that inference for both Typer- and argparse-based CLIs. This means you can safely point at files
|
|
253
|
+
without extensions or with misleading suffixes and force the desired parser or writer without having
|
|
254
|
+
to rename the file first.
|
|
255
255
|
|
|
256
256
|
Note: When using a service + endpoint in a source, URL composition (including `base_path`) is
|
|
257
257
|
handled automatically. See “Runner behavior with base_path (sources and targets)” in the APIs
|