etlplus 0.4.1__tar.gz → 0.5.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. {etlplus-0.4.1 → etlplus-0.5.5}/.pre-commit-config.yaml +5 -1
  2. {etlplus-0.4.1 → etlplus-0.5.5}/DEMO.md +5 -5
  3. etlplus-0.5.5/MANIFEST.in +12 -0
  4. {etlplus-0.4.1/etlplus.egg-info → etlplus-0.5.5}/PKG-INFO +105 -36
  5. {etlplus-0.4.1 → etlplus-0.5.5}/README.md +104 -35
  6. {etlplus-0.4.1 → etlplus-0.5.5}/docs/pipeline-guide.md +14 -13
  7. etlplus-0.5.5/etlplus/cli/app.py +1367 -0
  8. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/cli/handlers.py +340 -252
  9. etlplus-0.5.5/etlplus/cli/main.py +616 -0
  10. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/pipeline.py +11 -0
  11. etlplus-0.5.5/etlplus/ddl.py +197 -0
  12. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/run.py +2 -4
  13. etlplus-0.5.5/etlplus/templates/__init__.py +5 -0
  14. etlplus-0.5.5/etlplus/templates/ddl.sql.j2 +128 -0
  15. etlplus-0.5.5/etlplus/templates/view.sql.j2 +69 -0
  16. {etlplus-0.4.1 → etlplus-0.5.5/etlplus.egg-info}/PKG-INFO +105 -36
  17. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/SOURCES.txt +10 -1
  18. {etlplus-0.4.1 → etlplus-0.5.5}/examples/README.md +4 -4
  19. etlplus-0.5.5/examples/configs/ddl_spec.yml +67 -0
  20. {etlplus-0.4.1 → etlplus-0.5.5}/pyproject.toml +1 -1
  21. {etlplus-0.4.1 → etlplus-0.5.5}/setup.py +4 -0
  22. etlplus-0.5.5/tests/integration/test_i_cli.py +172 -0
  23. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_pagination_strategy.py +31 -31
  24. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_pipeline_smoke.py +5 -4
  25. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_pagination_client.py +1 -1
  26. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_paginator.py +1 -1
  27. etlplus-0.5.5/tests/unit/cli/conftest.py +29 -0
  28. etlplus-0.5.5/tests/unit/cli/test_u_cli_app.py +582 -0
  29. etlplus-0.5.5/tests/unit/cli/test_u_cli_handlers.py +947 -0
  30. etlplus-0.5.5/tests/unit/cli/test_u_cli_main.py +293 -0
  31. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_connector.py +1 -1
  32. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_pipeline.py +31 -1
  33. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_extract.py +1 -1
  34. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_load.py +1 -1
  35. etlplus-0.4.1/etlplus/cli/app.py +0 -1000
  36. etlplus-0.4.1/etlplus/cli/main.py +0 -404
  37. etlplus-0.4.1/tests/integration/test_i_cli.py +0 -244
  38. etlplus-0.4.1/tests/unit/test_u_cli.py +0 -576
  39. {etlplus-0.4.1 → etlplus-0.5.5}/.coveragerc +0 -0
  40. {etlplus-0.4.1 → etlplus-0.5.5}/.editorconfig +0 -0
  41. {etlplus-0.4.1 → etlplus-0.5.5}/.gitattributes +0 -0
  42. {etlplus-0.4.1 → etlplus-0.5.5}/.github/actions/python-bootstrap/action.yml +0 -0
  43. {etlplus-0.4.1 → etlplus-0.5.5}/.github/workflows/ci.yml +0 -0
  44. {etlplus-0.4.1 → etlplus-0.5.5}/.gitignore +0 -0
  45. {etlplus-0.4.1 → etlplus-0.5.5}/.ruff.toml +0 -0
  46. {etlplus-0.4.1 → etlplus-0.5.5}/CODE_OF_CONDUCT.md +0 -0
  47. {etlplus-0.4.1 → etlplus-0.5.5}/CONTRIBUTING.md +0 -0
  48. {etlplus-0.4.1 → etlplus-0.5.5}/LICENSE +0 -0
  49. {etlplus-0.4.1 → etlplus-0.5.5}/Makefile +0 -0
  50. {etlplus-0.4.1 → etlplus-0.5.5}/REFERENCES.md +0 -0
  51. {etlplus-0.4.1 → etlplus-0.5.5}/docs/snippets/installation_version.md +0 -0
  52. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/__init__.py +0 -0
  53. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/__main__.py +0 -0
  54. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/__version__.py +0 -0
  55. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/README.md +0 -0
  56. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/__init__.py +0 -0
  57. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/auth.py +0 -0
  58. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/config.py +0 -0
  59. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/endpoint_client.py +0 -0
  60. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/errors.py +0 -0
  61. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/__init__.py +0 -0
  62. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/client.py +0 -0
  63. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/config.py +0 -0
  64. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/pagination/paginator.py +0 -0
  65. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/rate_limiting/__init__.py +0 -0
  66. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/rate_limiting/config.py +0 -0
  67. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
  68. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/request_manager.py +0 -0
  69. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/retry_manager.py +0 -0
  70. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/transport.py +0 -0
  71. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/api/types.py +0 -0
  72. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/cli/__init__.py +0 -0
  73. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/__init__.py +0 -0
  74. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/connector.py +0 -0
  75. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/jobs.py +0 -0
  76. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/profile.py +0 -0
  77. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/types.py +0 -0
  78. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/config/utils.py +0 -0
  79. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/enums.py +0 -0
  80. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/extract.py +0 -0
  81. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/file.py +0 -0
  82. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/load.py +0 -0
  83. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/mixins.py +0 -0
  84. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/py.typed +0 -0
  85. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/run_helpers.py +0 -0
  86. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/transform.py +0 -0
  87. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/types.py +0 -0
  88. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/utils.py +0 -0
  89. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/validate.py +0 -0
  90. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/validation/__init__.py +0 -0
  91. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus/validation/utils.py +0 -0
  92. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/dependency_links.txt +0 -0
  93. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/entry_points.txt +0 -0
  94. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/requires.txt +0 -0
  95. {etlplus-0.4.1 → etlplus-0.5.5}/etlplus.egg-info/top_level.txt +0 -0
  96. {etlplus-0.4.1 → etlplus-0.5.5}/examples/configs/pipeline.yml +0 -0
  97. {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.csv +0 -0
  98. {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.json +0 -0
  99. {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.xml +0 -0
  100. {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.xsd +0 -0
  101. {etlplus-0.4.1 → etlplus-0.5.5}/examples/data/sample.yaml +0 -0
  102. {etlplus-0.4.1 → etlplus-0.5.5}/examples/quickstart_python.py +0 -0
  103. {etlplus-0.4.1 → etlplus-0.5.5}/pytest.ini +0 -0
  104. {etlplus-0.4.1 → etlplus-0.5.5}/setup.cfg +0 -0
  105. {etlplus-0.4.1 → etlplus-0.5.5}/tests/__init__.py +0 -0
  106. {etlplus-0.4.1 → etlplus-0.5.5}/tests/conftest.py +0 -0
  107. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/conftest.py +0 -0
  108. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_examples_data_parity.py +0 -0
  109. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
  110. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_run.py +0 -0
  111. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
  112. {etlplus-0.4.1 → etlplus-0.5.5}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
  113. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/conftest.py +0 -0
  114. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_auth.py +0 -0
  115. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_config.py +0 -0
  116. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_endpoint_client.py +0 -0
  117. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_mocks.py +0 -0
  118. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_pagination_config.py +0 -0
  119. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_rate_limit_config.py +0 -0
  120. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_rate_limiter.py +0 -0
  121. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_request_manager.py +0 -0
  122. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_retry_manager.py +0 -0
  123. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_transport.py +0 -0
  124. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/api/test_u_types.py +0 -0
  125. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_config_utils.py +0 -0
  126. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/config/test_u_jobs.py +0 -0
  127. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/conftest.py +0 -0
  128. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_enums.py +0 -0
  129. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_file.py +0 -0
  130. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_main.py +0 -0
  131. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_mixins.py +0 -0
  132. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_run.py +0 -0
  133. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_run_helpers.py +0 -0
  134. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_transform.py +0 -0
  135. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_utils.py +0 -0
  136. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_validate.py +0 -0
  137. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/test_u_version.py +0 -0
  138. {etlplus-0.4.1 → etlplus-0.5.5}/tests/unit/validation/test_u_validation_utils.py +0 -0
  139. {etlplus-0.4.1 → etlplus-0.5.5}/tools/run_pipeline.py +0 -0
  140. {etlplus-0.4.1 → etlplus-0.5.5}/tools/update_demo_snippets.py +0 -0
@@ -159,7 +159,11 @@ repos:
159
159
  rev: v1.19.0
160
160
  hooks:
161
161
  - id: mypy
162
- args: [--ignore-missing-imports, --install-types, --non-interactive]
162
+ args:
163
+ - --cache-dir=.mypy_cache/pre-commit
164
+ - --ignore-missing-imports
165
+ - --install-types
166
+ - --non-interactive
163
167
 
164
168
  - repo: https://github.com/pycqa/flake8
165
169
  rev: 7.3.0
@@ -58,7 +58,7 @@ John Doe,30,New York
58
58
  Jane Smith,25,Los Angeles
59
59
  CSVDATA
60
60
 
61
- $ etlplus extract file users.csv --format csv
61
+ $ etlplus extract users.csv
62
62
  [
63
63
  {
64
64
  "name": "John Doe",
@@ -151,7 +151,7 @@ $ etlplus load '{"name": "John", "status": "active"}' file output.json
151
151
  $ etlplus load '[
152
152
  {"name": "John", "email": "john@example.com"},
153
153
  {"name": "Jane", "email": "jane@example.com"}
154
- ]' file users.csv --format csv
154
+ ]' --to users.csv
155
155
  {
156
156
  "status": "success",
157
157
  "message": "Data loaded to users.csv",
@@ -170,14 +170,14 @@ This example shows a complete ETL workflow:
170
170
 
171
171
  ```bash
172
172
  # Step 1: Extract
173
- $ etlplus extract file raw_data.csv --format csv -o extracted.json
173
+ $ etlplus extract raw_data.csv > extracted.json
174
174
 
175
175
  # Step 2: Transform
176
- $ etlplus transform extracted.json \
176
+ $ etlplus transform --from extracted.json \
177
177
  --operations '{
178
178
  "filter": {"field": "age", "op": "gte", "value": 18},
179
179
  "select": ["name", "email", "age"]
180
- }' -o transformed.json
180
+ }' --to transformed.json
181
181
 
182
182
  # Step 3: Validate
183
183
  $ etlplus validate transformed.json \
@@ -0,0 +1,12 @@
1
+ # MANIFEST.in
2
+ # ETLPlus
3
+ #
4
+ # Copyright © 2026 Dagitali LLC. All rights reserved.
5
+ #
6
+ # Contains commands that allow lists of files to be discovered and manipulated.
7
+ #
8
+ # See:
9
+ # 1. https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html
10
+
11
+ # Include Jinja template files in the etlplus package
12
+ recursive-include etlplus/templates *.j2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: etlplus
3
- Version: 0.4.1
3
+ Version: 0.5.5
4
4
  Summary: A Swiss Army knife for simple ETL operations
5
5
  Home-page: https://github.com/Dagitali/ETLPlus
6
6
  Author: ETLPlus Team
@@ -61,13 +61,15 @@ package and command-line interface for data extraction, validation, transformati
61
61
  - [Quickstart](#quickstart)
62
62
  - [Usage](#usage)
63
63
  - [Command Line Interface](#command-line-interface)
64
+ - [Check Pipelines](#check-pipelines)
65
+ - [Render SQL DDL](#render-sql-ddl)
64
66
  - [Extract Data](#extract-data)
65
67
  - [Validate Data](#validate-data)
66
68
  - [Transform Data](#transform-data)
67
69
  - [Load Data](#load-data)
68
70
  - [Python API](#python-api)
69
71
  - [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
70
- - [Environment Variables](#environment-variables)
72
+ - [Format Overrides](#format-overrides)
71
73
  - [Transformation Operations](#transformation-operations)
72
74
  - [Filter Operations](#filter-operations)
73
75
  - [Aggregation Functions](#aggregation-functions)
@@ -79,6 +81,8 @@ package and command-line interface for data extraction, validation, transformati
79
81
  - [Test Layers](#test-layers)
80
82
  - [Code Coverage](#code-coverage)
81
83
  - [Linting](#linting)
84
+ - [Updating Demo Snippets](#updating-demo-snippets)
85
+ - [Releasing to PyPI](#releasing-to-pypi)
82
86
  - [Links](#links)
83
87
  - [License](#license)
84
88
  - [Contributing](#contributing)
@@ -86,6 +90,14 @@ package and command-line interface for data extraction, validation, transformati
86
90
 
87
91
  ## Features
88
92
 
93
+ - **Check** data pipeline definitions before running them:
94
+ - Summarize jobs, sources, targets, and transforms
95
+ - Confirm configuration changes by printing focused sections on demand
96
+
97
+ - **Render** SQL DDL from shared table specs:
98
+ - Generate CREATE TABLE or view statements
99
+ - Swap templates or direct output to files for database migrations
100
+
89
101
  - **Extract** data from multiple sources:
90
102
  - Files (CSV, JSON, XML, YAML)
91
103
  - Databases (connection string support)
@@ -167,11 +179,49 @@ etlplus --help
167
179
  etlplus --version
168
180
  ```
169
181
 
182
+ #### Check Pipelines
183
+
184
+ Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
185
+ job names, summarize configured sources and targets, or drill into specific sections.
186
+
187
+ List jobs and show a pipeline summary:
188
+ ```bash
189
+ etlplus check --config examples/configs/pipeline.yml --jobs
190
+ etlplus check --config examples/configs/pipeline.yml --summary
191
+ ```
192
+
193
+ Show sources or transforms for troubleshooting:
194
+ ```bash
195
+ etlplus check --config examples/configs/pipeline.yml --sources
196
+ etlplus check --config examples/configs/pipeline.yml --transforms
197
+ ```
198
+
199
+ #### Render SQL DDL
200
+
201
+ Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
202
+ or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
203
+ own).
204
+
205
+ Render all tables defined in a pipeline:
206
+ ```bash
207
+ etlplus render --config examples/configs/pipeline.yml --template ddl
208
+ ```
209
+
210
+ Render a single table in that pipeline:
211
+ ```bash
212
+ etlplus render --config examples/configs/pipeline.yml --table customers --template view
213
+ ```
214
+
215
+ Render from a standalone table spec to a file:
216
+ ```bash
217
+ etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
218
+ ```
219
+
170
220
  #### Extract Data
171
221
 
172
- Note: For file sources, the format is inferred from the filename extension; the `--format` option is
173
- ignored. To treat passing `--format` as an error for file sources, either set
174
- `ETLPLUS_FORMAT_BEHAVIOR=error` or pass the CLI flag `--strict-format`.
222
+ Note: For file sources, the format is normally inferred from the filename extension. Use
223
+ `--source-format` to override inference when a file lacks an extension or when you want to force a
224
+ specific parser.
175
225
 
176
226
  Extract from JSON file:
177
227
  ```bash
@@ -212,6 +262,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
212
262
 
213
263
  #### Transform Data
214
264
 
265
+ When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
266
+ `-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
267
+ control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
268
+ paths continue to infer formats from their extensions. Use `--from` to override the inferred source
269
+ connector type and `--to` to override the inferred target connector type, matching the `etlplus
270
+ extract`/`etlplus load` behavior.
271
+
272
+ Transform file inputs while overriding connector types:
273
+ ```bash
274
+ etlplus transform --from file examples/data/sample.json \
275
+ --operations '{"select": ["name", "email"]}' \
276
+ --to file -o temp/selected_output.json
277
+ ```
278
+
215
279
  Filter and select fields:
216
280
  ```bash
217
281
  etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
@@ -235,19 +299,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
235
299
 
236
300
  #### Load Data
237
301
 
302
+ `etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
303
+
238
304
  Load to JSON file:
239
305
  ```bash
240
- etlplus load '{"name": "John", "age": 30}' file temp/sample_output.json
306
+ etlplus extract file examples/data/sample.json \
307
+ | etlplus load --to file temp/sample_output.json
241
308
  ```
242
309
 
243
310
  Load to CSV file:
244
311
  ```bash
245
- etlplus load '[{"name": "John", "age": 30}]' file temp/sample_output.csv
312
+ etlplus extract file examples/data/sample.csv \
313
+ | etlplus load --to file temp/sample_output.csv
246
314
  ```
247
315
 
248
316
  Load to REST API:
249
317
  ```bash
250
- etlplus load examples/data/sample.json api https://api.example.com/endpoint
318
+ cat examples/data/sample.json \
319
+ | etlplus load --to api https://api.example.com/endpoint
251
320
  ```
252
321
 
253
322
  ### Python API
@@ -285,6 +354,19 @@ For YAML-driven pipelines executed end-to-end (extract → validate → transfor
285
354
  - Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
286
355
  - Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
287
356
 
357
+ CLI quick reference for pipelines:
358
+
359
+ ```bash
360
+ # List jobs or show a pipeline summary
361
+ etlplus check --config examples/configs/pipeline.yml --jobs
362
+ etlplus check --config examples/configs/pipeline.yml --summary
363
+
364
+ # Run a job
365
+ etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
366
+
367
+ # Deprecated shim (will be removed): etlplus pipeline
368
+ ```
369
+
288
370
  ### Complete ETL Pipeline Example
289
371
 
290
372
  ```bash
@@ -301,41 +383,28 @@ etlplus validate temp/sample_transformed.json \
301
383
  --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
302
384
 
303
385
  # 4. Load to CSV
304
- etlplus load temp/sample_transformed.json file temp/sample_output.csv
386
+ cat temp/sample_transformed.json \
387
+ | etlplus load --to temp/sample_output.csv
305
388
  ```
306
389
 
307
- ### Environment Variables
308
-
309
- ETLPlus honors a small number of environment toggles to refine CLI behavior:
390
+ ### Format Overrides
310
391
 
311
- - `ETLPLUS_FORMAT_BEHAVIOR`: controls what happens when `--format` is provided for
312
- file sources or targets (extract/load) where the format is inferred from the
313
- filename extension.
314
- - `error|fail|strict`: treat as error (non-zero exit)
315
- - `warn` (default): print a warning to stderr
316
- - `ignore|silent`: no message
317
- - Precedence: the CLI flag `--strict-format` overrides the environment.
392
+ `--source-format` and `--target-format` override whichever format would normally be inferred from a
393
+ file extension. This is useful when an input lacks an extension (for example, `records.txt` that
394
+ actually contains CSV) or when you intentionally want to treat a file as another format.
318
395
 
319
396
  Examples (zsh):
320
397
 
321
398
  ```zsh
322
- # Warn (default)
323
- etlplus extract file data.csv --format csv
324
- etlplus load data.json file out.csv --format csv
325
-
326
- # Enforce error via environment
327
- ETLPLUS_FORMAT_BEHAVIOR=error \
328
- etlplus extract file data.csv --format csv
329
- ETLPLUS_FORMAT_BEHAVIOR=error \
330
- etlplus load data.json file out.csv --format csv
331
-
332
- # Equivalent strict behavior via flag (overrides environment)
333
- etlplus extract file data.csv --format csv --strict-format
334
- etlplus load data.json file out.csv --format csv --strict-format
335
-
336
- # Recommended: rely on extension, no --format needed for files
337
- etlplus extract file data.csv
338
- etlplus load data.json file out.csv
399
+ # Force CSV parsing for an extension-less file
400
+ etlplus extract --from file data.txt --source-format csv
401
+
402
+ # Write CSV to a file without the .csv suffix
403
+ etlplus load --to file output.bin --target-format csv < data.json
404
+
405
+ # Leave the flags off when extensions already match the desired format
406
+ etlplus extract --from file data.csv
407
+ etlplus load --to file data.json < data.json
339
408
  ```
340
409
 
341
410
  ## Transformation Operations
@@ -19,13 +19,15 @@ package and command-line interface for data extraction, validation, transformati
19
19
  - [Quickstart](#quickstart)
20
20
  - [Usage](#usage)
21
21
  - [Command Line Interface](#command-line-interface)
22
+ - [Check Pipelines](#check-pipelines)
23
+ - [Render SQL DDL](#render-sql-ddl)
22
24
  - [Extract Data](#extract-data)
23
25
  - [Validate Data](#validate-data)
24
26
  - [Transform Data](#transform-data)
25
27
  - [Load Data](#load-data)
26
28
  - [Python API](#python-api)
27
29
  - [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
28
- - [Environment Variables](#environment-variables)
30
+ - [Format Overrides](#format-overrides)
29
31
  - [Transformation Operations](#transformation-operations)
30
32
  - [Filter Operations](#filter-operations)
31
33
  - [Aggregation Functions](#aggregation-functions)
@@ -37,6 +39,8 @@ package and command-line interface for data extraction, validation, transformati
37
39
  - [Test Layers](#test-layers)
38
40
  - [Code Coverage](#code-coverage)
39
41
  - [Linting](#linting)
42
+ - [Updating Demo Snippets](#updating-demo-snippets)
43
+ - [Releasing to PyPI](#releasing-to-pypi)
40
44
  - [Links](#links)
41
45
  - [License](#license)
42
46
  - [Contributing](#contributing)
@@ -44,6 +48,14 @@ package and command-line interface for data extraction, validation, transformati
44
48
 
45
49
  ## Features
46
50
 
51
+ - **Check** data pipeline definitions before running them:
52
+ - Summarize jobs, sources, targets, and transforms
53
+ - Confirm configuration changes by printing focused sections on demand
54
+
55
+ - **Render** SQL DDL from shared table specs:
56
+ - Generate CREATE TABLE or view statements
57
+ - Swap templates or direct output to files for database migrations
58
+
47
59
  - **Extract** data from multiple sources:
48
60
  - Files (CSV, JSON, XML, YAML)
49
61
  - Databases (connection string support)
@@ -125,11 +137,49 @@ etlplus --help
125
137
  etlplus --version
126
138
  ```
127
139
 
140
+ #### Check Pipelines
141
+
142
+ Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
143
+ job names, summarize configured sources and targets, or drill into specific sections.
144
+
145
+ List jobs and show a pipeline summary:
146
+ ```bash
147
+ etlplus check --config examples/configs/pipeline.yml --jobs
148
+ etlplus check --config examples/configs/pipeline.yml --summary
149
+ ```
150
+
151
+ Show sources or transforms for troubleshooting:
152
+ ```bash
153
+ etlplus check --config examples/configs/pipeline.yml --sources
154
+ etlplus check --config examples/configs/pipeline.yml --transforms
155
+ ```
156
+
157
+ #### Render SQL DDL
158
+
159
+ Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
160
+ or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
161
+ own).
162
+
163
+ Render all tables defined in a pipeline:
164
+ ```bash
165
+ etlplus render --config examples/configs/pipeline.yml --template ddl
166
+ ```
167
+
168
+ Render a single table in that pipeline:
169
+ ```bash
170
+ etlplus render --config examples/configs/pipeline.yml --table customers --template view
171
+ ```
172
+
173
+ Render from a standalone table spec to a file:
174
+ ```bash
175
+ etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
176
+ ```
177
+
128
178
  #### Extract Data
129
179
 
130
- Note: For file sources, the format is inferred from the filename extension; the `--format` option is
131
- ignored. To treat passing `--format` as an error for file sources, either set
132
- `ETLPLUS_FORMAT_BEHAVIOR=error` or pass the CLI flag `--strict-format`.
180
+ Note: For file sources, the format is normally inferred from the filename extension. Use
181
+ `--source-format` to override inference when a file lacks an extension or when you want to force a
182
+ specific parser.
133
183
 
134
184
  Extract from JSON file:
135
185
  ```bash
@@ -170,6 +220,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
170
220
 
171
221
  #### Transform Data
172
222
 
223
+ When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
224
+ `-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
225
+ control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
226
+ paths continue to infer formats from their extensions. Use `--from` to override the inferred source
227
+ connector type and `--to` to override the inferred target connector type, matching the `etlplus
228
+ extract`/`etlplus load` behavior.
229
+
230
+ Transform file inputs while overriding connector types:
231
+ ```bash
232
+ etlplus transform --from file examples/data/sample.json \
233
+ --operations '{"select": ["name", "email"]}' \
234
+ --to file -o temp/selected_output.json
235
+ ```
236
+
173
237
  Filter and select fields:
174
238
  ```bash
175
239
  etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
@@ -193,19 +257,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
193
257
 
194
258
  #### Load Data
195
259
 
260
+ `etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
261
+
196
262
  Load to JSON file:
197
263
  ```bash
198
- etlplus load '{"name": "John", "age": 30}' file temp/sample_output.json
264
+ etlplus extract file examples/data/sample.json \
265
+ | etlplus load --to file temp/sample_output.json
199
266
  ```
200
267
 
201
268
  Load to CSV file:
202
269
  ```bash
203
- etlplus load '[{"name": "John", "age": 30}]' file temp/sample_output.csv
270
+ etlplus extract file examples/data/sample.csv \
271
+ | etlplus load --to file temp/sample_output.csv
204
272
  ```
205
273
 
206
274
  Load to REST API:
207
275
  ```bash
208
- etlplus load examples/data/sample.json api https://api.example.com/endpoint
276
+ cat examples/data/sample.json \
277
+ | etlplus load --to api https://api.example.com/endpoint
209
278
  ```
210
279
 
211
280
  ### Python API
@@ -243,6 +312,19 @@ For YAML-driven pipelines executed end-to-end (extract → validate → transfor
243
312
  - Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
244
313
  - Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
245
314
 
315
+ CLI quick reference for pipelines:
316
+
317
+ ```bash
318
+ # List jobs or show a pipeline summary
319
+ etlplus check --config examples/configs/pipeline.yml --jobs
320
+ etlplus check --config examples/configs/pipeline.yml --summary
321
+
322
+ # Run a job
323
+ etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
324
+
325
+ # Deprecated shim (will be removed): etlplus pipeline
326
+ ```
327
+
246
328
  ### Complete ETL Pipeline Example
247
329
 
248
330
  ```bash
@@ -259,41 +341,28 @@ etlplus validate temp/sample_transformed.json \
259
341
  --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
260
342
 
261
343
  # 4. Load to CSV
262
- etlplus load temp/sample_transformed.json file temp/sample_output.csv
344
+ cat temp/sample_transformed.json \
345
+ | etlplus load --to temp/sample_output.csv
263
346
  ```
264
347
 
265
- ### Environment Variables
266
-
267
- ETLPlus honors a small number of environment toggles to refine CLI behavior:
348
+ ### Format Overrides
268
349
 
269
- - `ETLPLUS_FORMAT_BEHAVIOR`: controls what happens when `--format` is provided for
270
- file sources or targets (extract/load) where the format is inferred from the
271
- filename extension.
272
- - `error|fail|strict`: treat as error (non-zero exit)
273
- - `warn` (default): print a warning to stderr
274
- - `ignore|silent`: no message
275
- - Precedence: the CLI flag `--strict-format` overrides the environment.
350
+ `--source-format` and `--target-format` override whichever format would normally be inferred from a
351
+ file extension. This is useful when an input lacks an extension (for example, `records.txt` that
352
+ actually contains CSV) or when you intentionally want to treat a file as another format.
276
353
 
277
354
  Examples (zsh):
278
355
 
279
356
  ```zsh
280
- # Warn (default)
281
- etlplus extract file data.csv --format csv
282
- etlplus load data.json file out.csv --format csv
283
-
284
- # Enforce error via environment
285
- ETLPLUS_FORMAT_BEHAVIOR=error \
286
- etlplus extract file data.csv --format csv
287
- ETLPLUS_FORMAT_BEHAVIOR=error \
288
- etlplus load data.json file out.csv --format csv
289
-
290
- # Equivalent strict behavior via flag (overrides environment)
291
- etlplus extract file data.csv --format csv --strict-format
292
- etlplus load data.json file out.csv --format csv --strict-format
293
-
294
- # Recommended: rely on extension, no --format needed for files
295
- etlplus extract file data.csv
296
- etlplus load data.json file out.csv
357
+ # Force CSV parsing for an extension-less file
358
+ etlplus extract --from file data.txt --source-format csv
359
+
360
+ # Write CSV to a file without the .csv suffix
361
+ etlplus load --to file output.bin --target-format csv < data.json
362
+
363
+ # Leave the flags off when extensions already match the desired format
364
+ etlplus extract --from file data.csv
365
+ etlplus load --to file data.json < data.json
297
366
  ```
298
367
 
299
368
  ## Transformation Operations
@@ -245,13 +245,13 @@ job. Those values are merged into the client configuration and forwarded to
245
245
  `EndpointClient.paginate(..., rate_limit_overrides=...)`, ensuring only that job’s paginator is sped
246
246
  up or slowed down.
247
247
 
248
- Environment / format inference note:
248
+ Format override note:
249
249
 
250
- When extracting from file sources, ETLPlus infers the format from the filename extension (e.g.
251
- `.csv`, `.json`, `.xml`, `.yaml`). Passing an explicit CLI `--format` for files is ignored unless
252
- strict mode is enabled. To enforce an error if contributors still specify a redundant `--format`
253
- flag in scripts or CI runners, set the environment variable `ETLPLUS_FORMAT_BEHAVIOR=error` or use
254
- the CLI flag `--strict-format`. This keeps pipelines cleaner by relying on naming conventions.
250
+ When extracting from file sources, ETLPlus still infers the format from the filename extension
251
+ (`.csv`, `.json`, `.xml`, `.yaml`). However, `--source-format` and `--target-format` now override
252
+ that inference for both Typer- and argparse-based CLIs. This means you can safely point at files
253
+ without extensions or with misleading suffixes and force the desired parser or writer without having
254
+ to rename the file first.
255
255
 
256
256
  Note: When using a service + endpoint in a source, URL composition (including `base_path`) is
257
257
  handled automatically. See “Runner behavior with base_path (sources and targets)” in the APIs
@@ -378,31 +378,32 @@ jobs:
378
378
  Once you have a pipeline YAML, you can run jobs either from the
379
379
  command line or directly from Python.
380
380
 
381
- ### CLI: `etlplus pipeline` and `etlplus run`
381
+ ### CLI: `etlplus check` (inspect) and `etlplus run` (execute)
382
382
 
383
- List jobs defined in a pipeline file:
383
+ List jobs or show a summary from a pipeline file:
384
384
 
385
385
  ```bash
386
- etlplus pipeline --config examples/configs/pipeline.yml --list
386
+ etlplus check --config examples/configs/pipeline.yml --jobs
387
+ etlplus check --config examples/configs/pipeline.yml --summary
387
388
  ```
388
389
 
389
390
  Run a specific job end-to-end (extract → validate → transform → load):
390
391
 
391
392
  ```bash
392
- etlplus pipeline --config examples/configs/pipeline.yml --run file_to_file_customers
393
-
394
- # Equivalent, using the dedicated run command
395
393
  etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
396
394
  ```
397
395
 
398
396
  Notes:
399
397
 
400
- - Both commands read the same YAML schema described in this guide.
398
+ - These commands read the same YAML schema described in this guide.
401
399
  - Environment-variable substitution (e.g. `${GITHUB_TOKEN}`) is applied the same way as when loading
402
400
  configs via the Python API.
403
401
  - For more details on the orchestration implementation, see
404
402
  [Runner internals: etlplus.run](run-module.md).
405
403
 
404
+ Deprecated: `etlplus pipeline` is still available as a shim but will be removed in a future release;
405
+ prefer `check` and `run`.
406
+
406
407
  ### Python: `etlplus.run.run`
407
408
 
408
409
  To trigger a job programmatically, use the high-level runner function exposed by the package: