etlplus 0.4.0__tar.gz → 0.8.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. {etlplus-0.4.0 → etlplus-0.8.3}/.pre-commit-config.yaml +5 -1
  2. {etlplus-0.4.0 → etlplus-0.8.3}/DEMO.md +5 -5
  3. etlplus-0.8.3/MANIFEST.in +12 -0
  4. {etlplus-0.4.0/etlplus.egg-info → etlplus-0.8.3}/PKG-INFO +109 -36
  5. {etlplus-0.4.0 → etlplus-0.8.3}/README.md +105 -35
  6. etlplus-0.8.3/docs/README.md +18 -0
  7. {etlplus-0.4.0 → etlplus-0.8.3}/docs/pipeline-guide.md +33 -13
  8. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/__main__.py +1 -2
  9. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/README.md +24 -26
  10. etlplus-0.8.3/etlplus/cli/__init__.py +15 -0
  11. etlplus-0.8.3/etlplus/cli/commands.py +870 -0
  12. etlplus-0.8.3/etlplus/cli/constants.py +65 -0
  13. etlplus-0.8.3/etlplus/cli/handlers.py +657 -0
  14. etlplus-0.8.3/etlplus/cli/io.py +320 -0
  15. etlplus-0.8.3/etlplus/cli/main.py +213 -0
  16. etlplus-0.8.3/etlplus/cli/options.py +49 -0
  17. etlplus-0.8.3/etlplus/cli/state.py +335 -0
  18. etlplus-0.8.3/etlplus/cli/types.py +33 -0
  19. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/config/pipeline.py +11 -0
  20. etlplus-0.8.3/etlplus/database/__init__.py +44 -0
  21. etlplus-0.8.3/etlplus/database/ddl.py +319 -0
  22. etlplus-0.8.3/etlplus/database/engine.py +151 -0
  23. etlplus-0.8.3/etlplus/database/orm.py +354 -0
  24. etlplus-0.8.3/etlplus/database/schema.py +274 -0
  25. etlplus-0.8.3/etlplus/database/types.py +33 -0
  26. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/run.py +2 -4
  27. etlplus-0.8.3/etlplus/templates/__init__.py +5 -0
  28. etlplus-0.8.3/etlplus/templates/ddl.sql.j2 +128 -0
  29. etlplus-0.8.3/etlplus/templates/view.sql.j2 +69 -0
  30. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/types.py +5 -0
  31. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/utils.py +0 -31
  32. {etlplus-0.4.0 → etlplus-0.8.3/etlplus.egg-info}/PKG-INFO +109 -36
  33. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus.egg-info/SOURCES.txt +29 -3
  34. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus.egg-info/requires.txt +3 -0
  35. {etlplus-0.4.0 → etlplus-0.8.3}/examples/README.md +4 -4
  36. etlplus-0.8.3/examples/configs/ddl_spec.yml +67 -0
  37. {etlplus-0.4.0 → etlplus-0.8.3}/pyproject.toml +4 -1
  38. {etlplus-0.4.0 → etlplus-0.8.3}/setup.py +7 -0
  39. etlplus-0.8.3/tests/integration/test_i_cli.py +172 -0
  40. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/test_i_pagination_strategy.py +39 -34
  41. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/test_i_pipeline_smoke.py +5 -4
  42. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_pagination_client.py +1 -1
  43. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_paginator.py +1 -1
  44. etlplus-0.8.3/tests/unit/cli/conftest.py +183 -0
  45. etlplus-0.8.3/tests/unit/cli/test_u_cli_handlers.py +797 -0
  46. etlplus-0.8.3/tests/unit/cli/test_u_cli_main.py +182 -0
  47. etlplus-0.8.3/tests/unit/cli/test_u_cli_state.py +343 -0
  48. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/config/test_u_connector.py +1 -1
  49. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/config/test_u_pipeline.py +31 -1
  50. etlplus-0.8.3/tests/unit/database/test_u_database_ddl.py +265 -0
  51. etlplus-0.8.3/tests/unit/database/test_u_database_engine.py +198 -0
  52. etlplus-0.8.3/tests/unit/database/test_u_database_orm.py +308 -0
  53. etlplus-0.8.3/tests/unit/database/test_u_database_schema.py +243 -0
  54. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_extract.py +1 -1
  55. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_load.py +1 -1
  56. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_utils.py +0 -10
  57. etlplus-0.4.0/etlplus/cli.py +0 -1186
  58. etlplus-0.4.0/tests/integration/test_i_cli.py +0 -244
  59. etlplus-0.4.0/tests/unit/test_u_cli.py +0 -545
  60. etlplus-0.4.0/tools/run_pipeline.py +0 -561
  61. {etlplus-0.4.0 → etlplus-0.8.3}/.coveragerc +0 -0
  62. {etlplus-0.4.0 → etlplus-0.8.3}/.editorconfig +0 -0
  63. {etlplus-0.4.0 → etlplus-0.8.3}/.gitattributes +0 -0
  64. {etlplus-0.4.0 → etlplus-0.8.3}/.github/actions/python-bootstrap/action.yml +0 -0
  65. {etlplus-0.4.0 → etlplus-0.8.3}/.github/workflows/ci.yml +0 -0
  66. {etlplus-0.4.0 → etlplus-0.8.3}/.gitignore +0 -0
  67. {etlplus-0.4.0 → etlplus-0.8.3}/.ruff.toml +0 -0
  68. {etlplus-0.4.0 → etlplus-0.8.3}/CODE_OF_CONDUCT.md +0 -0
  69. {etlplus-0.4.0 → etlplus-0.8.3}/CONTRIBUTING.md +0 -0
  70. {etlplus-0.4.0 → etlplus-0.8.3}/LICENSE +0 -0
  71. {etlplus-0.4.0 → etlplus-0.8.3}/Makefile +0 -0
  72. {etlplus-0.4.0 → etlplus-0.8.3}/REFERENCES.md +0 -0
  73. {etlplus-0.4.0 → etlplus-0.8.3}/docs/snippets/installation_version.md +0 -0
  74. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/__init__.py +0 -0
  75. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/__version__.py +0 -0
  76. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/__init__.py +0 -0
  77. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/auth.py +0 -0
  78. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/config.py +0 -0
  79. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/endpoint_client.py +0 -0
  80. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/errors.py +0 -0
  81. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/pagination/__init__.py +0 -0
  82. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/pagination/client.py +0 -0
  83. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/pagination/config.py +0 -0
  84. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/pagination/paginator.py +0 -0
  85. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/rate_limiting/__init__.py +0 -0
  86. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/rate_limiting/config.py +0 -0
  87. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
  88. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/request_manager.py +0 -0
  89. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/retry_manager.py +0 -0
  90. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/transport.py +0 -0
  91. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/api/types.py +0 -0
  92. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/config/__init__.py +0 -0
  93. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/config/connector.py +0 -0
  94. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/config/jobs.py +0 -0
  95. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/config/profile.py +0 -0
  96. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/config/types.py +0 -0
  97. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/config/utils.py +0 -0
  98. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/enums.py +0 -0
  99. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/extract.py +0 -0
  100. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/file.py +0 -0
  101. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/load.py +0 -0
  102. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/mixins.py +0 -0
  103. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/py.typed +0 -0
  104. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/run_helpers.py +0 -0
  105. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/transform.py +0 -0
  106. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/validate.py +0 -0
  107. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/validation/__init__.py +0 -0
  108. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus/validation/utils.py +0 -0
  109. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus.egg-info/dependency_links.txt +0 -0
  110. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus.egg-info/entry_points.txt +0 -0
  111. {etlplus-0.4.0 → etlplus-0.8.3}/etlplus.egg-info/top_level.txt +0 -0
  112. {etlplus-0.4.0 → etlplus-0.8.3}/examples/configs/pipeline.yml +0 -0
  113. {etlplus-0.4.0 → etlplus-0.8.3}/examples/data/sample.csv +0 -0
  114. {etlplus-0.4.0 → etlplus-0.8.3}/examples/data/sample.json +0 -0
  115. {etlplus-0.4.0 → etlplus-0.8.3}/examples/data/sample.xml +0 -0
  116. {etlplus-0.4.0 → etlplus-0.8.3}/examples/data/sample.xsd +0 -0
  117. {etlplus-0.4.0 → etlplus-0.8.3}/examples/data/sample.yaml +0 -0
  118. {etlplus-0.4.0 → etlplus-0.8.3}/examples/quickstart_python.py +0 -0
  119. {etlplus-0.4.0 → etlplus-0.8.3}/pytest.ini +0 -0
  120. {etlplus-0.4.0 → etlplus-0.8.3}/setup.cfg +0 -0
  121. {etlplus-0.4.0 → etlplus-0.8.3}/tests/__init__.py +0 -0
  122. {etlplus-0.4.0 → etlplus-0.8.3}/tests/conftest.py +0 -0
  123. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/conftest.py +0 -0
  124. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/test_i_examples_data_parity.py +0 -0
  125. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
  126. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/test_i_run.py +0 -0
  127. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
  128. {etlplus-0.4.0 → etlplus-0.8.3}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
  129. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/conftest.py +0 -0
  130. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_auth.py +0 -0
  131. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_config.py +0 -0
  132. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_endpoint_client.py +0 -0
  133. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_mocks.py +0 -0
  134. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_pagination_config.py +0 -0
  135. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_rate_limit_config.py +0 -0
  136. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_rate_limiter.py +0 -0
  137. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_request_manager.py +0 -0
  138. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_retry_manager.py +0 -0
  139. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_transport.py +0 -0
  140. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/api/test_u_types.py +0 -0
  141. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/config/test_u_config_utils.py +0 -0
  142. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/config/test_u_jobs.py +0 -0
  143. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/conftest.py +0 -0
  144. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_enums.py +0 -0
  145. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_file.py +0 -0
  146. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_main.py +0 -0
  147. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_mixins.py +0 -0
  148. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_run.py +0 -0
  149. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_run_helpers.py +0 -0
  150. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_transform.py +0 -0
  151. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_validate.py +0 -0
  152. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/test_u_version.py +0 -0
  153. {etlplus-0.4.0 → etlplus-0.8.3}/tests/unit/validation/test_u_validation_utils.py +0 -0
  154. {etlplus-0.4.0 → etlplus-0.8.3}/tools/update_demo_snippets.py +0 -0
{etlplus-0.4.0 → etlplus-0.8.3}/.pre-commit-config.yaml
@@ -159,7 +159,11 @@ repos:
     rev: v1.19.0
     hooks:
       - id: mypy
-        args: [--ignore-missing-imports, --install-types, --non-interactive]
+        args:
+          - --cache-dir=.mypy_cache/pre-commit
+          - --ignore-missing-imports
+          - --install-types
+          - --non-interactive
 
   - repo: https://github.com/pycqa/flake8
     rev: 7.3.0
{etlplus-0.4.0 → etlplus-0.8.3}/DEMO.md
@@ -58,7 +58,7 @@ John Doe,30,New York
 Jane Smith,25,Los Angeles
 CSVDATA
 
-$ etlplus extract file users.csv --format csv
+$ etlplus extract users.csv
 [
   {
     "name": "John Doe",
@@ -151,7 +151,7 @@ $ etlplus load '{"name": "John", "status": "active"}' file output.json
 $ etlplus load '[
   {"name": "John", "email": "john@example.com"},
   {"name": "Jane", "email": "jane@example.com"}
-]' file users.csv --format csv
+]' --to users.csv
 {
   "status": "success",
   "message": "Data loaded to users.csv",
@@ -170,14 +170,14 @@ This example shows a complete ETL workflow:
 
 ```bash
 # Step 1: Extract
-$ etlplus extract file raw_data.csv --format csv -o extracted.json
+$ etlplus extract raw_data.csv > extracted.json
 
 # Step 2: Transform
-$ etlplus transform extracted.json \
+$ etlplus transform --from extracted.json \
   --operations '{
     "filter": {"field": "age", "op": "gte", "value": 18},
     "select": ["name", "email", "age"]
-  }' -o transformed.json
+  }' --to transformed.json
 
 # Step 3: Validate
 $ etlplus validate transformed.json \
etlplus-0.8.3/MANIFEST.in (new file)
@@ -0,0 +1,12 @@
+# MANIFEST.in
+# ETLPlus
+#
+# Copyright © 2026 Dagitali LLC. All rights reserved.
+#
+# Contains commands that allow lists of files to be discovered and manipulated.
+#
+# See:
+# 1. https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html
+
+# Include Jinja template files in the etlplus package
+recursive-include etlplus/templates *.j2
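
The `recursive-include` above ships the new `*.j2` templates inside the sdist and wheel. A minimal sketch of loading one of those packaged templates at runtime with Jinja2's `PackageLoader`; how etlplus itself loads them is not shown in this diff, so treat the helper below as illustrative:

```python
# Illustrative only: load a template that MANIFEST.in packaged with etlplus.
# PackageLoader("etlplus", "templates") resolves files inside the installed
# package, so this works from a wheel as well as a source checkout.
from jinja2 import Environment, PackageLoader


def render_ddl(context: dict) -> str:
    env = Environment(loader=PackageLoader("etlplus", "templates"))
    template = env.get_template("ddl.sql.j2")  # shipped via recursive-include
    return template.render(**context)
```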
{etlplus-0.4.0/etlplus.egg-info → etlplus-0.8.3}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: etlplus
-Version: 0.4.0
+Version: 0.8.3
 Summary: A Swiss Army knife for simple ETL operations
 Home-page: https://github.com/Dagitali/ETLPlus
 Author: ETLPlus Team
@@ -21,7 +21,10 @@ Requires-Dist: jinja2>=3.1.6
 Requires-Dist: pyodbc>=5.3.0
 Requires-Dist: python-dotenv>=1.2.1
 Requires-Dist: pandas>=2.3.3
+Requires-Dist: pydantic>=2.12.5
+Requires-Dist: PyYAML>=6.0.3
 Requires-Dist: requests>=2.32.5
+Requires-Dist: SQLAlchemy>=2.0.45
 Requires-Dist: typer>=0.21.0
 Provides-Extra: dev
 Requires-Dist: black>=25.9.0; extra == "dev"
@@ -61,13 +64,15 @@ package and command-line interface for data extraction, validation, transformati
 - [Quickstart](#quickstart)
 - [Usage](#usage)
 - [Command Line Interface](#command-line-interface)
+- [Check Pipelines](#check-pipelines)
+- [Render SQL DDL](#render-sql-ddl)
 - [Extract Data](#extract-data)
 - [Validate Data](#validate-data)
 - [Transform Data](#transform-data)
 - [Load Data](#load-data)
 - [Python API](#python-api)
 - [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
-- [Environment Variables](#environment-variables)
+- [Format Overrides](#format-overrides)
 - [Transformation Operations](#transformation-operations)
 - [Filter Operations](#filter-operations)
 - [Aggregation Functions](#aggregation-functions)
@@ -79,6 +84,8 @@ package and command-line interface for data extraction, validation, transformati
 - [Test Layers](#test-layers)
 - [Code Coverage](#code-coverage)
 - [Linting](#linting)
+- [Updating Demo Snippets](#updating-demo-snippets)
+- [Releasing to PyPI](#releasing-to-pypi)
 - [Links](#links)
 - [License](#license)
 - [Contributing](#contributing)
@@ -86,6 +93,14 @@ package and command-line interface for data extraction, validation, transformati
 
 ## Features
 
+- **Check** data pipeline definitions before running them:
+  - Summarize jobs, sources, targets, and transforms
+  - Confirm configuration changes by printing focused sections on demand
+
+- **Render** SQL DDL from shared table specs:
+  - Generate CREATE TABLE or view statements
+  - Swap templates or direct output to files for database migrations
+
 - **Extract** data from multiple sources:
   - Files (CSV, JSON, XML, YAML)
   - Databases (connection string support)
@@ -167,11 +182,52 @@ etlplus --help
 etlplus --version
 ```
 
+The CLI is implemented with Typer (Click-based). There is no argparse compatibility layer, so rely
+on the documented commands/flags and run `etlplus <command> --help` for current options.
+
+#### Check Pipelines
+
+Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
+job names, summarize configured sources and targets, or drill into specific sections.
+
+List jobs and show a pipeline summary:
+```bash
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+```
+
+Show sources or transforms for troubleshooting:
+```bash
+etlplus check --config examples/configs/pipeline.yml --sources
+etlplus check --config examples/configs/pipeline.yml --transforms
+```
+
+#### Render SQL DDL
+
+Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
+or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
+own).
+
+Render all tables defined in a pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --template ddl
+```
+
+Render a single table in that pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --table customers --template view
+```
+
+Render from a standalone table spec to a file:
+```bash
+etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
+```
+
 #### Extract Data
 
-Note: For file sources, the format is inferred from the filename extension; the `--format` option is
-ignored. To treat passing `--format` as an error for file sources, either set
-`ETLPLUS_FORMAT_BEHAVIOR=error` or pass the CLI flag `--strict-format`.
+Note: For file sources, the format is normally inferred from the filename extension. Use
+`--source-format` to override inference when a file lacks an extension or when you want to force a
+specific parser.
 
 Extract from JSON file:
 ```bash
@@ -212,6 +268,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
 
 #### Transform Data
 
+When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
+`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
+control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
+paths continue to infer formats from their extensions. Use `--from` to override the inferred source
+connector type and `--to` to override the inferred target connector type, matching the `etlplus
+extract`/`etlplus load` behavior.
+
+Transform file inputs while overriding connector types:
+```bash
+etlplus transform --from file examples/data/sample.json \
+  --operations '{"select": ["name", "email"]}' \
+  --to file -o temp/selected_output.json
+```
+
 Filter and select fields:
 ```bash
 etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
@@ -235,19 +305,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
 
 #### Load Data
 
+`etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
+
 Load to JSON file:
 ```bash
-etlplus load '{"name": "John", "age": 30}' file temp/sample_output.json
+etlplus extract file examples/data/sample.json \
+  | etlplus load --to file temp/sample_output.json
 ```
 
 Load to CSV file:
 ```bash
-etlplus load '[{"name": "John", "age": 30}]' file temp/sample_output.csv
+etlplus extract file examples/data/sample.csv \
+  | etlplus load --to file temp/sample_output.csv
 ```
 
 Load to REST API:
 ```bash
-etlplus load examples/data/sample.json api https://api.example.com/endpoint
+cat examples/data/sample.json \
+  | etlplus load --to api https://api.example.com/endpoint
 ```
 
 ### Python API
@@ -285,6 +360,17 @@ For YAML-driven pipelines executed end-to-end (extract → validate → transfor
 - Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
 - Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
 
+CLI quick reference for pipelines:
+
+```bash
+# List jobs or show a pipeline summary
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+
+# Run a job
+etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
+```
+
 ### Complete ETL Pipeline Example
 
 ```bash
@@ -301,41 +387,28 @@ etlplus validate temp/sample_transformed.json \
   --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
 
 # 4. Load to CSV
-etlplus load temp/sample_transformed.json file temp/sample_output.csv
+cat temp/sample_transformed.json \
+  | etlplus load --to temp/sample_output.csv
 ```
 
-### Environment Variables
-
-ETLPlus honors a small number of environment toggles to refine CLI behavior:
+### Format Overrides
 
-- `ETLPLUS_FORMAT_BEHAVIOR`: controls what happens when `--format` is provided for
-  file sources or targets (extract/load) where the format is inferred from the
-  filename extension.
-  - `error|fail|strict`: treat as error (non-zero exit)
-  - `warn` (default): print a warning to stderr
-  - `ignore|silent`: no message
-  - Precedence: the CLI flag `--strict-format` overrides the environment.
+`--source-format` and `--target-format` override whichever format would normally be inferred from a
+file extension. This is useful when an input lacks an extension (for example, `records.txt` that
+actually contains CSV) or when you intentionally want to treat a file as another format.
 
 Examples (zsh):
 
 ```zsh
-# Warn (default)
-etlplus extract file data.csv --format csv
-etlplus load data.json file out.csv --format csv
-
-# Enforce error via environment
-ETLPLUS_FORMAT_BEHAVIOR=error \
-  etlplus extract file data.csv --format csv
-ETLPLUS_FORMAT_BEHAVIOR=error \
-  etlplus load data.json file out.csv --format csv
-
-# Equivalent strict behavior via flag (overrides environment)
-etlplus extract file data.csv --format csv --strict-format
-etlplus load data.json file out.csv --format csv --strict-format
-
-# Recommended: rely on extension, no --format needed for files
-etlplus extract file data.csv
-etlplus load data.json file out.csv
+# Force CSV parsing for an extension-less file
+etlplus extract --from file data.txt --source-format csv
+
+# Write CSV to a file without the .csv suffix
+etlplus load --to file output.bin --target-format csv < data.json
+
+# Leave the flags off when extensions already match the desired format
+etlplus extract --from file data.csv
+etlplus load --to file data.json < data.json
```
 
 ## Transformation Operations
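
The new Format Overrides section boils down to a simple precedence rule: an explicit `--source-format` or `--target-format` wins, otherwise the file extension decides. A small sketch of that rule; `infer_format` is a hypothetical helper for illustration, not etlplus's actual implementation:

```python
# Sketch of the inference-plus-override rule described above: an explicit
# format wins; otherwise fall back to the file extension.
from pathlib import Path

EXTENSION_FORMATS = {
    ".csv": "csv", ".json": "json", ".xml": "xml",
    ".yaml": "yaml", ".yml": "yaml",
}


def infer_format(path: str, override: str | None = None) -> str:
    if override:  # e.g. --source-format csv / --target-format csv
        return override
    suffix = Path(path).suffix.lower()
    if suffix in EXTENSION_FORMATS:
        return EXTENSION_FORMATS[suffix]
    raise ValueError(f"cannot infer format for {path!r}; pass an override")
```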
{etlplus-0.4.0 → etlplus-0.8.3}/README.md
@@ -19,13 +19,15 @@ package and command-line interface for data extraction, validation, transformati
 - [Quickstart](#quickstart)
 - [Usage](#usage)
 - [Command Line Interface](#command-line-interface)
+- [Check Pipelines](#check-pipelines)
+- [Render SQL DDL](#render-sql-ddl)
 - [Extract Data](#extract-data)
 - [Validate Data](#validate-data)
 - [Transform Data](#transform-data)
 - [Load Data](#load-data)
 - [Python API](#python-api)
 - [Complete ETL Pipeline Example](#complete-etl-pipeline-example)
-- [Environment Variables](#environment-variables)
+- [Format Overrides](#format-overrides)
 - [Transformation Operations](#transformation-operations)
 - [Filter Operations](#filter-operations)
 - [Aggregation Functions](#aggregation-functions)
@@ -37,6 +39,8 @@ package and command-line interface for data extraction, validation, transformati
 - [Test Layers](#test-layers)
 - [Code Coverage](#code-coverage)
 - [Linting](#linting)
+- [Updating Demo Snippets](#updating-demo-snippets)
+- [Releasing to PyPI](#releasing-to-pypi)
 - [Links](#links)
 - [License](#license)
 - [Contributing](#contributing)
@@ -44,6 +48,14 @@ package and command-line interface for data extraction, validation, transformati
 
 ## Features
 
+- **Check** data pipeline definitions before running them:
+  - Summarize jobs, sources, targets, and transforms
+  - Confirm configuration changes by printing focused sections on demand
+
+- **Render** SQL DDL from shared table specs:
+  - Generate CREATE TABLE or view statements
+  - Swap templates or direct output to files for database migrations
+
 - **Extract** data from multiple sources:
   - Files (CSV, JSON, XML, YAML)
   - Databases (connection string support)
@@ -125,11 +137,52 @@ etlplus --help
 etlplus --version
 ```
 
+The CLI is implemented with Typer (Click-based). There is no argparse compatibility layer, so rely
+on the documented commands/flags and run `etlplus <command> --help` for current options.
+
+#### Check Pipelines
+
+Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
+job names, summarize configured sources and targets, or drill into specific sections.
+
+List jobs and show a pipeline summary:
+```bash
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+```
+
+Show sources or transforms for troubleshooting:
+```bash
+etlplus check --config examples/configs/pipeline.yml --sources
+etlplus check --config examples/configs/pipeline.yml --transforms
+```
+
+#### Render SQL DDL
+
+Use `etlplus render` to turn table schema specs into ready-to-run SQL. Render from a pipeline config
+or from a standalone schema file, and choose the built-in `ddl` or `view` templates (or provide your
+own).
+
+Render all tables defined in a pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --template ddl
+```
+
+Render a single table in that pipeline:
+```bash
+etlplus render --config examples/configs/pipeline.yml --table customers --template view
+```
+
+Render from a standalone table spec to a file:
+```bash
+etlplus render --spec schemas/customer.yml --template view -o temp/customer_view.sql
+```
+
 #### Extract Data
 
-Note: For file sources, the format is inferred from the filename extension; the `--format` option is
-ignored. To treat passing `--format` as an error for file sources, either set
-`ETLPLUS_FORMAT_BEHAVIOR=error` or pass the CLI flag `--strict-format`.
+Note: For file sources, the format is normally inferred from the filename extension. Use
+`--source-format` to override inference when a file lacks an extension or when you want to force a
+specific parser.
 
 Extract from JSON file:
 ```bash
@@ -170,6 +223,20 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
 
 #### Transform Data
 
+When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
+`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
+control the emitted format for stdout or other non-file outputs, just like `etlplus load`. File
+paths continue to infer formats from their extensions. Use `--from` to override the inferred source
+connector type and `--to` to override the inferred target connector type, matching the `etlplus
+extract`/`etlplus load` behavior.
+
+Transform file inputs while overriding connector types:
+```bash
+etlplus transform --from file examples/data/sample.json \
+  --operations '{"select": ["name", "email"]}' \
+  --to file -o temp/selected_output.json
+```
+
 Filter and select fields:
 ```bash
 etlplus transform '[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]' \
@@ -193,19 +260,24 @@ etlplus transform examples/data/sample.json --operations '{"map": {"name": "new_
 
 #### Load Data
 
+`etlplus load` consumes JSON from stdin; provide only the target argument plus optional flags.
+
 Load to JSON file:
 ```bash
-etlplus load '{"name": "John", "age": 30}' file temp/sample_output.json
+etlplus extract file examples/data/sample.json \
+  | etlplus load --to file temp/sample_output.json
 ```
 
 Load to CSV file:
 ```bash
-etlplus load '[{"name": "John", "age": 30}]' file temp/sample_output.csv
+etlplus extract file examples/data/sample.csv \
+  | etlplus load --to file temp/sample_output.csv
 ```
 
 Load to REST API:
 ```bash
-etlplus load examples/data/sample.json api https://api.example.com/endpoint
+cat examples/data/sample.json \
+  | etlplus load --to api https://api.example.com/endpoint
 ```
 
 ### Python API
@@ -243,6 +315,17 @@ For YAML-driven pipelines executed end-to-end (extract → validate → transfor
 - Authoring: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
 - Runner API and internals: [`docs/run-module.md`](docs/run-module.md)
 
+CLI quick reference for pipelines:
+
+```bash
+# List jobs or show a pipeline summary
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
+
+# Run a job
+etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
+```
+
 ### Complete ETL Pipeline Example
 
 ```bash
@@ -259,41 +342,28 @@ etlplus validate temp/sample_transformed.json \
   --rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
 
 # 4. Load to CSV
-etlplus load temp/sample_transformed.json file temp/sample_output.csv
+cat temp/sample_transformed.json \
+  | etlplus load --to temp/sample_output.csv
 ```
 
-### Environment Variables
-
-ETLPlus honors a small number of environment toggles to refine CLI behavior:
+### Format Overrides
 
-- `ETLPLUS_FORMAT_BEHAVIOR`: controls what happens when `--format` is provided for
-  file sources or targets (extract/load) where the format is inferred from the
-  filename extension.
-  - `error|fail|strict`: treat as error (non-zero exit)
-  - `warn` (default): print a warning to stderr
-  - `ignore|silent`: no message
-  - Precedence: the CLI flag `--strict-format` overrides the environment.
+`--source-format` and `--target-format` override whichever format would normally be inferred from a
+file extension. This is useful when an input lacks an extension (for example, `records.txt` that
+actually contains CSV) or when you intentionally want to treat a file as another format.
 
 Examples (zsh):
 
 ```zsh
-# Warn (default)
-etlplus extract file data.csv --format csv
-etlplus load data.json file out.csv --format csv
-
-# Enforce error via environment
-ETLPLUS_FORMAT_BEHAVIOR=error \
-  etlplus extract file data.csv --format csv
-ETLPLUS_FORMAT_BEHAVIOR=error \
-  etlplus load data.json file out.csv --format csv
-
-# Equivalent strict behavior via flag (overrides environment)
-etlplus extract file data.csv --format csv --strict-format
-etlplus load data.json file out.csv --format csv --strict-format
-
-# Recommended: rely on extension, no --format needed for files
-etlplus extract file data.csv
-etlplus load data.json file out.csv
+# Force CSV parsing for an extension-less file
+etlplus extract --from file data.txt --source-format csv
+
+# Write CSV to a file without the .csv suffix
+etlplus load --to file output.bin --target-format csv < data.json
+
+# Leave the flags off when extensions already match the desired format
+etlplus extract --from file data.csv
+etlplus load --to file data.json < data.json
```
 
 ## Transformation Operations
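
Since `etlplus load` now reads JSON from stdin, the shell pipelines above translate directly into `subprocess` calls when you need them from Python. A minimal sketch, assuming the `etlplus` CLI is on PATH; the flags are the ones shown in the README examples:

```python
# Minimal sketch: drive the stdin-based `etlplus load` from Python,
# mirroring `cat data.json | etlplus load --to file ...` above.
import json
import subprocess

records = [{"name": "John", "email": "john@example.com"}]
subprocess.run(
    ["etlplus", "load", "--to", "file", "temp/sample_output.json"],
    input=json.dumps(records),  # JSON payload delivered over stdin
    text=True,
    check=True,
)
```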
etlplus-0.8.3/docs/README.md (new file)
@@ -0,0 +1,18 @@
+# Documentation Notes
+
+## CLI Parser Status
+- The CLI is now Typer/Click-only. The historical `argparse` parser and `create_parser` entrypoint
+  are deprecated and no longer supported for new integrations.
+- Downstream tools should invoke the Typer app exported at `etlplus.cli.commands.app` (e.g., `python
+  -m etlplus` or `etlplus ...`).
+- Handler functions still accept keyword arguments; the legacy namespace shim is temporary and will
+  be removed in a future release. Avoid constructing `argparse.Namespace` objects and instead call
+  handlers with explicit keyword arguments if you integrate programmatically.
+
+## Migration Hints
+- Replace any imports of `etlplus.cli.main.create_parser` with Typer invocations (`etlplus` binary
+  or `app` directly).
+- If you maintained custom subcommands around the old parser, port them to Typer by attaching to
+  `app` or wrapping the `etlplus` executable.
+- Tests and examples now target the Typer surface; expect argparse-focused helpers (e.g., namespace
+  format flags) to be absent.
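
For callers migrating off `create_parser`, Typer's standard test runner gives a direct programmatic path to the exported app. A short sketch using the `etlplus.cli.commands.app` export and the `check` flags documented above:

```python
# Drive the Typer app programmatically instead of the removed argparse
# parser. CliRunner is Typer's standard invocation helper.
from typer.testing import CliRunner

from etlplus.cli.commands import app

runner = CliRunner()
result = runner.invoke(
    app,
    ["check", "--config", "examples/configs/pipeline.yml", "--jobs"],
)
print(result.exit_code)
print(result.output)
```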
{etlplus-0.4.0 → etlplus-0.8.3}/docs/pipeline-guide.md
@@ -7,6 +7,28 @@ ETLPlus focuses on simple, JSON-first ETL. The pipeline file is a declarative de
 runner (a script, Makefile, CI job) can parse and execute using ETLPlus primitives: `extract`,
 `validate`, `transform`, and `load`.
 
+CLI note: ETLPlus uses Typer for command parsing and does not ship an argparse shim. Use the
+documented `etlplus` commands and flags (check `etlplus --help`) when wiring your runner.
+
+## Running a pipeline from YAML (CLI)
+
+Use the built-in `etlplus run` command to execute jobs defined in a pipeline YAML. The command reads
+your config, resolves vars and env placeholders, then runs the requested job:
+
+```bash
+# List jobs with the check command
+etlplus check --config examples/configs/pipeline.yml --jobs
+
+# Run a specific job
+etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
+
+# Run another job from the same config
+etlplus run --config examples/configs/pipeline.yml --job api_to_file_github_repos
+```
+
+For scripted usage inside a larger Python project, prefer importing the Python API directly (e.g.,
+`extract`, `transform`, `validate`, `load`) instead of invoking the CLI subprocess.
+
 ## Top-level structure
 
 A pipeline file typically includes:
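
A sketch of the scripted path that the added guide text recommends, using the primitive names it mentions; the call signatures below are assumptions for illustration, not taken from etlplus's documented API, so check the package's Python API docs before copying:

```python
# Illustrative only: in-process ETL with the primitives named above,
# with assumed (not documented) signatures.
from etlplus import extract, load, transform

data = extract("file", "examples/data/sample.csv")         # assumed signature
selected = transform(data, {"select": ["name", "email"]})  # assumed signature
load(selected, "file", "temp/sample_output.json")          # assumed signature
```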
@@ -245,13 +267,13 @@ job. Those values are merged into the client configuration and forwarded to
 `EndpointClient.paginate(..., rate_limit_overrides=...)`, ensuring only that job’s paginator is sped
 up or slowed down.
 
-Environment / format inference note:
+Format override note:
 
-When extracting from file sources, ETLPlus infers the format from the filename extension (e.g.
-`.csv`, `.json`, `.xml`, `.yaml`). Passing an explicit CLI `--format` for files is ignored unless
-strict mode is enabled. To enforce an error if contributors still specify a redundant `--format`
-flag in scripts or CI runners, set the environment variable `ETLPLUS_FORMAT_BEHAVIOR=error` or use
-the CLI flag `--strict-format`. This keeps pipelines cleaner by relying on naming conventions.
+When extracting from file sources, ETLPlus still infers the format from the filename extension
+(`.csv`, `.json`, `.xml`, `.yaml`). However, `--source-format` and `--target-format` now override
+that inference for both Typer- and argparse-based CLIs. This means you can safely point at files
+without extensions or with misleading suffixes and force the desired parser or writer without having
+to rename the file first.
 
 Note: When using a service + endpoint in a source, URL composition (including `base_path`) is
 handled automatically. See “Runner behavior with base_path (sources and targets)” in the APIs
@@ -378,26 +400,24 @@ jobs:
 Once you have a pipeline YAML, you can run jobs either from the
 command line or directly from Python.
 
-### CLI: `etlplus pipeline` and `etlplus run`
+### CLI: `etlplus check` (inspect) and `etlplus run` (execute)
 
-List jobs defined in a pipeline file:
+List jobs or show a summary from a pipeline file:
 
 ```bash
-etlplus pipeline --config examples/configs/pipeline.yml --list
+etlplus check --config examples/configs/pipeline.yml --jobs
+etlplus check --config examples/configs/pipeline.yml --summary
 ```
 
 Run a specific job end-to-end (extract → validate → transform → load):
 
 ```bash
-etlplus pipeline --config examples/configs/pipeline.yml --run file_to_file_customers
-
-# Equivalent, using the dedicated run command
 etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
 ```
 
 Notes:
 
-- Both commands read the same YAML schema described in this guide.
+- These commands read the same YAML schema described in this guide.
 - Environment-variable substitution (e.g. `${GITHUB_TOKEN}`) is applied the same way as when loading
   configs via the Python API.
 - For more details on the orchestration implementation, see
{etlplus-0.4.0 → etlplus-0.8.3}/etlplus/__main__.py
@@ -11,8 +11,7 @@ from .cli import main
 
 
 def _run() -> int:
-    """Return the exit status from :func:`etlplus.cli.main`."""
-
+    """Return the exit status."""
    return main()
 
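
For context, `_run` exists so the module can hand `main()`'s exit status back to the interpreter. The conventional tail of such a `__main__.py` looks like the sketch below; whether etlplus's file matches it exactly is not shown in this hunk:

```python
# Conventional console-entry tail for a __main__.py (illustrative).
import sys

if __name__ == "__main__":
    sys.exit(_run())
```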