etlplus 0.11.10__tar.gz → 0.12.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.11.10 → etlplus-0.12.3}/PKG-INFO +58 -14
- etlplus-0.11.10/etlplus.egg-info/PKG-INFO → etlplus-0.12.3/README.md +52 -58
- etlplus-0.12.3/SECURITY.md +15 -0
- etlplus-0.12.3/SUPPORT.md +18 -0
- etlplus-0.12.3/etlplus/README.md +37 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/README.md +20 -3
- etlplus-0.12.3/etlplus/cli/README.md +40 -0
- etlplus-0.12.3/etlplus/config/README.md +52 -0
- etlplus-0.12.3/etlplus/database/README.md +48 -0
- etlplus-0.12.3/etlplus/file/README.md +105 -0
- etlplus-0.12.3/etlplus/file/_io.py +120 -0
- etlplus-0.12.3/etlplus/file/_pandas.py +58 -0
- etlplus-0.12.3/etlplus/file/avro.py +186 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/file/core.py +3 -3
- etlplus-0.12.3/etlplus/file/csv.py +67 -0
- etlplus-0.12.3/etlplus/file/feather.py +99 -0
- etlplus-0.12.3/etlplus/file/gz.py +123 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/file/json.py +13 -2
- etlplus-0.12.3/etlplus/file/ndjson.py +109 -0
- etlplus-0.12.3/etlplus/file/orc.py +99 -0
- etlplus-0.12.3/etlplus/file/parquet.py +101 -0
- etlplus-0.12.3/etlplus/file/tsv.py +67 -0
- etlplus-0.12.3/etlplus/file/txt.py +99 -0
- etlplus-0.12.3/etlplus/file/xls.py +88 -0
- etlplus-0.12.3/etlplus/file/xlsx.py +99 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/file/xml.py +12 -3
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/file/yaml.py +13 -2
- etlplus-0.12.3/etlplus/file/zip.py +175 -0
- etlplus-0.12.3/etlplus/templates/README.md +46 -0
- etlplus-0.12.3/etlplus/validation/README.md +50 -0
- etlplus-0.11.10/README.md → etlplus-0.12.3/etlplus.egg-info/PKG-INFO +102 -13
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus.egg-info/SOURCES.txt +11 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus.egg-info/requires.txt +5 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/pyproject.toml +5 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/setup.py +5 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/file/test_u_file_core.py +254 -0
- etlplus-0.11.10/etlplus/file/avro.py +0 -59
- etlplus-0.11.10/etlplus/file/csv.py +0 -82
- etlplus-0.11.10/etlplus/file/feather.py +0 -59
- etlplus-0.11.10/etlplus/file/gz.py +0 -59
- etlplus-0.11.10/etlplus/file/ndjson.py +0 -59
- etlplus-0.11.10/etlplus/file/orc.py +0 -59
- etlplus-0.11.10/etlplus/file/parquet.py +0 -59
- etlplus-0.11.10/etlplus/file/tsv.py +0 -59
- etlplus-0.11.10/etlplus/file/txt.py +0 -59
- etlplus-0.11.10/etlplus/file/xls.py +0 -59
- etlplus-0.11.10/etlplus/file/xlsx.py +0 -59
- etlplus-0.11.10/etlplus/file/zip.py +0 -49
- {etlplus-0.11.10 → etlplus-0.12.3}/.coveragerc +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/.editorconfig +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/.gitattributes +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/.github/workflows/ci.yml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/.gitignore +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/.pre-commit-config.yaml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/.ruff.toml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/CONTRIBUTING.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/DEMO.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/LICENSE +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/MANIFEST.in +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/Makefile +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/REFERENCES.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/docs/README.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/docs/pipeline-guide.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/__main__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/__version__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/auth.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/config.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/errors.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/transport.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/api/types.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/commands.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/constants.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/handlers.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/io.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/main.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/options.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/state.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/cli/types.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/config/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/config/connector.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/config/jobs.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/config/pipeline.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/config/profile.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/config/types.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/config/utils.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/database/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/database/ddl.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/database/engine.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/database/orm.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/database/schema.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/database/types.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/enums.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/extract.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/file/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/file/enums.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/load.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/mixins.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/py.typed +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/run.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/run_helpers.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/transform.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/types.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/utils.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/validate.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus/validation/utils.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/README.md +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/data/sample.csv +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/data/sample.json +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/data/sample.xml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/data/sample.xsd +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/data/sample.yaml +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/examples/quickstart_python.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/pytest.ini +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/setup.cfg +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/__init__.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/conftest.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/conftest.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_examples_data_parity.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_pagination_strategy.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_run.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/cli/conftest.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_handlers.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_io.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_main.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/cli/test_u_cli_state.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/config/test_u_config_utils.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/config/test_u_connector.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/config/test_u_jobs.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/config/test_u_pipeline.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/conftest.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/database/test_u_database_ddl.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/database/test_u_database_engine.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/database/test_u_database_schema.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/file/test_u_file_enums.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/file/test_u_file_yaml.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_enums.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_extract.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_load.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_run.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_run_helpers.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_transform.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_utils.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_validate.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.11.10 → etlplus-0.12.3}/tools/update_demo_snippets.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.11.10
|
|
3
|
+
Version: 0.12.3
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
|
|
|
17
17
|
Requires-Python: >=3.13,<3.15
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
|
+
Requires-Dist: fastavro>=1.12.1
|
|
20
21
|
Requires-Dist: jinja2>=3.1.6
|
|
22
|
+
Requires-Dist: openpyxl>=3.1.5
|
|
21
23
|
Requires-Dist: pyodbc>=5.3.0
|
|
24
|
+
Requires-Dist: pyarrow>=22.0.0
|
|
22
25
|
Requires-Dist: python-dotenv>=1.2.1
|
|
23
26
|
Requires-Dist: pandas>=2.3.3
|
|
24
27
|
Requires-Dist: pydantic>=2.12.5
|
|
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
|
|
|
26
29
|
Requires-Dist: requests>=2.32.5
|
|
27
30
|
Requires-Dist: SQLAlchemy>=2.0.45
|
|
28
31
|
Requires-Dist: typer>=0.21.0
|
|
32
|
+
Requires-Dist: xlrd>=2.0.2
|
|
33
|
+
Requires-Dist: xlwt>=1.3.0
|
|
29
34
|
Provides-Extra: dev
|
|
30
35
|
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
31
36
|
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
@@ -59,6 +64,7 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
59
64
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
60
65
|
|
|
61
66
|
- [ETLPlus](#etlplus)
|
|
67
|
+
- [Getting Started](#getting-started)
|
|
62
68
|
- [Features](#features)
|
|
63
69
|
- [Installation](#installation)
|
|
64
70
|
- [Quickstart](#quickstart)
|
|
@@ -87,11 +93,27 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
87
93
|
- [Linting](#linting)
|
|
88
94
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
89
95
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
90
|
-
- [Links](#links)
|
|
91
96
|
- [License](#license)
|
|
92
97
|
- [Contributing](#contributing)
|
|
98
|
+
- [Documentation](#documentation)
|
|
99
|
+
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
100
|
+
- [Community Health](#community-health)
|
|
101
|
+
- [Other](#other)
|
|
93
102
|
- [Acknowledgments](#acknowledgments)
|
|
94
103
|
|
|
104
|
+
## Getting Started
|
|
105
|
+
|
|
106
|
+
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
107
|
+
as a Python library or from the command line.
|
|
108
|
+
|
|
109
|
+
To get started:
|
|
110
|
+
|
|
111
|
+
- See [Installation](#installation) for setup instructions.
|
|
112
|
+
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
113
|
+
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
114
|
+
|
|
115
|
+
ETLPlus supports Python 3.13 and above.
|
|
116
|
+
|
|
95
117
|
## Features
|
|
96
118
|
|
|
97
119
|
- **Check** data pipeline definitions before running them:
|
|
@@ -416,7 +438,7 @@ etlplus transform \
|
|
|
416
438
|
# 3. Validate transformed data
|
|
417
439
|
etlplus validate \
|
|
418
440
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
419
|
-
|
|
441
|
+
temp/sample_transformed.json
|
|
420
442
|
|
|
421
443
|
# 4. Load to CSV
|
|
422
444
|
cat temp/sample_transformed.json \
|
|
@@ -603,17 +625,6 @@ git push origin v1.4.0
|
|
|
603
625
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
604
626
|
this exercises the same build path the workflow uses.
|
|
605
627
|
|
|
606
|
-
## Links
|
|
607
|
-
|
|
608
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
609
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
610
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
611
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
612
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
613
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
614
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
615
|
-
- Additional references: [`REFERENCES.md`](`REFERENCES.md)
|
|
616
|
-
|
|
617
628
|
## License
|
|
618
629
|
|
|
619
630
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -637,6 +648,39 @@ If you choose to be a code contributor, please first refer these documents:
|
|
|
637
648
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
638
649
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
639
650
|
|
|
651
|
+
## Documentation
|
|
652
|
+
|
|
653
|
+
### Python Packages/Subpackage
|
|
654
|
+
|
|
655
|
+
Navigate to detailed documentation for each subpackage:
|
|
656
|
+
|
|
657
|
+
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
658
|
+
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
659
|
+
- [etlplus.config](etlplus/config/README.md): Configuration helpers for connectors, pipelines, jobs,
|
|
660
|
+
and profiles
|
|
661
|
+
- [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
|
|
662
|
+
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
663
|
+
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
664
|
+
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
665
|
+
|
|
666
|
+
### Community Health
|
|
667
|
+
|
|
668
|
+
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
669
|
+
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
670
|
+
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
671
|
+
- [Support](SUPPORT.md): Where to get help
|
|
672
|
+
|
|
673
|
+
### Other
|
|
674
|
+
|
|
675
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
676
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
677
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
678
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
679
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
680
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
681
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
682
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
683
|
+
|
|
640
684
|
## Acknowledgments
|
|
641
685
|
|
|
642
686
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
@@ -1,48 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: etlplus
|
|
3
|
-
Version: 0.11.10
|
|
4
|
-
Summary: A Swiss Army knife for simple ETL operations
|
|
5
|
-
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
|
-
Author: ETLPlus Team
|
|
7
|
-
License: MIT
|
|
8
|
-
Project-URL: Homepage, https://github.com/Dagitali/ETLPlus
|
|
9
|
-
Project-URL: Repository, https://github.com/Dagitali/ETLPlus
|
|
10
|
-
Classifier: Development Status :: 3 - Alpha
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
-
Requires-Python: >=3.13,<3.15
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
License-File: LICENSE
|
|
20
|
-
Requires-Dist: jinja2>=3.1.6
|
|
21
|
-
Requires-Dist: pyodbc>=5.3.0
|
|
22
|
-
Requires-Dist: python-dotenv>=1.2.1
|
|
23
|
-
Requires-Dist: pandas>=2.3.3
|
|
24
|
-
Requires-Dist: pydantic>=2.12.5
|
|
25
|
-
Requires-Dist: PyYAML>=6.0.3
|
|
26
|
-
Requires-Dist: requests>=2.32.5
|
|
27
|
-
Requires-Dist: SQLAlchemy>=2.0.45
|
|
28
|
-
Requires-Dist: typer>=0.21.0
|
|
29
|
-
Provides-Extra: dev
|
|
30
|
-
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
31
|
-
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
32
|
-
Requires-Dist: flake8>=7.3.0; extra == "dev"
|
|
33
|
-
Requires-Dist: PyYAML>=6.0.3; extra == "dev"
|
|
34
|
-
Requires-Dist: pydoclint>=0.8.1; extra == "dev"
|
|
35
|
-
Requires-Dist: pydocstyle>=6.3.0; extra == "dev"
|
|
36
|
-
Requires-Dist: pytest>=8.4.2; extra == "dev"
|
|
37
|
-
Requires-Dist: pytest-cov>=7.0.0; extra == "dev"
|
|
38
|
-
Requires-Dist: ruff>=0.14.4; extra == "dev"
|
|
39
|
-
Provides-Extra: docs
|
|
40
|
-
Requires-Dist: sphinx>=4.0.0; extra == "docs"
|
|
41
|
-
Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
|
|
42
|
-
Dynamic: home-page
|
|
43
|
-
Dynamic: license-file
|
|
44
|
-
Dynamic: requires-python
|
|
45
|
-
|
|
46
1
|
# ETLPlus
|
|
47
2
|
|
|
48
3
|
[][PyPI package]
|
|
@@ -59,6 +14,7 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
59
14
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
60
15
|
|
|
61
16
|
- [ETLPlus](#etlplus)
|
|
17
|
+
- [Getting Started](#getting-started)
|
|
62
18
|
- [Features](#features)
|
|
63
19
|
- [Installation](#installation)
|
|
64
20
|
- [Quickstart](#quickstart)
|
|
@@ -87,11 +43,27 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
87
43
|
- [Linting](#linting)
|
|
88
44
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
89
45
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
90
|
-
- [Links](#links)
|
|
91
46
|
- [License](#license)
|
|
92
47
|
- [Contributing](#contributing)
|
|
48
|
+
- [Documentation](#documentation)
|
|
49
|
+
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
50
|
+
- [Community Health](#community-health)
|
|
51
|
+
- [Other](#other)
|
|
93
52
|
- [Acknowledgments](#acknowledgments)
|
|
94
53
|
|
|
54
|
+
## Getting Started
|
|
55
|
+
|
|
56
|
+
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
57
|
+
as a Python library or from the command line.
|
|
58
|
+
|
|
59
|
+
To get started:
|
|
60
|
+
|
|
61
|
+
- See [Installation](#installation) for setup instructions.
|
|
62
|
+
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
63
|
+
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
64
|
+
|
|
65
|
+
ETLPlus supports Python 3.13 and above.
|
|
66
|
+
|
|
95
67
|
## Features
|
|
96
68
|
|
|
97
69
|
- **Check** data pipeline definitions before running them:
|
|
@@ -416,7 +388,7 @@ etlplus transform \
|
|
|
416
388
|
# 3. Validate transformed data
|
|
417
389
|
etlplus validate \
|
|
418
390
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
419
|
-
|
|
391
|
+
temp/sample_transformed.json
|
|
420
392
|
|
|
421
393
|
# 4. Load to CSV
|
|
422
394
|
cat temp/sample_transformed.json \
|
|
@@ -603,17 +575,6 @@ git push origin v1.4.0
|
|
|
603
575
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
604
576
|
this exercises the same build path the workflow uses.
|
|
605
577
|
|
|
606
|
-
## Links
|
|
607
|
-
|
|
608
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
609
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
610
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
611
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
612
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
613
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
614
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
615
|
-
- Additional references: [`REFERENCES.md`](`REFERENCES.md)
|
|
616
|
-
|
|
617
578
|
## License
|
|
618
579
|
|
|
619
580
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -637,6 +598,39 @@ If you choose to be a code contributor, please first refer these documents:
|
|
|
637
598
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
638
599
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
639
600
|
|
|
601
|
+
## Documentation
|
|
602
|
+
|
|
603
|
+
### Python Packages/Subpackage
|
|
604
|
+
|
|
605
|
+
Navigate to detailed documentation for each subpackage:
|
|
606
|
+
|
|
607
|
+
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
608
|
+
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
609
|
+
- [etlplus.config](etlplus/config/README.md): Configuration helpers for connectors, pipelines, jobs,
|
|
610
|
+
and profiles
|
|
611
|
+
- [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
|
|
612
|
+
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
613
|
+
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
614
|
+
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
615
|
+
|
|
616
|
+
### Community Health
|
|
617
|
+
|
|
618
|
+
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
619
|
+
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
620
|
+
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
621
|
+
- [Support](SUPPORT.md): Where to get help
|
|
622
|
+
|
|
623
|
+
### Other
|
|
624
|
+
|
|
625
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
626
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
627
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
628
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
629
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
630
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
631
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
632
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
633
|
+
|
|
640
634
|
## Acknowledgments
|
|
641
635
|
|
|
642
636
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Reporting a Vulnerability
|
|
4
|
+
|
|
5
|
+
If you discover a security vulnerability in ETLPlus, please report it responsibly:
|
|
6
|
+
|
|
7
|
+
- **Do not** open a public issue for security problems.
|
|
8
|
+
- Email the maintainers at security@dagitali.com (or use a private contact method listed in the
|
|
9
|
+
repository).
|
|
10
|
+
- Include as much detail as possible: affected versions, steps to reproduce, and any relevant logs
|
|
11
|
+
or code.
|
|
12
|
+
- We will acknowledge your report within 3 business days and work with you to resolve the issue
|
|
13
|
+
promptly.
|
|
14
|
+
|
|
15
|
+
Thank you for helping keep ETLPlus and its users safe!
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Support
|
|
2
|
+
|
|
3
|
+
Thank you for using ETLPlus!
|
|
4
|
+
|
|
5
|
+
## Where to Get Help
|
|
6
|
+
|
|
7
|
+
- **Questions & Usage**: Please use [GitHub Discussions][discussions] for general questions, usage
|
|
8
|
+
help, and best practices.
|
|
9
|
+
- **Bugs & Feature Requests**: Open an issue in the [GitHub Issues][issues] tracker.
|
|
10
|
+
- **Security Issues**: See [SECURITY.md](SECURITY.md) for responsible disclosure.
|
|
11
|
+
- **Documentation**: See the [README](README.md) and [docs/](docs/) directory for guides and
|
|
12
|
+
references.
|
|
13
|
+
|
|
14
|
+
We aim to respond to questions and issues as quickly as possible. Community contributions and peer
|
|
15
|
+
support are also welcome!
|
|
16
|
+
|
|
17
|
+
[discussions]: https://github.com/Dagitali/ETLPlus/discussions
|
|
18
|
+
[issues]: https://github.com/Dagitali/ETLPlus/issues
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# etlplus package
|
|
2
|
+
|
|
3
|
+
The `etlplus` package provides a unified Python API and CLI for ETL operations: extraction,
|
|
4
|
+
validation, transformation, and loading of data from files, APIs, and databases.
|
|
5
|
+
|
|
6
|
+
- Top-level entry points for extract, validate, transform, and load
|
|
7
|
+
- Utilities for pipeline orchestration and helpers
|
|
8
|
+
- Exposes all subpackages for advanced usage
|
|
9
|
+
|
|
10
|
+
Back to project overview: see the top-level [README](../README.md).
|
|
11
|
+
|
|
12
|
+
## Subpackages
|
|
13
|
+
|
|
14
|
+
- [etlplus.api](api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
15
|
+
- [etlplus.file](file/README.md): Unified file format support and helpers
|
|
16
|
+
- [etlplus.config](config/README.md): Configuration helpers for connectors, pipelines, jobs, and
|
|
17
|
+
profiles
|
|
18
|
+
- [etlplus.cli](cli/README.md): Command-line interface for ETLPlus workflows
|
|
19
|
+
- [etlplus.database](database/README.md): Database engine, schema, and ORM helpers
|
|
20
|
+
- [etlplus.templates](templates/README.md): SQL and DDL template helpers
|
|
21
|
+
- [etlplus.validation](validation/README.md): Data validation utilities and helpers
|
|
22
|
+
|
|
23
|
+
## Quickstart
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from etlplus import extract, validate, transform, load
|
|
27
|
+
|
|
28
|
+
data = extract("file", "input.csv")
|
|
29
|
+
filtered = transform(data, {"filter": {"field": "age", "op": "gt", "value": 25}})
|
|
30
|
+
assert validate(filtered, {"age": {"type": "number", "min": 0}})["valid"]
|
|
31
|
+
load(filtered, "file", "output.json", file_format="json")
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## See Also
|
|
35
|
+
|
|
36
|
+
- [Top-level project README](../README.md)
|
|
37
|
+
- [API reference](../docs/README.md)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
# etlplus.api
|
|
1
|
+
# etlplus.api subpackage
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
Documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for paginated
|
|
4
|
+
REST endpoints.
|
|
5
5
|
|
|
6
6
|
- Provides a small `EndpointClient` for calling JSON APIs
|
|
7
7
|
- Supports page-, offset-, and cursor-based pagination via `PaginationConfig`
|
|
@@ -12,6 +12,20 @@ paginated REST endpoints.
|
|
|
12
12
|
|
|
13
13
|
Back to project overview: see the top-level [README](../../README.md).
|
|
14
14
|
|
|
15
|
+
- [etlplus.api subpackage](#etlplusapi-subpackage)
|
|
16
|
+
- [Installation](#installation)
|
|
17
|
+
- [Quickstart](#quickstart)
|
|
18
|
+
- [Overriding Rate Limits Per Call](#overriding-rate-limits-per-call)
|
|
19
|
+
- [Choosing `records_path` and `cursor_path`](#choosing-records_path-and-cursor_path)
|
|
20
|
+
- [Cursor-Based Pagination Example](#cursor-based-pagination-example)
|
|
21
|
+
- [Offset-based pagination example](#offset-based-pagination-example)
|
|
22
|
+
- [Authentication](#authentication)
|
|
23
|
+
- [Errors and Rate Limiting](#errors-and-rate-limiting)
|
|
24
|
+
- [Types and Transport](#types-and-transport)
|
|
25
|
+
- [Supporting Modules](#supporting-modules)
|
|
26
|
+
- [Minimal Contract](#minimal-contract)
|
|
27
|
+
- [See also](#see-also)
|
|
28
|
+
|
|
15
29
|
## Installation
|
|
16
30
|
|
|
17
31
|
`etlplus.api` ships as part of the `etlplus` package. Install the package as usual:
|
|
@@ -233,3 +247,6 @@ providers can fall back to their own defaults. If you already possess a static t
|
|
|
233
247
|
## See also
|
|
234
248
|
|
|
235
249
|
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
[def]: #installation
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# etlplus.cli subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.cli` subpackage: command-line interface for ETLPlus workflows.
|
|
4
|
+
|
|
5
|
+
- Provides a CLI for running ETL pipelines, jobs, and utilities
|
|
6
|
+
- Supports commands for running, validating, and inspecting pipelines
|
|
7
|
+
- Includes options for configuration, state, and output control
|
|
8
|
+
- Exposes handlers for custom command integration
|
|
9
|
+
|
|
10
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
11
|
+
|
|
12
|
+
- [etlplus.cli subpackage](#etlpluscli-subpackage)
|
|
13
|
+
- [Available Commands](#available-commands)
|
|
14
|
+
- [Command Options](#command-options)
|
|
15
|
+
- [Example: Running a Pipeline](#example-running-a-pipeline)
|
|
16
|
+
- [See Also](#see-also)
|
|
17
|
+
|
|
18
|
+
## Available Commands
|
|
19
|
+
|
|
20
|
+
- **run**: Execute a pipeline or job
|
|
21
|
+
- **validate**: Validate pipeline or config files
|
|
22
|
+
- **inspect**: Show pipeline/job details
|
|
23
|
+
|
|
24
|
+
## Command Options
|
|
25
|
+
|
|
26
|
+
- `--config`: Path to config file
|
|
27
|
+
- `--state`: Path to state file
|
|
28
|
+
- `--output`: Output file or format
|
|
29
|
+
|
|
30
|
+
## Example: Running a Pipeline
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
etlplus run --config configs/pipeline.yml --output results.json
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## See Also
|
|
37
|
+
|
|
38
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
39
|
+
- Command handlers in [handlers.py](handlers.py)
|
|
40
|
+
- Command options in [options.py](options.py)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# etlplus.config subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.config` subpackage: configuration helpers for connectors, pipelines,
|
|
4
|
+
jobs, and profiles.
|
|
5
|
+
|
|
6
|
+
- Provides classes and utilities for managing ETL pipeline configuration
|
|
7
|
+
- Supports YAML/JSON config loading and validation
|
|
8
|
+
- Includes helpers for connectors, jobs, pipelines, and profiles
|
|
9
|
+
- Exposes type definitions for config schemas
|
|
10
|
+
|
|
11
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
12
|
+
|
|
13
|
+
- [etlplus.config subpackage](#etlplusconfig-subpackage)
|
|
14
|
+
- [Supported Configuration Types](#supported-configuration-types)
|
|
15
|
+
- [Loading and Validating Configs](#loading-and-validating-configs)
|
|
16
|
+
- [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
|
|
17
|
+
- [See Also](#see-also)
|
|
18
|
+
|
|
19
|
+
## Supported Configuration Types
|
|
20
|
+
|
|
21
|
+
- **Connector**: Connection details for databases, files, or APIs
|
|
22
|
+
- **Job**: ETL job definitions and scheduling
|
|
23
|
+
- **Pipeline**: End-to-end pipeline configuration
|
|
24
|
+
- **Profile**: User- or environment-specific settings
|
|
25
|
+
|
|
26
|
+
## Loading and Validating Configs
|
|
27
|
+
|
|
28
|
+
Use the provided classes to load and validate configuration files:
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from etlplus.config import PipelineConfig
|
|
32
|
+
|
|
33
|
+
cfg = PipelineConfig.from_yaml("pipeline.yml")
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
- Supports YAML and JSON formats
|
|
37
|
+
- Validates against the expected schema
|
|
38
|
+
|
|
39
|
+
## Example: Loading a Pipeline Config
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from etlplus.config import PipelineConfig
|
|
43
|
+
|
|
44
|
+
pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
|
|
45
|
+
print(pipeline)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## See Also
|
|
49
|
+
|
|
50
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
51
|
+
- Config type definitions in [types.py](types.py)
|
|
52
|
+
- Config utilities in [utils.py](utils.py)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# etlplus.database subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
|
|
4
|
+
|
|
5
|
+
- Provides database engine and connection management
|
|
6
|
+
- Supports schema definition and DDL generation
|
|
7
|
+
- Includes lightweight ORM utilities for tabular data
|
|
8
|
+
- Exposes type definitions for database objects
|
|
9
|
+
|
|
10
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
11
|
+
|
|
12
|
+
- [etlplus.database subpackage](#etlplusdatabase-subpackage)
|
|
13
|
+
- [Database Engine and Connections](#database-engine-and-connections)
|
|
14
|
+
- [Schema and DDL Helpers](#schema-and-ddl-helpers)
|
|
15
|
+
- [ORM Utilities](#orm-utilities)
|
|
16
|
+
- [Example: Creating a Table](#example-creating-a-table)
|
|
17
|
+
- [See Also](#see-also)
|
|
18
|
+
|
|
19
|
+
## Database Engine and Connections
|
|
20
|
+
|
|
21
|
+
- Manage connections to supported databases
|
|
22
|
+
- Configure engines for different backends
|
|
23
|
+
|
|
24
|
+
## Schema and DDL Helpers
|
|
25
|
+
|
|
26
|
+
- Define table schemas and columns
|
|
27
|
+
- Generate DDL statements for supported databases
|
|
28
|
+
|
|
29
|
+
## ORM Utilities
|
|
30
|
+
|
|
31
|
+
- Map rows to Python objects
|
|
32
|
+
- Simple CRUD helpers for tabular data
|
|
33
|
+
|
|
34
|
+
## Example: Creating a Table
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from etlplus.database import Schema, Engine
|
|
38
|
+
|
|
39
|
+
engine = Engine.connect("sqlite:///example.db")
|
|
40
|
+
schema = Schema.from_dict({"name": "users", "columns": [ ... ]})
|
|
41
|
+
engine.create_table(schema)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## See Also
|
|
45
|
+
|
|
46
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
47
|
+
- Schema helpers in [schema.py](schema.py)
|
|
48
|
+
- ORM utilities in [orm.py](orm.py)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# etlplus.file subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
|
|
4
|
+
and writing data files.
|
|
5
|
+
|
|
6
|
+
- Provides a consistent interface for reading and writing files in various formats
|
|
7
|
+
- Supports all formats defined in `FileFormat` (see below)
|
|
8
|
+
- Includes helpers for inferring file format and compression from filenames, extensions, or MIME
|
|
9
|
+
types
|
|
10
|
+
- Exposes a `File` class with instance methods for reading and writing data
|
|
11
|
+
|
|
12
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
13
|
+
|
|
14
|
+
- [etlplus.file subpackage](#etlplusfile-subpackage)
|
|
15
|
+
- [Supported File Formats](#supported-file-formats)
|
|
16
|
+
- [Inferring File Format and Compression](#inferring-file-format-and-compression)
|
|
17
|
+
- [Reading and Writing Files](#reading-and-writing-files)
|
|
18
|
+
- [Reading a File](#reading-a-file)
|
|
19
|
+
- [Writing a File](#writing-a-file)
|
|
20
|
+
- [File Instance Methods](#file-instance-methods)
|
|
21
|
+
- [Example: Reading and Writing](#example-reading-and-writing)
|
|
22
|
+
- [See Also](#see-also)
|
|
23
|
+
|
|
24
|
+
## Supported File Formats
|
|
25
|
+
|
|
26
|
+
The following formats are defined in `FileFormat` and supported for reading and writing:
|
|
27
|
+
|
|
28
|
+
| Format | Description |
|
|
29
|
+
|-----------|---------------------------------------------|
|
|
30
|
+
| avro | Apache Avro binary serialization |
|
|
31
|
+
| csv | Comma-separated values text files |
|
|
32
|
+
| feather | Apache Arrow Feather columnar format |
|
|
33
|
+
| gz | Gzip-compressed files (see the compression note below the table) |
|
|
34
|
+
| json | Standard JSON files |
|
|
35
|
+
| ndjson | Newline-delimited JSON (JSON Lines) |
|
|
36
|
+
| orc | Apache ORC columnar format |
|
|
37
|
+
| parquet | Apache Parquet columnar format |
|
|
38
|
+
| tsv | Tab-separated values text files |
|
|
39
|
+
| txt | Plain text files |
|
|
40
|
+
| xls | Microsoft Excel (legacy .xls) |
|
|
41
|
+
| xlsx | Microsoft Excel (modern .xlsx) |
|
|
42
|
+
| zip | ZIP-compressed files (see the compression note below the table) |
|
|
43
|
+
| xml | XML files |
|
|
44
|
+
| yaml | YAML files |
|
|
45
|
+
|
|
46
|
+
Compression formats (gz, zip) are also supported as wrappers for other formats.
|
|
47
|
+
|
|
48
|
+
## Inferring File Format and Compression
|
|
49
|
+
|
|
50
|
+
Use `infer_file_format_and_compression(value, filename=None)` to infer the file format and
|
|
51
|
+
compression from a filename, extension, or MIME type. It returns a tuple `(file_format,
|
|
52
|
+
compression_format)`.
|
|
53
|
+
|
|
54
|
+
## Reading and Writing Files
|
|
55
|
+
|
|
56
|
+
The main entry point for file operations is the `File` class. To read or write files:
|
|
57
|
+
|
|
58
|
+
### Reading a File
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from etlplus.file import File
|
|
62
|
+
|
|
63
|
+
f = File("data/sample.csv")
|
|
64
|
+
data = f.read()
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
- The `read()` method automatically detects the format and compression.
|
|
68
|
+
- Returns parsed data (e.g., list of dicts for tabular formats).
|
|
69
|
+
|
|
70
|
+
### Writing a File
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from etlplus.file import File
|
|
74
|
+
|
|
75
|
+
f = File("output.json")
|
|
76
|
+
f.write(data)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
- The `write()` method serializes and writes data in the appropriate format.
|
|
80
|
+
- Supports all formats listed above.
|
|
81
|
+
|
|
82
|
+
## File Instance Methods
|
|
83
|
+
|
|
84
|
+
- `read()`: Reads and parses the file, returning structured data.
|
|
85
|
+
- `write(data)`: Writes structured data to the file in the detected format.
|
|
86
|
+
|
|
87
|
+
## Example: Reading and Writing
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from etlplus.file import File
|
|
91
|
+
|
|
92
|
+
# Read CSV
|
|
93
|
+
csv_file = File("data.csv")
|
|
94
|
+
rows = csv_file.read()
|
|
95
|
+
|
|
96
|
+
# Write JSON
|
|
97
|
+
json_file = File("output.json")
|
|
98
|
+
json_file.write(rows)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## See Also
|
|
102
|
+
|
|
103
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
104
|
+
- File format enums in [enums.py](enums.py)
|
|
105
|
+
- Compression format enums in [enums.py](enums.py)
|