etlplus 0.9.2__tar.gz → 0.10.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.9.2 → etlplus-0.10.2}/CONTRIBUTING.md +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/DEMO.md +1 -1
- {etlplus-0.9.2/etlplus.egg-info → etlplus-0.10.2}/PKG-INFO +17 -210
- {etlplus-0.9.2 → etlplus-0.10.2}/README.md +16 -204
- {etlplus-0.9.2 → etlplus-0.10.2}/docs/pipeline-guide.md +7 -17
- etlplus-0.10.2/etlplus/__init__.py +43 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/README.md +3 -51
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/__init__.py +0 -10
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/config.py +28 -39
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/endpoint_client.py +3 -3
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/pagination/client.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/rate_limiting/config.py +1 -13
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/rate_limiting/rate_limiter.py +11 -8
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/request_manager.py +6 -11
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/transport.py +2 -14
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/types.py +6 -96
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/commands.py +43 -76
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/constants.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/handlers.py +12 -40
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/io.py +2 -2
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/main.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/state.py +7 -4
- {etlplus-0.9.2/etlplus/workflow → etlplus-0.10.2/etlplus/config}/__init__.py +23 -10
- {etlplus-0.9.2/etlplus/workflow → etlplus-0.10.2/etlplus/config}/connector.py +44 -58
- {etlplus-0.9.2/etlplus/workflow → etlplus-0.10.2/etlplus/config}/jobs.py +32 -105
- {etlplus-0.9.2/etlplus/workflow → etlplus-0.10.2/etlplus/config}/pipeline.py +51 -59
- {etlplus-0.9.2/etlplus/workflow → etlplus-0.10.2/etlplus/config}/profile.py +5 -8
- etlplus-0.10.2/etlplus/config/types.py +204 -0
- etlplus-0.10.2/etlplus/config/utils.py +120 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/database/ddl.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/database/engine.py +3 -19
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/database/orm.py +0 -2
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/database/schema.py +1 -1
- etlplus-0.10.2/etlplus/enums.py +596 -0
- {etlplus-0.9.2/etlplus/ops → etlplus-0.10.2/etlplus}/extract.py +99 -81
- etlplus-0.10.2/etlplus/file.py +652 -0
- {etlplus-0.9.2/etlplus/ops → etlplus-0.10.2/etlplus}/load.py +101 -78
- {etlplus-0.9.2/etlplus/ops → etlplus-0.10.2/etlplus}/run.py +127 -159
- etlplus-0.9.2/etlplus/api/utils.py → etlplus-0.10.2/etlplus/run_helpers.py +153 -209
- {etlplus-0.9.2/etlplus/ops → etlplus-0.10.2/etlplus}/transform.py +68 -75
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/types.py +4 -5
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/utils.py +2 -136
- {etlplus-0.9.2/etlplus/ops → etlplus-0.10.2/etlplus}/validate.py +12 -22
- etlplus-0.10.2/etlplus/validation/__init__.py +44 -0
- {etlplus-0.9.2/etlplus/ops → etlplus-0.10.2/etlplus/validation}/utils.py +17 -53
- {etlplus-0.9.2 → etlplus-0.10.2/etlplus.egg-info}/PKG-INFO +17 -210
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus.egg-info/SOURCES.txt +28 -102
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus.egg-info/requires.txt +0 -5
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/README.md +2 -2
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/configs/pipeline.yml +4 -4
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/quickstart_python.py +5 -5
- {etlplus-0.9.2 → etlplus-0.10.2}/pyproject.toml +0 -5
- {etlplus-0.9.2 → etlplus-0.10.2}/setup.py +0 -5
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/conftest.py +10 -10
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_examples_data_parity.py +2 -2
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_pagination_strategy.py +13 -13
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_pipeline_yaml_load.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_run.py +2 -2
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_run_profile_pagination_defaults.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_run_profile_rate_limit_defaults.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/cli/conftest.py +6 -107
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/cli/test_u_cli_handlers.py +380 -97
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/cli/test_u_cli_main.py +25 -59
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/cli/test_u_cli_state.py +41 -45
- etlplus-0.10.2/tests/unit/config/test_u_config_utils.py +129 -0
- etlplus-0.9.2/tests/unit/workflow/test_u_workflow_connector.py → etlplus-0.10.2/tests/unit/config/test_u_connector.py +6 -6
- etlplus-0.9.2/tests/unit/workflow/test_u_workflow_jobs.py → etlplus-0.10.2/tests/unit/config/test_u_jobs.py +3 -3
- etlplus-0.9.2/tests/unit/workflow/test_u_workflow_pipeline.py → etlplus-0.10.2/tests/unit/config/test_u_pipeline.py +34 -38
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/conftest.py +1 -1
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/database/test_u_database_ddl.py +5 -9
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/database/test_u_database_engine.py +4 -5
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/database/test_u_database_schema.py +14 -12
- etlplus-0.10.2/tests/unit/test_u_enums.py +173 -0
- etlplus-0.9.2/tests/unit/ops/test_u_ops_extract.py → etlplus-0.10.2/tests/unit/test_u_extract.py +12 -12
- etlplus-0.10.2/tests/unit/test_u_file.py +345 -0
- etlplus-0.9.2/tests/unit/ops/test_u_ops_load.py → etlplus-0.10.2/tests/unit/test_u_load.py +17 -18
- etlplus-0.9.2/tests/unit/ops/test_u_ops_run.py → etlplus-0.10.2/tests/unit/test_u_run.py +4 -4
- etlplus-0.9.2/tests/unit/api/test_u_api_utils.py → etlplus-0.10.2/tests/unit/test_u_run_helpers.py +15 -17
- etlplus-0.9.2/tests/unit/ops/test_u_ops_transform.py → etlplus-0.10.2/tests/unit/test_u_transform.py +64 -65
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/test_u_utils.py +4 -110
- etlplus-0.9.2/tests/unit/ops/test_u_ops_validate.py → etlplus-0.10.2/tests/unit/test_u_validate.py +9 -9
- etlplus-0.9.2/tests/unit/ops/test_u_ops_utils.py → etlplus-0.10.2/tests/unit/validation/test_u_validation_utils.py +4 -4
- etlplus-0.9.2/SECURITY.md +0 -15
- etlplus-0.9.2/SUPPORT.md +0 -18
- etlplus-0.9.2/etlplus/README.md +0 -37
- etlplus-0.9.2/etlplus/__init__.py +0 -18
- etlplus-0.9.2/etlplus/api/enums.py +0 -51
- etlplus-0.9.2/etlplus/cli/README.md +0 -40
- etlplus-0.9.2/etlplus/database/README.md +0 -48
- etlplus-0.9.2/etlplus/enums.py +0 -308
- etlplus-0.9.2/etlplus/file/README.md +0 -105
- etlplus-0.9.2/etlplus/file/__init__.py +0 -25
- etlplus-0.9.2/etlplus/file/_imports.py +0 -141
- etlplus-0.9.2/etlplus/file/_io.py +0 -160
- etlplus-0.9.2/etlplus/file/accdb.py +0 -78
- etlplus-0.9.2/etlplus/file/arrow.py +0 -78
- etlplus-0.9.2/etlplus/file/avro.py +0 -176
- etlplus-0.9.2/etlplus/file/bson.py +0 -77
- etlplus-0.9.2/etlplus/file/cbor.py +0 -78
- etlplus-0.9.2/etlplus/file/cfg.py +0 -79
- etlplus-0.9.2/etlplus/file/conf.py +0 -80
- etlplus-0.9.2/etlplus/file/core.py +0 -322
- etlplus-0.9.2/etlplus/file/csv.py +0 -79
- etlplus-0.9.2/etlplus/file/dat.py +0 -78
- etlplus-0.9.2/etlplus/file/dta.py +0 -77
- etlplus-0.9.2/etlplus/file/duckdb.py +0 -78
- etlplus-0.9.2/etlplus/file/enums.py +0 -343
- etlplus-0.9.2/etlplus/file/feather.py +0 -111
- etlplus-0.9.2/etlplus/file/fwf.py +0 -77
- etlplus-0.9.2/etlplus/file/gz.py +0 -123
- etlplus-0.9.2/etlplus/file/hbs.py +0 -78
- etlplus-0.9.2/etlplus/file/hdf5.py +0 -78
- etlplus-0.9.2/etlplus/file/ini.py +0 -79
- etlplus-0.9.2/etlplus/file/ion.py +0 -78
- etlplus-0.9.2/etlplus/file/jinja2.py +0 -78
- etlplus-0.9.2/etlplus/file/json.py +0 -98
- etlplus-0.9.2/etlplus/file/log.py +0 -78
- etlplus-0.9.2/etlplus/file/mat.py +0 -78
- etlplus-0.9.2/etlplus/file/mdb.py +0 -78
- etlplus-0.9.2/etlplus/file/msgpack.py +0 -78
- etlplus-0.9.2/etlplus/file/mustache.py +0 -78
- etlplus-0.9.2/etlplus/file/nc.py +0 -78
- etlplus-0.9.2/etlplus/file/ndjson.py +0 -108
- etlplus-0.9.2/etlplus/file/numbers.py +0 -75
- etlplus-0.9.2/etlplus/file/ods.py +0 -79
- etlplus-0.9.2/etlplus/file/orc.py +0 -111
- etlplus-0.9.2/etlplus/file/parquet.py +0 -113
- etlplus-0.9.2/etlplus/file/pb.py +0 -78
- etlplus-0.9.2/etlplus/file/pbf.py +0 -77
- etlplus-0.9.2/etlplus/file/properties.py +0 -78
- etlplus-0.9.2/etlplus/file/proto.py +0 -77
- etlplus-0.9.2/etlplus/file/psv.py +0 -79
- etlplus-0.9.2/etlplus/file/rda.py +0 -78
- etlplus-0.9.2/etlplus/file/rds.py +0 -78
- etlplus-0.9.2/etlplus/file/sas7bdat.py +0 -78
- etlplus-0.9.2/etlplus/file/sav.py +0 -77
- etlplus-0.9.2/etlplus/file/sqlite.py +0 -78
- etlplus-0.9.2/etlplus/file/stub.py +0 -84
- etlplus-0.9.2/etlplus/file/sylk.py +0 -77
- etlplus-0.9.2/etlplus/file/tab.py +0 -81
- etlplus-0.9.2/etlplus/file/toml.py +0 -78
- etlplus-0.9.2/etlplus/file/tsv.py +0 -80
- etlplus-0.9.2/etlplus/file/txt.py +0 -102
- etlplus-0.9.2/etlplus/file/vm.py +0 -78
- etlplus-0.9.2/etlplus/file/wks.py +0 -77
- etlplus-0.9.2/etlplus/file/xls.py +0 -88
- etlplus-0.9.2/etlplus/file/xlsm.py +0 -79
- etlplus-0.9.2/etlplus/file/xlsx.py +0 -99
- etlplus-0.9.2/etlplus/file/xml.py +0 -185
- etlplus-0.9.2/etlplus/file/xpt.py +0 -78
- etlplus-0.9.2/etlplus/file/yaml.py +0 -95
- etlplus-0.9.2/etlplus/file/zip.py +0 -175
- etlplus-0.9.2/etlplus/file/zsav.py +0 -77
- etlplus-0.9.2/etlplus/ops/README.md +0 -50
- etlplus-0.9.2/etlplus/ops/__init__.py +0 -61
- etlplus-0.9.2/etlplus/templates/README.md +0 -46
- etlplus-0.9.2/etlplus/workflow/README.md +0 -52
- etlplus-0.9.2/etlplus/workflow/dag.py +0 -105
- etlplus-0.9.2/etlplus/workflow/types.py +0 -115
- etlplus-0.9.2/tests/unit/api/test_u_api_enums.py +0 -34
- etlplus-0.9.2/tests/unit/cli/test_u_cli_io.py +0 -326
- etlplus-0.9.2/tests/unit/file/test_u_file_core.py +0 -571
- etlplus-0.9.2/tests/unit/file/test_u_file_enums.py +0 -99
- etlplus-0.9.2/tests/unit/file/test_u_file_yaml.py +0 -109
- etlplus-0.9.2/tests/unit/test_u_enums.py +0 -86
- {etlplus-0.9.2 → etlplus-0.10.2}/.coveragerc +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/.editorconfig +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/.gitattributes +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/.github/workflows/ci.yml +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/.gitignore +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/.pre-commit-config.yaml +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/.ruff.toml +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/LICENSE +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/MANIFEST.in +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/Makefile +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/REFERENCES.md +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/docs/README.md +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/__main__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/__version__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/auth.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/errors.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/options.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/cli/types.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/database/__init__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/database/types.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/mixins.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/py.typed +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/data/sample.csv +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/data/sample.json +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/data/sample.xml +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/data/sample.xsd +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/examples/data/sample.yaml +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/pytest.ini +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/setup.cfg +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/__init__.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/conftest.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.9.2 → etlplus-0.10.2}/tools/update_demo_snippets.py +0 -0
|
@@ -119,7 +119,7 @@ Use these guidelines to decide whether a test belongs in the unit or integration
|
|
|
119
119
|
- Can use temporary files/directories, and stub network with fakes/mocks.
|
|
120
120
|
- Examples in this repo: CLI end-to-end, pipeline smoke tests, pagination strategy, runner defaults for pagination/rate limits, target URL composition.
|
|
121
121
|
|
|
122
|
-
If a test calls `etlplus.cli.main()` or `etlplus.
|
|
122
|
+
If a test calls `etlplus.cli.main()` or `etlplus.run.run()`, it is integration by default.
|
|
123
123
|
|
|
124
124
|
### Where to put tests
|
|
125
125
|
|
|
@@ -196,7 +196,7 @@ $ etlplus load transformed.json file final_output.csv
|
|
|
196
196
|
## Demo 6: Using Python API
|
|
197
197
|
|
|
198
198
|
```python
|
|
199
|
-
from etlplus
|
|
199
|
+
from etlplus import extract, validate, transform, load
|
|
200
200
|
|
|
201
201
|
# Extract
|
|
202
202
|
data = extract("file", "data.csv", format="csv")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.9.2
|
|
3
|
+
Version: 0.10.2
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -17,11 +17,8 @@ Classifier: Programming Language :: Python :: 3.14
|
|
|
17
17
|
Requires-Python: >=3.13,<3.15
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
|
-
Requires-Dist: fastavro>=1.12.1
|
|
21
20
|
Requires-Dist: jinja2>=3.1.6
|
|
22
|
-
Requires-Dist: openpyxl>=3.1.5
|
|
23
21
|
Requires-Dist: pyodbc>=5.3.0
|
|
24
|
-
Requires-Dist: pyarrow>=22.0.0
|
|
25
22
|
Requires-Dist: python-dotenv>=1.2.1
|
|
26
23
|
Requires-Dist: pandas>=2.3.3
|
|
27
24
|
Requires-Dist: pydantic>=2.12.5
|
|
@@ -29,8 +26,6 @@ Requires-Dist: PyYAML>=6.0.3
|
|
|
29
26
|
Requires-Dist: requests>=2.32.5
|
|
30
27
|
Requires-Dist: SQLAlchemy>=2.0.45
|
|
31
28
|
Requires-Dist: typer>=0.21.0
|
|
32
|
-
Requires-Dist: xlrd>=2.0.2
|
|
33
|
-
Requires-Dist: xlwt>=1.3.0
|
|
34
29
|
Provides-Extra: dev
|
|
35
30
|
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
36
31
|
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
@@ -64,25 +59,9 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
64
59
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
65
60
|
|
|
66
61
|
- [ETLPlus](#etlplus)
|
|
67
|
-
- [Getting Started](#getting-started)
|
|
68
62
|
- [Features](#features)
|
|
69
63
|
- [Installation](#installation)
|
|
70
64
|
- [Quickstart](#quickstart)
|
|
71
|
-
- [Data Connectors](#data-connectors)
|
|
72
|
-
- [REST APIs (`api`)](#rest-apis-api)
|
|
73
|
-
- [Databases (`database`)](#databases-database)
|
|
74
|
-
- [Files (`file`)](#files-file)
|
|
75
|
-
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
76
|
-
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
77
|
-
- [Semi-Structured Text](#semi-structured-text)
|
|
78
|
-
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
79
|
-
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
80
|
-
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
81
|
-
- [Spreadsheets](#spreadsheets)
|
|
82
|
-
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
83
|
-
- [Logs and Event Streams](#logs-and-event-streams)
|
|
84
|
-
- [Data Archives](#data-archives)
|
|
85
|
-
- [Templates](#templates)
|
|
86
65
|
- [Usage](#usage)
|
|
87
66
|
- [Command Line Interface](#command-line-interface)
|
|
88
67
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -108,27 +87,11 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
108
87
|
- [Linting](#linting)
|
|
109
88
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
110
89
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
90
|
+
- [Links](#links)
|
|
111
91
|
- [License](#license)
|
|
112
92
|
- [Contributing](#contributing)
|
|
113
|
-
- [Documentation](#documentation)
|
|
114
|
-
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
115
|
-
- [Community Health](#community-health)
|
|
116
|
-
- [Other](#other)
|
|
117
93
|
- [Acknowledgments](#acknowledgments)
|
|
118
94
|
|
|
119
|
-
## Getting Started
|
|
120
|
-
|
|
121
|
-
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
122
|
-
as a Python library or from the command line.
|
|
123
|
-
|
|
124
|
-
To get started:
|
|
125
|
-
|
|
126
|
-
- See [Installation](#installation) for setup instructions.
|
|
127
|
-
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
128
|
-
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
129
|
-
|
|
130
|
-
ETLPlus supports Python 3.13 and above.
|
|
131
|
-
|
|
132
95
|
## Features
|
|
133
96
|
|
|
134
97
|
- **Check** data pipeline definitions before running them:
|
|
@@ -196,7 +159,7 @@ etlplus extract file examples/data/sample.csv \
|
|
|
196
159
|
[Python API](#python-api):
|
|
197
160
|
|
|
198
161
|
```python
|
|
199
|
-
from etlplus
|
|
162
|
+
from etlplus import extract, transform, validate, load
|
|
200
163
|
|
|
201
164
|
data = extract("file", "input.csv")
|
|
202
165
|
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
@@ -206,140 +169,6 @@ assert validate(filtered, rules)["valid"]
|
|
|
206
169
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
207
170
|
```
|
|
208
171
|
|
|
209
|
-
## Data Connectors
|
|
210
|
-
|
|
211
|
-
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
212
|
-
are differentiated by their types, each of which is represented in the subsections below.
|
|
213
|
-
|
|
214
|
-
### REST APIs (`api`)
|
|
215
|
-
|
|
216
|
-
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
217
|
-
include GET for extract and PATCH/POST/PUT for load.
|
|
218
|
-
|
|
219
|
-
### Databases (`database`)
|
|
220
|
-
|
|
221
|
-
Database connectors use connection strings for extraction and loading, and
|
|
222
|
-
DDL can be rendered from table specs for migrations or schema checks.
|
|
223
|
-
|
|
224
|
-
### Files (`file`)
|
|
225
|
-
|
|
226
|
-
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
227
|
-
|
|
228
|
-
- **Y**: implemented (may require optional dependencies)
|
|
229
|
-
- **N**: stubbed or not yet implemented
|
|
230
|
-
|
|
231
|
-
#### Stubbed / Placeholder
|
|
232
|
-
|
|
233
|
-
| Format | Read | Write | Description |
|
|
234
|
-
| --- | --- | --- | --- |
|
|
235
|
-
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
236
|
-
|
|
237
|
-
#### Tabular & Delimited Text
|
|
238
|
-
|
|
239
|
-
| Format | Read | Write | Description |
|
|
240
|
-
| --- | --- | --- | --- |
|
|
241
|
-
| `csv` | Y | Y | Comma-Separated Values |
|
|
242
|
-
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
243
|
-
| `fwf` | N | N | Fixed-Width Fields |
|
|
244
|
-
| `psv` | N | N | Pipe-Separated Values |
|
|
245
|
-
| `tab` | N | N | Often synonymous with TSV |
|
|
246
|
-
| `tsv` | Y | Y | Tab-Separated Values |
|
|
247
|
-
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
248
|
-
|
|
249
|
-
#### Semi-Structured Text
|
|
250
|
-
|
|
251
|
-
| Format | Read | Write | Description |
|
|
252
|
-
| --- | --- | --- | --- |
|
|
253
|
-
| `cfg` | N | N | Config-style key-value pairs |
|
|
254
|
-
| `conf` | N | N | Config-style key-value pairs |
|
|
255
|
-
| `ini` | N | N | Config-style key-value pairs |
|
|
256
|
-
| `json` | Y | Y | JavaScript Object Notation |
|
|
257
|
-
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
258
|
-
| `properties` | N | N | Java-style key-value pairs |
|
|
259
|
-
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
260
|
-
| `xml` | Y | Y | Extensible Markup Language |
|
|
261
|
-
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
262
|
-
|
|
263
|
-
#### Columnar / Analytics-Friendly
|
|
264
|
-
|
|
265
|
-
| Format | Read | Write | Description |
|
|
266
|
-
| --- | --- | --- | --- |
|
|
267
|
-
| `arrow` | N | N | Apache Arrow IPC |
|
|
268
|
-
| `feather` | Y | Y | Apache Arrow Feather |
|
|
269
|
-
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
270
|
-
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
271
|
-
|
|
272
|
-
#### Binary Serialization and Interchange
|
|
273
|
-
|
|
274
|
-
| Format | Read | Write | Description |
|
|
275
|
-
| --- | --- | --- | --- |
|
|
276
|
-
| `avro` | Y | Y | Apache Avro |
|
|
277
|
-
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
278
|
-
| `cbor` | N | N | Concise Binary Object Representation |
|
|
279
|
-
| `ion` | N | N | Amazon Ion |
|
|
280
|
-
| `msgpack` | N | N | MessagePack |
|
|
281
|
-
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
282
|
-
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
283
|
-
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
284
|
-
|
|
285
|
-
#### Databases and Embedded Storage
|
|
286
|
-
|
|
287
|
-
| Format | Read | Write | Description |
|
|
288
|
-
| --- | --- | --- | --- |
|
|
289
|
-
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
290
|
-
| `duckdb` | N | N | DuckDB |
|
|
291
|
-
| `mdb` | N | N | Microsoft Access (older format) |
|
|
292
|
-
| `sqlite` | N | N | SQLite |
|
|
293
|
-
|
|
294
|
-
#### Spreadsheets
|
|
295
|
-
|
|
296
|
-
| Format | Read | Write | Description |
|
|
297
|
-
| --- | --- | --- | --- |
|
|
298
|
-
| `numbers` | N | N | Apple Numbers |
|
|
299
|
-
| `ods` | N | N | OpenDocument |
|
|
300
|
-
| `wks` | N | N | Lotus 1-2-3 |
|
|
301
|
-
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
302
|
-
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
303
|
-
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
304
|
-
|
|
305
|
-
#### Statistical / Scientific / Numeric Computing
|
|
306
|
-
|
|
307
|
-
| Format | Read | Write | Description |
|
|
308
|
-
| --- | --- | --- | --- |
|
|
309
|
-
| `dta` | N | N | Stata |
|
|
310
|
-
| `hdf5` | N | N | Hierarchical Data Format |
|
|
311
|
-
| `mat` | N | N | MATLAB |
|
|
312
|
-
| `nc` | N | N | NetCDF |
|
|
313
|
-
| `rda` | N | N | RData workspace/object |
|
|
314
|
-
| `rds` | N | N | R data |
|
|
315
|
-
| `sas7bdat` | N | N | SAS data |
|
|
316
|
-
| `sav` | N | N | SPSS data |
|
|
317
|
-
| `sylk` | N | N | Symbolic Link |
|
|
318
|
-
| `xpt` | N | N | SAS Transport |
|
|
319
|
-
| `zsav` | N | N | Compressed SPSS data |
|
|
320
|
-
|
|
321
|
-
#### Logs and Event Streams
|
|
322
|
-
|
|
323
|
-
| Format | Read | Write | Description |
|
|
324
|
-
| --- | --- | --- | --- |
|
|
325
|
-
| `log` | N | N | Generic log file |
|
|
326
|
-
|
|
327
|
-
#### Data Archives
|
|
328
|
-
|
|
329
|
-
| Format | Read | Write | Description |
|
|
330
|
-
| --- | --- | --- | --- |
|
|
331
|
-
| `gz` | Y | Y | Gzip-compressed file |
|
|
332
|
-
| `zip` | Y | Y | ZIP archive |
|
|
333
|
-
|
|
334
|
-
#### Templates
|
|
335
|
-
|
|
336
|
-
| Format | Read | Write | Description |
|
|
337
|
-
| --- | --- | --- | --- |
|
|
338
|
-
| `hbs` | N | N | Handlebars |
|
|
339
|
-
| `jinja2` | N | N | Jinja2 |
|
|
340
|
-
| `mustache` | N | N | Mustache |
|
|
341
|
-
| `vm` | N | N | Apache Velocity |
|
|
342
|
-
|
|
343
172
|
## Usage
|
|
344
173
|
|
|
345
174
|
### Command Line Interface
|
|
@@ -531,7 +360,7 @@ cat examples/data/sample.json \
|
|
|
531
360
|
Use ETLPlus as a Python library:
|
|
532
361
|
|
|
533
362
|
```python
|
|
534
|
-
from etlplus
|
|
363
|
+
from etlplus import extract, validate, transform, load
|
|
535
364
|
|
|
536
365
|
# Extract data
|
|
537
366
|
data = extract("file", "data.json")
|
|
@@ -587,7 +416,7 @@ etlplus transform \
|
|
|
587
416
|
# 3. Validate transformed data
|
|
588
417
|
etlplus validate \
|
|
589
418
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
590
|
-
|
|
419
|
+
temp/sample_transformed.json
|
|
591
420
|
|
|
592
421
|
# 4. Load to CSV
|
|
593
422
|
cat temp/sample_transformed.json \
|
|
@@ -726,7 +555,7 @@ We split tests into two layers:
|
|
|
726
555
|
pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
|
|
727
556
|
fake clients.
|
|
728
557
|
|
|
729
|
-
If a test calls `etlplus.cli.main()` or `etlplus.
|
|
558
|
+
If a test calls `etlplus.cli.main()` or `etlplus.run.run()`, it’s integration by default. Full
|
|
730
559
|
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
731
560
|
|
|
732
561
|
### Code Coverage
|
|
@@ -774,6 +603,17 @@ git push origin v1.4.0
|
|
|
774
603
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
775
604
|
this exercises the same build path the workflow uses.
|
|
776
605
|
|
|
606
|
+
## Links
|
|
607
|
+
|
|
608
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
609
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
610
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
611
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
612
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
613
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
614
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
615
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
616
|
+
|
|
777
617
|
## License
|
|
778
618
|
|
|
779
619
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -797,39 +637,6 @@ If you choose to be a code contributor, please first refer to these documents:
|
|
|
797
637
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
798
638
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
799
639
|
|
|
800
|
-
## Documentation
|
|
801
|
-
|
|
802
|
-
### Python Packages/Subpackage
|
|
803
|
-
|
|
804
|
-
Navigate to detailed documentation for each subpackage:
|
|
805
|
-
|
|
806
|
-
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
807
|
-
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
808
|
-
- [etlplus.cli](etlplus/cli/README.md): Command-line interface definitions for `etlplus`
|
|
809
|
-
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
810
|
-
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
811
|
-
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
812
|
-
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
813
|
-
profiles
|
|
814
|
-
|
|
815
|
-
### Community Health
|
|
816
|
-
|
|
817
|
-
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
818
|
-
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
819
|
-
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
820
|
-
- [Support](SUPPORT.md): Where to get help
|
|
821
|
-
|
|
822
|
-
### Other
|
|
823
|
-
|
|
824
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
825
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
826
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
827
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
828
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
829
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
830
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
831
|
-
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
832
|
-
|
|
833
640
|
## Acknowledgments
|
|
834
641
|
|
|
835
642
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
@@ -14,25 +14,9 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
14
14
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
15
15
|
|
|
16
16
|
- [ETLPlus](#etlplus)
|
|
17
|
-
- [Getting Started](#getting-started)
|
|
18
17
|
- [Features](#features)
|
|
19
18
|
- [Installation](#installation)
|
|
20
19
|
- [Quickstart](#quickstart)
|
|
21
|
-
- [Data Connectors](#data-connectors)
|
|
22
|
-
- [REST APIs (`api`)](#rest-apis-api)
|
|
23
|
-
- [Databases (`database`)](#databases-database)
|
|
24
|
-
- [Files (`file`)](#files-file)
|
|
25
|
-
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
26
|
-
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
27
|
-
- [Semi-Structured Text](#semi-structured-text)
|
|
28
|
-
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
29
|
-
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
30
|
-
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
31
|
-
- [Spreadsheets](#spreadsheets)
|
|
32
|
-
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
33
|
-
- [Logs and Event Streams](#logs-and-event-streams)
|
|
34
|
-
- [Data Archives](#data-archives)
|
|
35
|
-
- [Templates](#templates)
|
|
36
20
|
- [Usage](#usage)
|
|
37
21
|
- [Command Line Interface](#command-line-interface)
|
|
38
22
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -58,27 +42,11 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
58
42
|
- [Linting](#linting)
|
|
59
43
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
60
44
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
45
|
+
- [Links](#links)
|
|
61
46
|
- [License](#license)
|
|
62
47
|
- [Contributing](#contributing)
|
|
63
|
-
- [Documentation](#documentation)
|
|
64
|
-
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
65
|
-
- [Community Health](#community-health)
|
|
66
|
-
- [Other](#other)
|
|
67
48
|
- [Acknowledgments](#acknowledgments)
|
|
68
49
|
|
|
69
|
-
## Getting Started
|
|
70
|
-
|
|
71
|
-
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
72
|
-
as a Python library or from the command line.
|
|
73
|
-
|
|
74
|
-
To get started:
|
|
75
|
-
|
|
76
|
-
- See [Installation](#installation) for setup instructions.
|
|
77
|
-
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
78
|
-
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
79
|
-
|
|
80
|
-
ETLPlus supports Python 3.13 and above.
|
|
81
|
-
|
|
82
50
|
## Features
|
|
83
51
|
|
|
84
52
|
- **Check** data pipeline definitions before running them:
|
|
@@ -146,7 +114,7 @@ etlplus extract file examples/data/sample.csv \
|
|
|
146
114
|
[Python API](#python-api):
|
|
147
115
|
|
|
148
116
|
```python
|
|
149
|
-
from etlplus
|
|
117
|
+
from etlplus import extract, transform, validate, load
|
|
150
118
|
|
|
151
119
|
data = extract("file", "input.csv")
|
|
152
120
|
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
@@ -156,140 +124,6 @@ assert validate(filtered, rules)["valid"]
|
|
|
156
124
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
157
125
|
```
|
|
158
126
|
|
|
159
|
-
## Data Connectors
|
|
160
|
-
|
|
161
|
-
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
162
|
-
are differentiated by their types, each of which is represented in the subsections below.
|
|
163
|
-
|
|
164
|
-
### REST APIs (`api`)
|
|
165
|
-
|
|
166
|
-
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
167
|
-
include GET for extract and PATCH/POST/PUT for load.
|
|
168
|
-
|
|
169
|
-
### Databases (`database`)
|
|
170
|
-
|
|
171
|
-
Database connectors use connection strings for extraction and loading, and
|
|
172
|
-
DDL can be rendered from table specs for migrations or schema checks.
|
|
173
|
-
|
|
174
|
-
### Files (`file`)
|
|
175
|
-
|
|
176
|
-
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
177
|
-
|
|
178
|
-
- **Y**: implemented (may require optional dependencies)
|
|
179
|
-
- **N**: stubbed or not yet implemented
|
|
180
|
-
|
|
181
|
-
#### Stubbed / Placeholder
|
|
182
|
-
|
|
183
|
-
| Format | Read | Write | Description |
|
|
184
|
-
| --- | --- | --- | --- |
|
|
185
|
-
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
186
|
-
|
|
187
|
-
#### Tabular & Delimited Text
|
|
188
|
-
|
|
189
|
-
| Format | Read | Write | Description |
|
|
190
|
-
| --- | --- | --- | --- |
|
|
191
|
-
| `csv` | Y | Y | Comma-Separated Values |
|
|
192
|
-
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
193
|
-
| `fwf` | N | N | Fixed-Width Fields |
|
|
194
|
-
| `psv` | N | N | Pipe-Separated Values |
|
|
195
|
-
| `tab` | N | N | Often synonymous with TSV |
|
|
196
|
-
| `tsv` | Y | Y | Tab-Separated Values |
|
|
197
|
-
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
198
|
-
|
|
199
|
-
#### Semi-Structured Text
|
|
200
|
-
|
|
201
|
-
| Format | Read | Write | Description |
|
|
202
|
-
| --- | --- | --- | --- |
|
|
203
|
-
| `cfg` | N | N | Config-style key-value pairs |
|
|
204
|
-
| `conf` | N | N | Config-style key-value pairs |
|
|
205
|
-
| `ini` | N | N | Config-style key-value pairs |
|
|
206
|
-
| `json` | Y | Y | JavaScript Object Notation |
|
|
207
|
-
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
208
|
-
| `properties` | N | N | Java-style key-value pairs |
|
|
209
|
-
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
210
|
-
| `xml` | Y | Y | Extensible Markup Language |
|
|
211
|
-
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
212
|
-
|
|
213
|
-
#### Columnar / Analytics-Friendly
|
|
214
|
-
|
|
215
|
-
| Format | Read | Write | Description |
|
|
216
|
-
| --- | --- | --- | --- |
|
|
217
|
-
| `arrow` | N | N | Apache Arrow IPC |
|
|
218
|
-
| `feather` | Y | Y | Apache Arrow Feather |
|
|
219
|
-
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
220
|
-
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
221
|
-
|
|
222
|
-
#### Binary Serialization and Interchange
|
|
223
|
-
|
|
224
|
-
| Format | Read | Write | Description |
|
|
225
|
-
| --- | --- | --- | --- |
|
|
226
|
-
| `avro` | Y | Y | Apache Avro |
|
|
227
|
-
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
228
|
-
| `cbor` | N | N | Concise Binary Object Representation |
|
|
229
|
-
| `ion` | N | N | Amazon Ion |
|
|
230
|
-
| `msgpack` | N | N | MessagePack |
|
|
231
|
-
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
232
|
-
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
233
|
-
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
234
|
-
|
|
235
|
-
#### Databases and Embedded Storage
|
|
236
|
-
|
|
237
|
-
| Format | Read | Write | Description |
|
|
238
|
-
| --- | --- | --- | --- |
|
|
239
|
-
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
240
|
-
| `duckdb` | N | N | DuckDB |
|
|
241
|
-
| `mdb` | N | N | Microsoft Access (older format) |
|
|
242
|
-
| `sqlite` | N | N | SQLite |
|
|
243
|
-
|
|
244
|
-
#### Spreadsheets
|
|
245
|
-
|
|
246
|
-
| Format | Read | Write | Description |
|
|
247
|
-
| --- | --- | --- | --- |
|
|
248
|
-
| `numbers` | N | N | Apple Numbers |
|
|
249
|
-
| `ods` | N | N | OpenDocument |
|
|
250
|
-
| `wks` | N | N | Lotus 1-2-3 |
|
|
251
|
-
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
252
|
-
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
253
|
-
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
254
|
-
|
|
255
|
-
#### Statistical / Scientific / Numeric Computing
|
|
256
|
-
|
|
257
|
-
| Format | Read | Write | Description |
|
|
258
|
-
| --- | --- | --- | --- |
|
|
259
|
-
| `dta` | N | N | Stata |
|
|
260
|
-
| `hdf5` | N | N | Hierarchical Data Format |
|
|
261
|
-
| `mat` | N | N | MATLAB |
|
|
262
|
-
| `nc` | N | N | NetCDF |
|
|
263
|
-
| `rda` | N | N | RData workspace/object |
|
|
264
|
-
| `rds` | N | N | R data |
|
|
265
|
-
| `sas7bdat` | N | N | SAS data |
|
|
266
|
-
| `sav` | N | N | SPSS data |
|
|
267
|
-
| `sylk` | N | N | Symbolic Link |
|
|
268
|
-
| `xpt` | N | N | SAS Transport |
|
|
269
|
-
| `zsav` | N | N | Compressed SPSS data |
|
|
270
|
-
|
|
271
|
-
#### Logs and Event Streams
|
|
272
|
-
|
|
273
|
-
| Format | Read | Write | Description |
|
|
274
|
-
| --- | --- | --- | --- |
|
|
275
|
-
| `log` | N | N | Generic log file |
|
|
276
|
-
|
|
277
|
-
#### Data Archives
|
|
278
|
-
|
|
279
|
-
| Format | Read | Write | Description |
|
|
280
|
-
| --- | --- | --- | --- |
|
|
281
|
-
| `gz` | Y | Y | Gzip-compressed file |
|
|
282
|
-
| `zip` | Y | Y | ZIP archive |
|
|
283
|
-
|
|
284
|
-
#### Templates
|
|
285
|
-
|
|
286
|
-
| Format | Read | Write | Description |
|
|
287
|
-
| --- | --- | --- | --- |
|
|
288
|
-
| `hbs` | N | N | Handlebars |
|
|
289
|
-
| `jinja2` | N | N | Jinja2 |
|
|
290
|
-
| `mustache` | N | N | Mustache |
|
|
291
|
-
| `vm` | N | N | Apache Velocity |
|
|
292
|
-
|
|
293
127
|
## Usage
|
|
294
128
|
|
|
295
129
|
### Command Line Interface
|
|
@@ -481,7 +315,7 @@ cat examples/data/sample.json \
|
|
|
481
315
|
Use ETLPlus as a Python library:
|
|
482
316
|
|
|
483
317
|
```python
|
|
484
|
-
from etlplus
|
|
318
|
+
from etlplus import extract, validate, transform, load
|
|
485
319
|
|
|
486
320
|
# Extract data
|
|
487
321
|
data = extract("file", "data.json")
|
|
@@ -537,7 +371,7 @@ etlplus transform \
|
|
|
537
371
|
# 3. Validate transformed data
|
|
538
372
|
etlplus validate \
|
|
539
373
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
540
|
-
|
|
374
|
+
temp/sample_transformed.json
|
|
541
375
|
|
|
542
376
|
# 4. Load to CSV
|
|
543
377
|
cat temp/sample_transformed.json \
|
|
@@ -676,7 +510,7 @@ We split tests into two layers:
|
|
|
676
510
|
pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
|
|
677
511
|
fake clients.
|
|
678
512
|
|
|
679
|
-
If a test calls `etlplus.cli.main()` or `etlplus.
|
|
513
|
+
If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default. Full
|
|
680
514
|
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
681
515
|
|
|
682
516
|
### Code Coverage
|
|
@@ -724,6 +558,17 @@ git push origin v1.4.0
|
|
|
724
558
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
725
559
|
this exercises the same build path the workflow uses.
|
|
726
560
|
|
|
561
|
+
## Links
|
|
562
|
+
|
|
563
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
564
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
565
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
566
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
567
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
568
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
569
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
570
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
571
|
+
|
|
727
572
|
## License
|
|
728
573
|
|
|
729
574
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -747,39 +592,6 @@ If you choose to be a code contributor, please first refer these documents:
|
|
|
747
592
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
748
593
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
749
594
|
|
|
750
|
-
## Documentation
|
|
751
|
-
|
|
752
|
-
### Python Packages/Subpackage
|
|
753
|
-
|
|
754
|
-
Navigate to detailed documentation for each subpackage:
|
|
755
|
-
|
|
756
|
-
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
757
|
-
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
758
|
-
- [etlplus.cli](etlplus/cli/README.md): Command-line interface definitions for `etlplus`
|
|
759
|
-
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
760
|
-
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
761
|
-
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
762
|
-
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
763
|
-
profiles
|
|
764
|
-
|
|
765
|
-
### Community Health
|
|
766
|
-
|
|
767
|
-
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
768
|
-
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
769
|
-
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
770
|
-
- [Support](SUPPORT.md): Where to get help
|
|
771
|
-
|
|
772
|
-
### Other
|
|
773
|
-
|
|
774
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
775
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
776
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
777
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
778
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
779
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
780
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
781
|
-
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
782
|
-
|
|
783
595
|
## Acknowledgments
|
|
784
596
|
|
|
785
597
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
@@ -281,7 +281,7 @@ section.
|
|
|
281
281
|
|
|
282
282
|
## Validations
|
|
283
283
|
|
|
284
|
-
Validation rule sets map field names to rules, mirroring `etlplus.
|
|
284
|
+
Validation rule sets map field names to rules, mirroring `etlplus.validate.FieldRules`:
|
|
285
285
|
|
|
286
286
|
```yaml
|
|
287
287
|
validations:
|
|
@@ -297,7 +297,7 @@ validations:
|
|
|
297
297
|
|
|
298
298
|
## Transforms
|
|
299
299
|
|
|
300
|
-
Transformation pipelines follow `etlplus.
|
|
300
|
+
Transformation pipelines follow `etlplus.transform` shapes exactly:
|
|
301
301
|
|
|
302
302
|
```yaml
|
|
303
303
|
transforms:
|
|
@@ -377,9 +377,9 @@ Details:
|
|
|
377
377
|
- Unknown or malformed entries are skipped rather than failing the whole load (keeping pipeline
|
|
378
378
|
authoring permissive).
|
|
379
379
|
- The connector kind is also available as a type-safe literal in code as
|
|
380
|
-
`etlplus.
|
|
380
|
+
`etlplus.config.ConnectorType` (values: `"file" | "database" | "api"`).
|
|
381
381
|
|
|
382
|
-
To add new connector kinds in the future, implement a new dataclass in `etlplus.
|
|
382
|
+
To add new connector kinds in the future, implement a new dataclass in `etlplus.config.connector`
|
|
383
383
|
and extend the internal parser to handle its `type` value.
|
|
384
384
|
|
|
385
385
|
## Jobs
|
|
@@ -390,20 +390,10 @@ target:
|
|
|
390
390
|
```yaml
|
|
391
391
|
jobs:
|
|
392
392
|
- name: file_to_file_customers
|
|
393
|
-
depends_on: [seed_customers]
|
|
394
393
|
extract: { source: customers_csv }
|
|
395
394
|
validate: { ruleset: customers_basic }
|
|
396
395
|
transform: { pipeline: clean_customers }
|
|
397
396
|
load: { target: customers_json_out }
|
|
398
|
-
- name: seed_customers
|
|
399
|
-
extract: { source: seed_customers_csv }
|
|
400
|
-
load: { target: customers_db_out }
|
|
401
|
-
```
|
|
402
|
-
|
|
403
|
-
Notes:
|
|
404
|
-
|
|
405
|
-
- `depends_on` is optional and can be a string or list of job names.
|
|
406
|
-
- Jobs without dependencies run first when ordered as a DAG.
|
|
407
397
|
|
|
408
398
|
## Running pipelines (CLI and Python)
|
|
409
399
|
|
|
@@ -431,14 +421,14 @@ Notes:
|
|
|
431
421
|
- Environment-variable substitution (e.g. `${GITHUB_TOKEN}`) is applied the same way as when loading
|
|
432
422
|
configs via the Python API.
|
|
433
423
|
- For more details on the orchestration implementation, see
|
|
434
|
-
[Runner internals: etlplus.
|
|
424
|
+
[Runner internals: etlplus.run](run-module.md).
|
|
435
425
|
|
|
436
|
-
### Python: `etlplus.
|
|
426
|
+
### Python: `etlplus.run.run`
|
|
437
427
|
|
|
438
428
|
To trigger a job programmatically, use the high-level runner function exposed by the package:
|
|
439
429
|
|
|
440
430
|
```python
|
|
441
|
-
from etlplus.
|
|
431
|
+
from etlplus.run import run as run_job
|
|
442
432
|
|
|
443
433
|
result = run_job(
|
|
444
434
|
job="file_to_file_customers",
|