etlplus 0.9.1__tar.gz → 0.9.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.9.1 → etlplus-0.9.2}/CONTRIBUTING.md +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/DEMO.md +1 -1
- {etlplus-0.9.1/etlplus.egg-info → etlplus-0.9.2}/PKG-INFO +210 -17
- {etlplus-0.9.1 → etlplus-0.9.2}/README.md +204 -16
- etlplus-0.9.2/SECURITY.md +15 -0
- etlplus-0.9.2/SUPPORT.md +18 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/docs/pipeline-guide.md +17 -7
- etlplus-0.9.2/etlplus/README.md +37 -0
- etlplus-0.9.2/etlplus/__init__.py +18 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/README.md +51 -3
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/__init__.py +10 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/config.py +39 -28
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/endpoint_client.py +3 -3
- etlplus-0.9.2/etlplus/api/enums.py +51 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/pagination/client.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/rate_limiting/config.py +13 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/rate_limiting/rate_limiter.py +8 -11
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/request_manager.py +11 -6
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/transport.py +14 -2
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/types.py +96 -6
- etlplus-0.9.1/etlplus/run_helpers.py → etlplus-0.9.2/etlplus/api/utils.py +209 -153
- etlplus-0.9.2/etlplus/cli/README.md +40 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/commands.py +76 -43
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/constants.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/handlers.py +40 -12
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/io.py +2 -2
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/main.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/state.py +4 -7
- etlplus-0.9.2/etlplus/database/README.md +48 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/database/ddl.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/database/engine.py +19 -3
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/database/orm.py +2 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/database/schema.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/enums.py +1 -157
- etlplus-0.9.2/etlplus/file/README.md +105 -0
- etlplus-0.9.2/etlplus/file/__init__.py +25 -0
- etlplus-0.9.2/etlplus/file/_imports.py +141 -0
- etlplus-0.9.2/etlplus/file/_io.py +160 -0
- etlplus-0.9.2/etlplus/file/accdb.py +78 -0
- etlplus-0.9.2/etlplus/file/arrow.py +78 -0
- etlplus-0.9.2/etlplus/file/avro.py +176 -0
- etlplus-0.9.2/etlplus/file/bson.py +77 -0
- etlplus-0.9.2/etlplus/file/cbor.py +78 -0
- etlplus-0.9.2/etlplus/file/cfg.py +79 -0
- etlplus-0.9.2/etlplus/file/conf.py +80 -0
- etlplus-0.9.2/etlplus/file/core.py +322 -0
- etlplus-0.9.2/etlplus/file/csv.py +79 -0
- etlplus-0.9.2/etlplus/file/dat.py +78 -0
- etlplus-0.9.2/etlplus/file/dta.py +77 -0
- etlplus-0.9.2/etlplus/file/duckdb.py +78 -0
- etlplus-0.9.2/etlplus/file/enums.py +343 -0
- etlplus-0.9.2/etlplus/file/feather.py +111 -0
- etlplus-0.9.2/etlplus/file/fwf.py +77 -0
- etlplus-0.9.2/etlplus/file/gz.py +123 -0
- etlplus-0.9.2/etlplus/file/hbs.py +78 -0
- etlplus-0.9.2/etlplus/file/hdf5.py +78 -0
- etlplus-0.9.2/etlplus/file/ini.py +79 -0
- etlplus-0.9.2/etlplus/file/ion.py +78 -0
- etlplus-0.9.2/etlplus/file/jinja2.py +78 -0
- etlplus-0.9.2/etlplus/file/json.py +98 -0
- etlplus-0.9.2/etlplus/file/log.py +78 -0
- etlplus-0.9.2/etlplus/file/mat.py +78 -0
- etlplus-0.9.2/etlplus/file/mdb.py +78 -0
- etlplus-0.9.2/etlplus/file/msgpack.py +78 -0
- etlplus-0.9.2/etlplus/file/mustache.py +78 -0
- etlplus-0.9.2/etlplus/file/nc.py +78 -0
- etlplus-0.9.2/etlplus/file/ndjson.py +108 -0
- etlplus-0.9.2/etlplus/file/numbers.py +75 -0
- etlplus-0.9.2/etlplus/file/ods.py +79 -0
- etlplus-0.9.2/etlplus/file/orc.py +111 -0
- etlplus-0.9.2/etlplus/file/parquet.py +113 -0
- etlplus-0.9.2/etlplus/file/pb.py +78 -0
- etlplus-0.9.2/etlplus/file/pbf.py +77 -0
- etlplus-0.9.2/etlplus/file/properties.py +78 -0
- etlplus-0.9.2/etlplus/file/proto.py +77 -0
- etlplus-0.9.2/etlplus/file/psv.py +79 -0
- etlplus-0.9.2/etlplus/file/rda.py +78 -0
- etlplus-0.9.2/etlplus/file/rds.py +78 -0
- etlplus-0.9.2/etlplus/file/sas7bdat.py +78 -0
- etlplus-0.9.2/etlplus/file/sav.py +77 -0
- etlplus-0.9.2/etlplus/file/sqlite.py +78 -0
- etlplus-0.9.2/etlplus/file/stub.py +84 -0
- etlplus-0.9.2/etlplus/file/sylk.py +77 -0
- etlplus-0.9.2/etlplus/file/tab.py +81 -0
- etlplus-0.9.2/etlplus/file/toml.py +78 -0
- etlplus-0.9.2/etlplus/file/tsv.py +80 -0
- etlplus-0.9.2/etlplus/file/txt.py +102 -0
- etlplus-0.9.2/etlplus/file/vm.py +78 -0
- etlplus-0.9.2/etlplus/file/wks.py +77 -0
- etlplus-0.9.2/etlplus/file/xls.py +88 -0
- etlplus-0.9.2/etlplus/file/xlsm.py +79 -0
- etlplus-0.9.2/etlplus/file/xlsx.py +99 -0
- etlplus-0.9.2/etlplus/file/xml.py +185 -0
- etlplus-0.9.2/etlplus/file/xpt.py +78 -0
- etlplus-0.9.2/etlplus/file/yaml.py +95 -0
- etlplus-0.9.2/etlplus/file/zip.py +175 -0
- etlplus-0.9.2/etlplus/file/zsav.py +77 -0
- etlplus-0.9.2/etlplus/ops/README.md +50 -0
- etlplus-0.9.2/etlplus/ops/__init__.py +61 -0
- {etlplus-0.9.1/etlplus → etlplus-0.9.2/etlplus/ops}/extract.py +81 -99
- {etlplus-0.9.1/etlplus → etlplus-0.9.2/etlplus/ops}/load.py +78 -101
- {etlplus-0.9.1/etlplus → etlplus-0.9.2/etlplus/ops}/run.py +159 -127
- {etlplus-0.9.1/etlplus → etlplus-0.9.2/etlplus/ops}/transform.py +75 -68
- {etlplus-0.9.1/etlplus/validation → etlplus-0.9.2/etlplus/ops}/utils.py +53 -17
- {etlplus-0.9.1/etlplus → etlplus-0.9.2/etlplus/ops}/validate.py +22 -12
- etlplus-0.9.2/etlplus/templates/README.md +46 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/types.py +5 -4
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/utils.py +136 -2
- etlplus-0.9.2/etlplus/workflow/README.md +52 -0
- {etlplus-0.9.1/etlplus/config → etlplus-0.9.2/etlplus/workflow}/__init__.py +10 -23
- {etlplus-0.9.1/etlplus/config → etlplus-0.9.2/etlplus/workflow}/connector.py +58 -44
- etlplus-0.9.2/etlplus/workflow/dag.py +105 -0
- {etlplus-0.9.1/etlplus/config → etlplus-0.9.2/etlplus/workflow}/jobs.py +105 -32
- {etlplus-0.9.1/etlplus/config → etlplus-0.9.2/etlplus/workflow}/pipeline.py +59 -51
- {etlplus-0.9.1/etlplus/config → etlplus-0.9.2/etlplus/workflow}/profile.py +8 -5
- etlplus-0.9.2/etlplus/workflow/types.py +115 -0
- {etlplus-0.9.1 → etlplus-0.9.2/etlplus.egg-info}/PKG-INFO +210 -17
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus.egg-info/SOURCES.txt +102 -28
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus.egg-info/requires.txt +5 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/README.md +2 -2
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/configs/pipeline.yml +4 -4
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/quickstart_python.py +5 -5
- {etlplus-0.9.1 → etlplus-0.9.2}/pyproject.toml +5 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/setup.py +5 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/conftest.py +10 -10
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_examples_data_parity.py +2 -2
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_pagination_strategy.py +13 -13
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_pipeline_yaml_load.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_run.py +2 -2
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_run_profile_pagination_defaults.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_run_profile_rate_limit_defaults.py +1 -1
- etlplus-0.9.2/tests/unit/api/test_u_api_enums.py +34 -0
- etlplus-0.9.1/tests/unit/test_u_run_helpers.py → etlplus-0.9.2/tests/unit/api/test_u_api_utils.py +17 -15
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/cli/conftest.py +107 -6
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/cli/test_u_cli_handlers.py +97 -380
- etlplus-0.9.2/tests/unit/cli/test_u_cli_io.py +326 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/cli/test_u_cli_main.py +59 -25
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/cli/test_u_cli_state.py +45 -41
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/conftest.py +1 -1
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/database/test_u_database_ddl.py +9 -5
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/database/test_u_database_engine.py +5 -4
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/database/test_u_database_schema.py +12 -14
- etlplus-0.9.2/tests/unit/file/test_u_file_core.py +571 -0
- etlplus-0.9.2/tests/unit/file/test_u_file_enums.py +99 -0
- etlplus-0.9.2/tests/unit/file/test_u_file_yaml.py +109 -0
- etlplus-0.9.1/tests/unit/test_u_extract.py → etlplus-0.9.2/tests/unit/ops/test_u_ops_extract.py +12 -12
- etlplus-0.9.1/tests/unit/test_u_load.py → etlplus-0.9.2/tests/unit/ops/test_u_ops_load.py +18 -17
- etlplus-0.9.1/tests/unit/test_u_run.py → etlplus-0.9.2/tests/unit/ops/test_u_ops_run.py +4 -4
- etlplus-0.9.1/tests/unit/test_u_transform.py → etlplus-0.9.2/tests/unit/ops/test_u_ops_transform.py +65 -64
- etlplus-0.9.1/tests/unit/validation/test_u_validation_utils.py → etlplus-0.9.2/tests/unit/ops/test_u_ops_utils.py +4 -4
- etlplus-0.9.1/tests/unit/test_u_validate.py → etlplus-0.9.2/tests/unit/ops/test_u_ops_validate.py +9 -9
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/test_u_enums.py +2 -51
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/test_u_utils.py +110 -4
- etlplus-0.9.1/tests/unit/config/test_u_connector.py → etlplus-0.9.2/tests/unit/workflow/test_u_workflow_connector.py +6 -6
- etlplus-0.9.1/tests/unit/config/test_u_jobs.py → etlplus-0.9.2/tests/unit/workflow/test_u_workflow_jobs.py +3 -3
- etlplus-0.9.1/tests/unit/config/test_u_pipeline.py → etlplus-0.9.2/tests/unit/workflow/test_u_workflow_pipeline.py +38 -34
- etlplus-0.9.1/etlplus/__init__.py +0 -43
- etlplus-0.9.1/etlplus/config/types.py +0 -204
- etlplus-0.9.1/etlplus/config/utils.py +0 -120
- etlplus-0.9.1/etlplus/file.py +0 -657
- etlplus-0.9.1/etlplus/validation/__init__.py +0 -44
- etlplus-0.9.1/tests/unit/config/test_u_config_utils.py +0 -129
- etlplus-0.9.1/tests/unit/test_u_file.py +0 -296
- {etlplus-0.9.1 → etlplus-0.9.2}/.coveragerc +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/.editorconfig +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/.gitattributes +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/.github/workflows/ci.yml +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/.gitignore +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/.pre-commit-config.yaml +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/.ruff.toml +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/LICENSE +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/MANIFEST.in +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/Makefile +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/REFERENCES.md +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/docs/README.md +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/__main__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/__version__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/auth.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/errors.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/options.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/cli/types.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/database/__init__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/database/types.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/mixins.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/py.typed +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/data/sample.csv +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/data/sample.json +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/data/sample.xml +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/data/sample.xsd +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/examples/data/sample.yaml +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/pytest.ini +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/setup.cfg +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/__init__.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/conftest.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.9.1 → etlplus-0.9.2}/tools/update_demo_snippets.py +0 -0
|
@@ -119,7 +119,7 @@ Use these guidelines to decide whether a test belongs in the unit or integration
|
|
|
119
119
|
- Can use temporary files/directories, and stub network with fakes/mocks.
|
|
120
120
|
- Examples in this repo: CLI end-to-end, pipeline smoke tests, pagination strategy, runner defaults for pagination/rate limits, target URL composition.
|
|
121
121
|
|
|
122
|
-
If a test calls `etlplus.cli.main()` or `etlplus.run.run()`, it is integration by default.
|
|
122
|
+
If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()`, it is integration by default.
|
|
123
123
|
|
|
124
124
|
### Where to put tests
|
|
125
125
|
|
|
@@ -196,7 +196,7 @@ $ etlplus load transformed.json file final_output.csv
|
|
|
196
196
|
## Demo 6: Using Python API
|
|
197
197
|
|
|
198
198
|
```python
|
|
199
|
-
from etlplus import extract, validate, transform, load
|
|
199
|
+
from etlplus.ops import extract, validate, transform, load
|
|
200
200
|
|
|
201
201
|
# Extract
|
|
202
202
|
data = extract("file", "data.csv", format="csv")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.9.1
|
|
3
|
+
Version: 0.9.2
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -17,8 +17,11 @@ Classifier: Programming Language :: Python :: 3.14
|
|
|
17
17
|
Requires-Python: >=3.13,<3.15
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
|
+
Requires-Dist: fastavro>=1.12.1
|
|
20
21
|
Requires-Dist: jinja2>=3.1.6
|
|
22
|
+
Requires-Dist: openpyxl>=3.1.5
|
|
21
23
|
Requires-Dist: pyodbc>=5.3.0
|
|
24
|
+
Requires-Dist: pyarrow>=22.0.0
|
|
22
25
|
Requires-Dist: python-dotenv>=1.2.1
|
|
23
26
|
Requires-Dist: pandas>=2.3.3
|
|
24
27
|
Requires-Dist: pydantic>=2.12.5
|
|
@@ -26,6 +29,8 @@ Requires-Dist: PyYAML>=6.0.3
|
|
|
26
29
|
Requires-Dist: requests>=2.32.5
|
|
27
30
|
Requires-Dist: SQLAlchemy>=2.0.45
|
|
28
31
|
Requires-Dist: typer>=0.21.0
|
|
32
|
+
Requires-Dist: xlrd>=2.0.2
|
|
33
|
+
Requires-Dist: xlwt>=1.3.0
|
|
29
34
|
Provides-Extra: dev
|
|
30
35
|
Requires-Dist: black>=25.9.0; extra == "dev"
|
|
31
36
|
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
@@ -59,9 +64,25 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
59
64
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
60
65
|
|
|
61
66
|
- [ETLPlus](#etlplus)
|
|
67
|
+
- [Getting Started](#getting-started)
|
|
62
68
|
- [Features](#features)
|
|
63
69
|
- [Installation](#installation)
|
|
64
70
|
- [Quickstart](#quickstart)
|
|
71
|
+
- [Data Connectors](#data-connectors)
|
|
72
|
+
- [REST APIs (`api`)](#rest-apis-api)
|
|
73
|
+
- [Databases (`database`)](#databases-database)
|
|
74
|
+
- [Files (`file`)](#files-file)
|
|
75
|
+
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
76
|
+
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
77
|
+
- [Semi-Structured Text](#semi-structured-text)
|
|
78
|
+
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
79
|
+
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
80
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
81
|
+
- [Spreadsheets](#spreadsheets)
|
|
82
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
83
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
84
|
+
- [Data Archives](#data-archives)
|
|
85
|
+
- [Templates](#templates)
|
|
65
86
|
- [Usage](#usage)
|
|
66
87
|
- [Command Line Interface](#command-line-interface)
|
|
67
88
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -87,11 +108,27 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
87
108
|
- [Linting](#linting)
|
|
88
109
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
89
110
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
90
|
-
- [Links](#links)
|
|
91
111
|
- [License](#license)
|
|
92
112
|
- [Contributing](#contributing)
|
|
113
|
+
- [Documentation](#documentation)
|
|
114
|
+
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
115
|
+
- [Community Health](#community-health)
|
|
116
|
+
- [Other](#other)
|
|
93
117
|
- [Acknowledgments](#acknowledgments)
|
|
94
118
|
|
|
119
|
+
## Getting Started
|
|
120
|
+
|
|
121
|
+
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
122
|
+
as a Python library or from the command line.
|
|
123
|
+
|
|
124
|
+
To get started:
|
|
125
|
+
|
|
126
|
+
- See [Installation](#installation) for setup instructions.
|
|
127
|
+
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
128
|
+
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
129
|
+
|
|
130
|
+
ETLPlus supports Python 3.13 and 3.14 (`Requires-Python: >=3.13,<3.15`).
|
|
131
|
+
|
|
95
132
|
## Features
|
|
96
133
|
|
|
97
134
|
- **Check** data pipeline definitions before running them:
|
|
@@ -159,7 +196,7 @@ etlplus extract file examples/data/sample.csv \
|
|
|
159
196
|
[Python API](#python-api):
|
|
160
197
|
|
|
161
198
|
```python
|
|
162
|
-
from etlplus import extract, transform, validate, load
|
|
199
|
+
from etlplus.ops import extract, transform, validate, load
|
|
163
200
|
|
|
164
201
|
data = extract("file", "input.csv")
|
|
165
202
|
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
@@ -169,6 +206,140 @@ assert validate(filtered, rules)["valid"]
|
|
|
169
206
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
170
207
|
```
|
|
171
208
|
|
|
209
|
+
## Data Connectors
|
|
210
|
+
|
|
211
|
+
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
212
|
+
are differentiated by their types, each of which is represented in the subsections below.
|
|
213
|
+
|
|
214
|
+
### REST APIs (`api`)
|
|
215
|
+
|
|
216
|
+
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
217
|
+
include GET for extract and PATCH/POST/PUT for load.
|
|
218
|
+
|
|
219
|
+
### Databases (`database`)
|
|
220
|
+
|
|
221
|
+
Database connectors use connection strings for extraction and loading, and
|
|
222
|
+
DDL can be rendered from table specs for migrations or schema checks.
|
|
223
|
+
|
|
224
|
+
### Files (`file`)
|
|
225
|
+
|
|
226
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
227
|
+
|
|
228
|
+
- **Y**: implemented (may require optional dependencies)
|
|
229
|
+
- **N**: stubbed or not yet implemented
|
|
230
|
+
|
|
231
|
+
#### Stubbed / Placeholder
|
|
232
|
+
|
|
233
|
+
| Format | Read | Write | Description |
|
|
234
|
+
| --- | --- | --- | --- |
|
|
235
|
+
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
236
|
+
|
|
237
|
+
#### Tabular & Delimited Text
|
|
238
|
+
|
|
239
|
+
| Format | Read | Write | Description |
|
|
240
|
+
| --- | --- | --- | --- |
|
|
241
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
242
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
243
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
244
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
245
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
246
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
247
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
248
|
+
|
|
249
|
+
#### Semi-Structured Text
|
|
250
|
+
|
|
251
|
+
| Format | Read | Write | Description |
|
|
252
|
+
| --- | --- | --- | --- |
|
|
253
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
254
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
255
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
256
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
257
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
258
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
259
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
260
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
261
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
262
|
+
|
|
263
|
+
#### Columnar / Analytics-Friendly
|
|
264
|
+
|
|
265
|
+
| Format | Read | Write | Description |
|
|
266
|
+
| --- | --- | --- | --- |
|
|
267
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
268
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
269
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
270
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
271
|
+
|
|
272
|
+
#### Binary Serialization and Interchange
|
|
273
|
+
|
|
274
|
+
| Format | Read | Write | Description |
|
|
275
|
+
| --- | --- | --- | --- |
|
|
276
|
+
| `avro` | Y | Y | Apache Avro |
|
|
277
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
278
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
279
|
+
| `ion` | N | N | Amazon Ion |
|
|
280
|
+
| `msgpack` | N | N | MessagePack |
|
|
281
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
282
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
283
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
284
|
+
|
|
285
|
+
#### Databases and Embedded Storage
|
|
286
|
+
|
|
287
|
+
| Format | Read | Write | Description |
|
|
288
|
+
| --- | --- | --- | --- |
|
|
289
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
290
|
+
| `duckdb` | N | N | DuckDB |
|
|
291
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
292
|
+
| `sqlite` | N | N | SQLite |
|
|
293
|
+
|
|
294
|
+
#### Spreadsheets
|
|
295
|
+
|
|
296
|
+
| Format | Read | Write | Description |
|
|
297
|
+
| --- | --- | --- | --- |
|
|
298
|
+
| `numbers` | N | N | Apple Numbers |
|
|
299
|
+
| `ods` | N | N | OpenDocument |
|
|
300
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
301
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
302
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
303
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
304
|
+
|
|
305
|
+
#### Statistical / Scientific / Numeric Computing
|
|
306
|
+
|
|
307
|
+
| Format | Read | Write | Description |
|
|
308
|
+
| --- | --- | --- | --- |
|
|
309
|
+
| `dta` | N | N | Stata |
|
|
310
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
311
|
+
| `mat` | N | N | MATLAB |
|
|
312
|
+
| `nc` | N | N | NetCDF |
|
|
313
|
+
| `rda` | N | N | RData workspace/object |
|
|
314
|
+
| `rds` | N | N | R data |
|
|
315
|
+
| `sas7bdat` | N | N | SAS data |
|
|
316
|
+
| `sav` | N | N | SPSS data |
|
|
317
|
+
| `sylk` | N | N | Symbolic Link |
|
|
318
|
+
| `xpt` | N | N | SAS Transport |
|
|
319
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
320
|
+
|
|
321
|
+
#### Logs and Event Streams
|
|
322
|
+
|
|
323
|
+
| Format | Read | Write | Description |
|
|
324
|
+
| --- | --- | --- | --- |
|
|
325
|
+
| `log` | N | N | Generic log file |
|
|
326
|
+
|
|
327
|
+
#### Data Archives
|
|
328
|
+
|
|
329
|
+
| Format | Read | Write | Description |
|
|
330
|
+
| --- | --- | --- | --- |
|
|
331
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
332
|
+
| `zip` | Y | Y | ZIP archive |
|
|
333
|
+
|
|
334
|
+
#### Templates
|
|
335
|
+
|
|
336
|
+
| Format | Read | Write | Description |
|
|
337
|
+
| --- | --- | --- | --- |
|
|
338
|
+
| `hbs` | N | N | Handlebars |
|
|
339
|
+
| `jinja2` | N | N | Jinja2 |
|
|
340
|
+
| `mustache` | N | N | Mustache |
|
|
341
|
+
| `vm` | N | N | Apache Velocity |
|
|
342
|
+
|
|
172
343
|
## Usage
|
|
173
344
|
|
|
174
345
|
### Command Line Interface
|
|
@@ -360,7 +531,7 @@ cat examples/data/sample.json \
|
|
|
360
531
|
Use ETLPlus as a Python library:
|
|
361
532
|
|
|
362
533
|
```python
|
|
363
|
-
from etlplus import extract, validate, transform, load
|
|
534
|
+
from etlplus.ops import extract, validate, transform, load
|
|
364
535
|
|
|
365
536
|
# Extract data
|
|
366
537
|
data = extract("file", "data.json")
|
|
@@ -416,7 +587,7 @@ etlplus transform \
|
|
|
416
587
|
# 3. Validate transformed data
|
|
417
588
|
etlplus validate \
|
|
418
589
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
419
|
-
|
|
590
|
+
temp/sample_transformed.json
|
|
420
591
|
|
|
421
592
|
# 4. Load to CSV
|
|
422
593
|
cat temp/sample_transformed.json \
|
|
@@ -555,7 +726,7 @@ We split tests into two layers:
|
|
|
555
726
|
pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
|
|
556
727
|
fake clients.
|
|
557
728
|
|
|
558
|
-
If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default.
|
|
729
|
+
If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()` it’s integration by default. Full
|
|
559
730
|
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
560
731
|
|
|
561
732
|
### Code Coverage
|
|
@@ -603,17 +774,6 @@ git push origin v1.4.0
|
|
|
603
774
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
604
775
|
this exercises the same build path the workflow uses.
|
|
605
776
|
|
|
606
|
-
## Links
|
|
607
|
-
|
|
608
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
609
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
610
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
611
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
612
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
613
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
614
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
615
|
-
- Additional references: [`REFERENCES.md`](`REFERENCES.md)
|
|
616
|
-
|
|
617
777
|
## License
|
|
618
778
|
|
|
619
779
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -637,6 +797,39 @@ If you choose to be a code contributor, please first refer these documents:
|
|
|
637
797
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
638
798
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
639
799
|
|
|
800
|
+
## Documentation
|
|
801
|
+
|
|
802
|
+
### Python Packages/Subpackage
|
|
803
|
+
|
|
804
|
+
Navigate to detailed documentation for each subpackage:
|
|
805
|
+
|
|
806
|
+
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
807
|
+
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
808
|
+
- [etlplus.cli](etlplus/cli/README.md): Command-line interface definitions for `etlplus`
|
|
809
|
+
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
810
|
+
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
811
|
+
- [etlplus.ops](etlplus/ops/README.md): Extract, validate, transform, load, and run operations
|
|
812
|
+
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
813
|
+
profiles
|
|
814
|
+
|
|
815
|
+
### Community Health
|
|
816
|
+
|
|
817
|
+
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
818
|
+
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
819
|
+
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
820
|
+
- [Support](SUPPORT.md): Where to get help
|
|
821
|
+
|
|
822
|
+
### Other
|
|
823
|
+
|
|
824
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
825
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
826
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
827
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
828
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
829
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
830
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
831
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
832
|
+
|
|
640
833
|
## Acknowledgments
|
|
641
834
|
|
|
642
835
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
@@ -14,9 +14,25 @@ ETLPlus is a veritable Swiss Army knife for enabling simple ETL operations, offe
|
|
|
14
14
|
package and command-line interface for data extraction, validation, transformation, and loading.
|
|
15
15
|
|
|
16
16
|
- [ETLPlus](#etlplus)
|
|
17
|
+
- [Getting Started](#getting-started)
|
|
17
18
|
- [Features](#features)
|
|
18
19
|
- [Installation](#installation)
|
|
19
20
|
- [Quickstart](#quickstart)
|
|
21
|
+
- [Data Connectors](#data-connectors)
|
|
22
|
+
- [REST APIs (`api`)](#rest-apis-api)
|
|
23
|
+
- [Databases (`database`)](#databases-database)
|
|
24
|
+
- [Files (`file`)](#files-file)
|
|
25
|
+
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
26
|
+
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
27
|
+
- [Semi-Structured Text](#semi-structured-text)
|
|
28
|
+
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
29
|
+
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
30
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
31
|
+
- [Spreadsheets](#spreadsheets)
|
|
32
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
33
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
34
|
+
- [Data Archives](#data-archives)
|
|
35
|
+
- [Templates](#templates)
|
|
20
36
|
- [Usage](#usage)
|
|
21
37
|
- [Command Line Interface](#command-line-interface)
|
|
22
38
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -42,11 +58,27 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
42
58
|
- [Linting](#linting)
|
|
43
59
|
- [Updating Demo Snippets](#updating-demo-snippets)
|
|
44
60
|
- [Releasing to PyPI](#releasing-to-pypi)
|
|
45
|
-
- [Links](#links)
|
|
46
61
|
- [License](#license)
|
|
47
62
|
- [Contributing](#contributing)
|
|
63
|
+
- [Documentation](#documentation)
|
|
64
|
+
- [Python Packages/Subpackage](#python-packagessubpackage)
|
|
65
|
+
- [Community Health](#community-health)
|
|
66
|
+
- [Other](#other)
|
|
48
67
|
- [Acknowledgments](#acknowledgments)
|
|
49
68
|
|
|
69
|
+
## Getting Started
|
|
70
|
+
|
|
71
|
+
ETLPlus helps you extract, validate, transform, and load data from files, databases, and APIs, either
|
|
72
|
+
as a Python library or from the command line.
|
|
73
|
+
|
|
74
|
+
To get started:
|
|
75
|
+
|
|
76
|
+
- See [Installation](#installation) for setup instructions.
|
|
77
|
+
- Try the [Quickstart](#quickstart) for a minimal working example (CLI and Python).
|
|
78
|
+
- Explore [Usage](#usage) for more detailed options and workflows.
|
|
79
|
+
|
|
80
|
+
ETLPlus supports Python 3.13 and above.
|
|
81
|
+
|
|
50
82
|
## Features
|
|
51
83
|
|
|
52
84
|
- **Check** data pipeline definitions before running them:
|
|
@@ -114,7 +146,7 @@ etlplus extract file examples/data/sample.csv \
|
|
|
114
146
|
[Python API](#python-api):
|
|
115
147
|
|
|
116
148
|
```python
|
|
117
|
-
from etlplus import extract, transform, validate, load
|
|
149
|
+
from etlplus.ops import extract, transform, validate, load
|
|
118
150
|
|
|
119
151
|
data = extract("file", "input.csv")
|
|
120
152
|
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
@@ -124,6 +156,140 @@ assert validate(filtered, rules)["valid"]
|
|
|
124
156
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
125
157
|
```
|
|
126
158
|
|
|
159
|
+
## Data Connectors
|
|
160
|
+
|
|
161
|
+
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
162
|
+
are differentiated by their types, each of which is represented in the subsections below.
|
|
163
|
+
|
|
164
|
+
### REST APIs (`api`)
|
|
165
|
+
|
|
166
|
+
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
167
|
+
include GET for extract and PATCH/POST/PUT for load.
|
|
168
|
+
|
|
169
|
+
### Databases (`database`)
|
|
170
|
+
|
|
171
|
+
Database connectors use connection strings for extraction and loading, and
|
|
172
|
+
DDL can be rendered from table specs for migrations or schema checks.
|
|
173
|
+
|
|
174
|
+
### Files (`file`)
|
|
175
|
+
|
|
176
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
177
|
+
|
|
178
|
+
- **Y**: implemented (may require optional dependencies)
|
|
179
|
+
- **N**: stubbed or not yet implemented
|
|
180
|
+
|
|
181
|
+
#### Stubbed / Placeholder
|
|
182
|
+
|
|
183
|
+
| Format | Read | Write | Description |
|
|
184
|
+
| --- | --- | --- | --- |
|
|
185
|
+
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
186
|
+
|
|
187
|
+
#### Tabular & Delimited Text
|
|
188
|
+
|
|
189
|
+
| Format | Read | Write | Description |
|
|
190
|
+
| --- | --- | --- | --- |
|
|
191
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
192
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
193
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
194
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
195
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
196
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
197
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
198
|
+
|
|
199
|
+
#### Semi-Structured Text
|
|
200
|
+
|
|
201
|
+
| Format | Read | Write | Description |
|
|
202
|
+
| --- | --- | --- | --- |
|
|
203
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
204
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
205
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
206
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
207
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
208
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
209
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
210
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
211
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
212
|
+
|
|
213
|
+
#### Columnar / Analytics-Friendly
|
|
214
|
+
|
|
215
|
+
| Format | Read | Write | Description |
|
|
216
|
+
| --- | --- | --- | --- |
|
|
217
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
218
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
219
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
220
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
221
|
+
|
|
222
|
+
#### Binary Serialization and Interchange
|
|
223
|
+
|
|
224
|
+
| Format | Read | Write | Description |
|
|
225
|
+
| --- | --- | --- | --- |
|
|
226
|
+
| `avro` | Y | Y | Apache Avro |
|
|
227
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
228
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
229
|
+
| `ion` | N | N | Amazon Ion |
|
|
230
|
+
| `msgpack` | N | N | MessagePack |
|
|
231
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
232
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
233
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
234
|
+
|
|
235
|
+
#### Databases and Embedded Storage
|
|
236
|
+
|
|
237
|
+
| Format | Read | Write | Description |
|
|
238
|
+
| --- | --- | --- | --- |
|
|
239
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
240
|
+
| `duckdb` | N | N | DuckDB |
|
|
241
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
242
|
+
| `sqlite` | N | N | SQLite |
|
|
243
|
+
|
|
244
|
+
#### Spreadsheets
|
|
245
|
+
|
|
246
|
+
| Format | Read | Write | Description |
|
|
247
|
+
| --- | --- | --- | --- |
|
|
248
|
+
| `numbers` | N | N | Apple Numbers |
|
|
249
|
+
| `ods` | N | N | OpenDocument |
|
|
250
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
251
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
252
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
253
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
254
|
+
|
|
255
|
+
#### Statistical / Scientific / Numeric Computing
|
|
256
|
+
|
|
257
|
+
| Format | Read | Write | Description |
|
|
258
|
+
| --- | --- | --- | --- |
|
|
259
|
+
| `dta` | N | N | Stata |
|
|
260
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
261
|
+
| `mat` | N | N | MATLAB |
|
|
262
|
+
| `nc` | N | N | NetCDF |
|
|
263
|
+
| `rda` | N | N | RData workspace/object |
|
|
264
|
+
| `rds` | N | N | R data |
|
|
265
|
+
| `sas7bdat` | N | N | SAS data |
|
|
266
|
+
| `sav` | N | N | SPSS data |
|
|
267
|
+
| `sylk` | N | N | Symbolic Link |
|
|
268
|
+
| `xpt` | N | N | SAS Transport |
|
|
269
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
270
|
+
|
|
271
|
+
#### Logs and Event Streams
|
|
272
|
+
|
|
273
|
+
| Format | Read | Write | Description |
|
|
274
|
+
| --- | --- | --- | --- |
|
|
275
|
+
| `log` | N | N | Generic log file |
|
|
276
|
+
|
|
277
|
+
#### Data Archives
|
|
278
|
+
|
|
279
|
+
| Format | Read | Write | Description |
|
|
280
|
+
| --- | --- | --- | --- |
|
|
281
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
282
|
+
| `zip` | Y | Y | ZIP archive |
|
|
283
|
+
|
|
284
|
+
#### Templates
|
|
285
|
+
|
|
286
|
+
| Format | Read | Write | Description |
|
|
287
|
+
| --- | --- | --- | --- |
|
|
288
|
+
| `hbs` | N | N | Handlebars |
|
|
289
|
+
| `jinja2` | N | N | Jinja2 |
|
|
290
|
+
| `mustache` | N | N | Mustache |
|
|
291
|
+
| `vm` | N | N | Apache Velocity |
|
|
292
|
+
|
|
127
293
|
## Usage
|
|
128
294
|
|
|
129
295
|
### Command Line Interface
|
|
@@ -315,7 +481,7 @@ cat examples/data/sample.json \
|
|
|
315
481
|
Use ETLPlus as a Python library:
|
|
316
482
|
|
|
317
483
|
```python
|
|
318
|
-
from etlplus import extract, validate, transform, load
|
|
484
|
+
from etlplus.ops import extract, validate, transform, load
|
|
319
485
|
|
|
320
486
|
# Extract data
|
|
321
487
|
data = extract("file", "data.json")
|
|
@@ -371,7 +537,7 @@ etlplus transform \
|
|
|
371
537
|
# 3. Validate transformed data
|
|
372
538
|
etlplus validate \
|
|
373
539
|
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
374
|
-
|
|
540
|
+
temp/sample_transformed.json
|
|
375
541
|
|
|
376
542
|
# 4. Load to CSV
|
|
377
543
|
cat temp/sample_transformed.json \
|
|
@@ -510,7 +676,7 @@ We split tests into two layers:
|
|
|
510
676
|
pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
|
|
511
677
|
fake clients.
|
|
512
678
|
|
|
513
|
-
If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default.
|
|
679
|
+
If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()` it’s integration by default. Full
|
|
514
680
|
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
515
681
|
|
|
516
682
|
### Code Coverage
|
|
@@ -558,17 +724,6 @@ git push origin v1.4.0
|
|
|
558
724
|
If you want an extra smoke-test before tagging, run `make dist && pip install dist/*.whl` locally;
|
|
559
725
|
this exercises the same build path the workflow uses.
|
|
560
726
|
|
|
561
|
-
## Links
|
|
562
|
-
|
|
563
|
-
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
564
|
-
- Examples: [`examples/README.md`](examples/README.md)
|
|
565
|
-
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
566
|
-
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
567
|
-
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
568
|
-
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
569
|
-
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
570
|
-
- Additional references: [`REFERENCES.md`](`REFERENCES.md)
|
|
571
|
-
|
|
572
727
|
## License
|
|
573
728
|
|
|
574
729
|
This project is licensed under the [MIT License](LICENSE).
|
|
@@ -592,6 +747,39 @@ If you choose to be a code contributor, please first refer these documents:
|
|
|
592
747
|
- Typing philosophy (TypedDicts as editor hints, permissive runtime):
|
|
593
748
|
[`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
594
749
|
|
|
750
|
+
## Documentation
|
|
751
|
+
|
|
752
|
+
### Python Packages/Subpackage
|
|
753
|
+
|
|
754
|
+
Navigate to detailed documentation for each subpackage:
|
|
755
|
+
|
|
756
|
+
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
757
|
+
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
758
|
+
- [etlplus.cli](etlplus/cli/README.md): Command-line interface definitions for `etlplus`
|
|
759
|
+
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
760
|
+
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
761
|
+
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
762
|
+
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
763
|
+
profiles
|
|
764
|
+
|
|
765
|
+
### Community Health
|
|
766
|
+
|
|
767
|
+
- [Contributing Guidelines](CONTRIBUTING.md): How to contribute, report issues, and submit PRs
|
|
768
|
+
- [Code of Conduct](CODE_OF_CONDUCT.md): Community standards and expectations
|
|
769
|
+
- [Security Policy](SECURITY.md): Responsible disclosure and vulnerability reporting
|
|
770
|
+
- [Support](SUPPORT.md): Where to get help
|
|
771
|
+
|
|
772
|
+
### Other
|
|
773
|
+
|
|
774
|
+
- API client docs: [`etlplus/api/README.md`](etlplus/api/README.md)
|
|
775
|
+
- Examples: [`examples/README.md`](examples/README.md)
|
|
776
|
+
- Pipeline authoring guide: [`docs/pipeline-guide.md`](docs/pipeline-guide.md)
|
|
777
|
+
- Runner internals: [`docs/run-module.md`](docs/run-module.md)
|
|
778
|
+
- Design notes (Mapping inputs, dict outputs): [`docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs`](docs/pipeline-guide.md#design-notes-mapping-inputs-dict-outputs)
|
|
779
|
+
- Typing philosophy: [`CONTRIBUTING.md#typing-philosophy`](CONTRIBUTING.md#typing-philosophy)
|
|
780
|
+
- Demo and walkthrough: [`DEMO.md`](DEMO.md)
|
|
781
|
+
- Additional references: [`REFERENCES.md`](REFERENCES.md)
|
|
782
|
+
|
|
595
783
|
## Acknowledgments
|
|
596
784
|
|
|
597
785
|
ETLPlus is inspired by common work patterns in data engineering and software engineering patterns in
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Reporting a Vulnerability
|
|
4
|
+
|
|
5
|
+
If you discover a security vulnerability in ETLPlus, please report it responsibly:
|
|
6
|
+
|
|
7
|
+
- **Do not** open a public issue for security problems.
|
|
8
|
+
- Email the maintainers at security@dagitali.com (or use a private contact method listed in the
|
|
9
|
+
repository).
|
|
10
|
+
- Include as much detail as possible: affected versions, steps to reproduce, and any relevant logs
|
|
11
|
+
or code.
|
|
12
|
+
- We will acknowledge your report within 3 business days and work with you to resolve the issue
|
|
13
|
+
promptly.
|
|
14
|
+
|
|
15
|
+
Thank you for helping keep ETLPlus and its users safe!
|
etlplus-0.9.2/SUPPORT.md
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Support
|
|
2
|
+
|
|
3
|
+
Thank you for using ETLPlus!
|
|
4
|
+
|
|
5
|
+
## Where to Get Help
|
|
6
|
+
|
|
7
|
+
- **Questions & Usage**: Please use [GitHub Discussions][discussions] for general questions, usage
|
|
8
|
+
help, and best practices.
|
|
9
|
+
- **Bugs & Feature Requests**: Open an issue in the [GitHub Issues][issues] tracker.
|
|
10
|
+
- **Security Issues**: See [SECURITY.md](SECURITY.md) for responsible disclosure.
|
|
11
|
+
- **Documentation**: See the [README](README.md) and [docs/](docs/) directory for guides and
|
|
12
|
+
references.
|
|
13
|
+
|
|
14
|
+
We aim to respond to questions and issues as quickly as possible. Community contributions and peer
|
|
15
|
+
support are also welcome!
|
|
16
|
+
|
|
17
|
+
[discussions]: https://github.com/Dagitali/ETLPlus/discussions
|
|
18
|
+
[issues]: https://github.com/Dagitali/ETLPlus/issues
|