etlplus 0.12.5__tar.gz → 0.15.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.12.5 → etlplus-0.15.2}/CONTRIBUTING.md +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/DEMO.md +1 -1
- {etlplus-0.12.5/etlplus.egg-info → etlplus-0.15.2}/PKG-INFO +156 -7
- {etlplus-0.12.5 → etlplus-0.15.2}/README.md +155 -6
- {etlplus-0.12.5 → etlplus-0.15.2}/docs/pipeline-guide.md +17 -7
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/README.md +5 -5
- etlplus-0.15.2/etlplus/__init__.py +18 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/README.md +33 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/__init__.py +10 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/config.py +39 -28
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/endpoint_client.py +3 -3
- etlplus-0.15.2/etlplus/api/enums.py +51 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/pagination/client.py +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/rate_limiting/config.py +13 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/rate_limiting/rate_limiter.py +8 -11
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/request_manager.py +11 -6
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/transport.py +14 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/types.py +96 -6
- etlplus-0.12.5/etlplus/run_helpers.py → etlplus-0.15.2/etlplus/api/utils.py +209 -153
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/README.md +2 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/commands.py +75 -42
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/handlers.py +40 -12
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/main.py +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/state.py +4 -7
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/database/README.md +2 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/database/engine.py +18 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/database/orm.py +2 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/enums.py +0 -32
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/README.md +2 -2
- etlplus-0.15.2/etlplus/file/_imports.py +141 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/_io.py +40 -0
- etlplus-0.15.2/etlplus/file/accdb.py +78 -0
- etlplus-0.15.2/etlplus/file/arrow.py +78 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/avro.py +17 -27
- etlplus-0.15.2/etlplus/file/bson.py +77 -0
- etlplus-0.15.2/etlplus/file/cbor.py +78 -0
- etlplus-0.15.2/etlplus/file/cfg.py +79 -0
- etlplus-0.15.2/etlplus/file/conf.py +80 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/core.py +119 -89
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/csv.py +13 -1
- etlplus-0.15.2/etlplus/file/dat.py +78 -0
- etlplus-0.15.2/etlplus/file/dta.py +77 -0
- etlplus-0.15.2/etlplus/file/duckdb.py +78 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/enums.py +119 -16
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/feather.py +14 -2
- etlplus-0.15.2/etlplus/file/fwf.py +77 -0
- etlplus-0.15.2/etlplus/file/hbs.py +78 -0
- etlplus-0.15.2/etlplus/file/hdf5.py +78 -0
- etlplus-0.15.2/etlplus/file/ini.py +79 -0
- etlplus-0.15.2/etlplus/file/ion.py +78 -0
- etlplus-0.15.2/etlplus/file/jinja2.py +78 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/json.py +15 -15
- etlplus-0.15.2/etlplus/file/log.py +78 -0
- etlplus-0.15.2/etlplus/file/mat.py +78 -0
- etlplus-0.15.2/etlplus/file/mdb.py +78 -0
- etlplus-0.15.2/etlplus/file/msgpack.py +78 -0
- etlplus-0.15.2/etlplus/file/mustache.py +78 -0
- etlplus-0.15.2/etlplus/file/nc.py +78 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/ndjson.py +14 -15
- etlplus-0.15.2/etlplus/file/numbers.py +75 -0
- etlplus-0.15.2/etlplus/file/ods.py +79 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/orc.py +14 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/parquet.py +14 -2
- etlplus-0.15.2/etlplus/file/pb.py +78 -0
- etlplus-0.15.2/etlplus/file/pbf.py +77 -0
- etlplus-0.15.2/etlplus/file/properties.py +78 -0
- etlplus-0.15.2/etlplus/file/proto.py +77 -0
- etlplus-0.15.2/etlplus/file/psv.py +79 -0
- etlplus-0.15.2/etlplus/file/rda.py +78 -0
- etlplus-0.15.2/etlplus/file/rds.py +78 -0
- etlplus-0.15.2/etlplus/file/sas7bdat.py +78 -0
- etlplus-0.15.2/etlplus/file/sav.py +77 -0
- etlplus-0.15.2/etlplus/file/sqlite.py +78 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/stub.py +13 -4
- etlplus-0.15.2/etlplus/file/sylk.py +77 -0
- etlplus-0.15.2/etlplus/file/tab.py +81 -0
- etlplus-0.15.2/etlplus/file/toml.py +78 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/tsv.py +14 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/txt.py +13 -10
- etlplus-0.15.2/etlplus/file/vm.py +78 -0
- etlplus-0.15.2/etlplus/file/wks.py +77 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/xls.py +1 -1
- etlplus-0.15.2/etlplus/file/xlsm.py +79 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/xlsx.py +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/xml.py +12 -1
- etlplus-0.15.2/etlplus/file/xpt.py +78 -0
- etlplus-0.15.2/etlplus/file/yaml.py +95 -0
- etlplus-0.15.2/etlplus/file/zsav.py +77 -0
- {etlplus-0.12.5/etlplus/validation → etlplus-0.15.2/etlplus/ops}/README.md +2 -2
- etlplus-0.15.2/etlplus/ops/__init__.py +61 -0
- {etlplus-0.12.5/etlplus → etlplus-0.15.2/etlplus/ops}/extract.py +78 -94
- {etlplus-0.12.5/etlplus → etlplus-0.15.2/etlplus/ops}/load.py +73 -93
- {etlplus-0.12.5/etlplus → etlplus-0.15.2/etlplus/ops}/run.py +153 -118
- {etlplus-0.12.5/etlplus → etlplus-0.15.2/etlplus/ops}/transform.py +75 -68
- {etlplus-0.12.5/etlplus/validation → etlplus-0.15.2/etlplus/ops}/utils.py +53 -17
- {etlplus-0.12.5/etlplus → etlplus-0.15.2/etlplus/ops}/validate.py +22 -12
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/templates/README.md +2 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/types.py +5 -4
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/utils.py +136 -2
- {etlplus-0.12.5/etlplus/config → etlplus-0.15.2/etlplus/workflow}/README.md +6 -6
- {etlplus-0.12.5/etlplus/config → etlplus-0.15.2/etlplus/workflow}/__init__.py +10 -23
- {etlplus-0.12.5/etlplus/config → etlplus-0.15.2/etlplus/workflow}/connector.py +58 -44
- etlplus-0.15.2/etlplus/workflow/dag.py +105 -0
- {etlplus-0.12.5/etlplus/config → etlplus-0.15.2/etlplus/workflow}/jobs.py +105 -32
- {etlplus-0.12.5/etlplus/config → etlplus-0.15.2/etlplus/workflow}/pipeline.py +57 -49
- {etlplus-0.12.5/etlplus/config → etlplus-0.15.2/etlplus/workflow}/profile.py +8 -5
- etlplus-0.15.2/etlplus/workflow/types.py +115 -0
- {etlplus-0.12.5 → etlplus-0.15.2/etlplus.egg-info}/PKG-INFO +156 -7
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus.egg-info/SOURCES.txt +71 -29
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/README.md +2 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/configs/pipeline.yml +4 -4
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/quickstart_python.py +5 -5
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/conftest.py +10 -10
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_pagination_strategy.py +13 -13
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_pipeline_yaml_load.py +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_run.py +2 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_run_profile_pagination_defaults.py +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_run_profile_rate_limit_defaults.py +1 -1
- etlplus-0.15.2/tests/unit/api/test_u_api_enums.py +34 -0
- etlplus-0.12.5/tests/unit/test_u_run_helpers.py → etlplus-0.15.2/tests/unit/api/test_u_api_utils.py +17 -15
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/cli/conftest.py +2 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/cli/test_u_cli_handlers.py +3 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/conftest.py +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/database/test_u_database_ddl.py +3 -2
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/file/test_u_file_core.py +87 -7
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/file/test_u_file_enums.py +1 -1
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/file/test_u_file_yaml.py +4 -4
- etlplus-0.12.5/tests/unit/test_u_extract.py → etlplus-0.15.2/tests/unit/ops/test_u_ops_extract.py +12 -12
- etlplus-0.12.5/tests/unit/test_u_load.py → etlplus-0.15.2/tests/unit/ops/test_u_ops_load.py +18 -17
- etlplus-0.12.5/tests/unit/test_u_run.py → etlplus-0.15.2/tests/unit/ops/test_u_ops_run.py +4 -4
- etlplus-0.12.5/tests/unit/test_u_transform.py → etlplus-0.15.2/tests/unit/ops/test_u_ops_transform.py +65 -64
- etlplus-0.12.5/tests/unit/validation/test_u_validation_utils.py → etlplus-0.15.2/tests/unit/ops/test_u_ops_utils.py +4 -4
- etlplus-0.12.5/tests/unit/test_u_validate.py → etlplus-0.15.2/tests/unit/ops/test_u_ops_validate.py +9 -9
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/test_u_enums.py +0 -16
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/test_u_utils.py +110 -4
- etlplus-0.12.5/tests/unit/config/test_u_connector.py → etlplus-0.15.2/tests/unit/workflow/test_u_workflow_connector.py +6 -6
- etlplus-0.12.5/tests/unit/config/test_u_jobs.py → etlplus-0.15.2/tests/unit/workflow/test_u_workflow_jobs.py +3 -3
- etlplus-0.12.5/tests/unit/config/test_u_pipeline.py → etlplus-0.15.2/tests/unit/workflow/test_u_workflow_pipeline.py +38 -34
- etlplus-0.12.5/etlplus/__init__.py +0 -43
- etlplus-0.12.5/etlplus/config/types.py +0 -204
- etlplus-0.12.5/etlplus/config/utils.py +0 -120
- etlplus-0.12.5/etlplus/file/_pandas.py +0 -58
- etlplus-0.12.5/etlplus/file/yaml.py +0 -136
- etlplus-0.12.5/etlplus/validation/__init__.py +0 -44
- etlplus-0.12.5/tests/unit/config/test_u_config_utils.py +0 -129
- {etlplus-0.12.5 → etlplus-0.15.2}/.coveragerc +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/.editorconfig +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/.gitattributes +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/.github/workflows/ci.yml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/.gitignore +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/.pre-commit-config.yaml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/.ruff.toml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/LICENSE +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/MANIFEST.in +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/Makefile +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/REFERENCES.md +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/SECURITY.md +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/SUPPORT.md +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/docs/README.md +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/__main__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/__version__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/auth.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/errors.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/constants.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/io.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/options.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/cli/types.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/database/__init__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/database/ddl.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/database/schema.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/database/types.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/__init__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/gz.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/file/zip.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/mixins.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/py.typed +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus.egg-info/requires.txt +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/data/sample.csv +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/data/sample.json +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/data/sample.xml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/data/sample.xsd +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/examples/data/sample.yaml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/pyproject.toml +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/pytest.ini +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/setup.cfg +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/setup.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/__init__.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/conftest.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_examples_data_parity.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/cli/test_u_cli_io.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/cli/test_u_cli_main.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/cli/test_u_cli_state.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/database/test_u_database_engine.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/database/test_u_database_schema.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.12.5 → etlplus-0.15.2}/tools/update_demo_snippets.py +0 -0
|
@@ -119,7 +119,7 @@ Use these guidelines to decide whether a test belongs in the unit or integration
|
|
|
119
119
|
- Can use temporary files/directories, and stub network with fakes/mocks.
|
|
120
120
|
- Examples in this repo: CLI end-to-end, pipeline smoke tests, pagination strategy, runner defaults for pagination/rate limits, target URL composition.
|
|
121
121
|
|
|
122
|
-
If a test calls `etlplus.cli.main()` or `etlplus.run.run()`, it is integration by default.
|
|
122
|
+
If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()`, it is integration by default.
|
|
123
123
|
|
|
124
124
|
### Where to put tests
|
|
125
125
|
|
|
@@ -196,7 +196,7 @@ $ etlplus load transformed.json file final_output.csv
|
|
|
196
196
|
## Demo 6: Using Python API
|
|
197
197
|
|
|
198
198
|
```python
|
|
199
|
-
from etlplus import extract, validate, transform, load
|
|
199
|
+
from etlplus.ops import extract, validate, transform, load
|
|
200
200
|
|
|
201
201
|
# Extract
|
|
202
202
|
data = extract("file", "data.csv", format="csv")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.12.5
|
|
3
|
+
Version: 0.15.2
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -68,6 +68,21 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
68
68
|
- [Features](#features)
|
|
69
69
|
- [Installation](#installation)
|
|
70
70
|
- [Quickstart](#quickstart)
|
|
71
|
+
- [Data Connectors](#data-connectors)
|
|
72
|
+
- [REST APIs (`api`)](#rest-apis-api)
|
|
73
|
+
- [Databases (`database`)](#databases-database)
|
|
74
|
+
- [Files (`file`)](#files-file)
|
|
75
|
+
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
76
|
+
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
77
|
+
- [Semi-Structured Text](#semi-structured-text)
|
|
78
|
+
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
79
|
+
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
80
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
81
|
+
- [Spreadsheets](#spreadsheets)
|
|
82
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
83
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
84
|
+
- [Data Archives](#data-archives)
|
|
85
|
+
- [Templates](#templates)
|
|
71
86
|
- [Usage](#usage)
|
|
72
87
|
- [Command Line Interface](#command-line-interface)
|
|
73
88
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -181,7 +196,7 @@ etlplus extract file examples/data/sample.csv \
|
|
|
181
196
|
[Python API](#python-api):
|
|
182
197
|
|
|
183
198
|
```python
|
|
184
|
-
from etlplus import extract, transform, validate, load
|
|
199
|
+
from etlplus.ops import extract, transform, validate, load
|
|
185
200
|
|
|
186
201
|
data = extract("file", "input.csv")
|
|
187
202
|
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
@@ -191,6 +206,140 @@ assert validate(filtered, rules)["valid"]
|
|
|
191
206
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
192
207
|
```
|
|
193
208
|
|
|
209
|
+
## Data Connectors
|
|
210
|
+
|
|
211
|
+
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
212
|
+
are differentiated by their types, each of which is represented in the subsections below.
|
|
213
|
+
|
|
214
|
+
### REST APIs (`api`)
|
|
215
|
+
|
|
216
|
+
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
217
|
+
include GET for extract and PATCH/POST/PUT for load.
|
|
218
|
+
|
|
219
|
+
### Databases (`database`)
|
|
220
|
+
|
|
221
|
+
Database connectors use connection strings for extraction and loading, and
|
|
222
|
+
DDL can be rendered from table specs for migrations or schema checks.
|
|
223
|
+
|
|
224
|
+
### Files (`file`)
|
|
225
|
+
|
|
226
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
227
|
+
|
|
228
|
+
- **Y**: implemented (may require optional dependencies)
|
|
229
|
+
- **N**: stubbed or not yet implemented
|
|
230
|
+
|
|
231
|
+
#### Stubbed / Placeholder
|
|
232
|
+
|
|
233
|
+
| Format | Read | Write | Description |
|
|
234
|
+
| --- | --- | --- | --- |
|
|
235
|
+
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
236
|
+
|
|
237
|
+
#### Tabular & Delimited Text
|
|
238
|
+
|
|
239
|
+
| Format | Read | Write | Description |
|
|
240
|
+
| --- | --- | --- | --- |
|
|
241
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
242
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
243
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
244
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
245
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
246
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
247
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
248
|
+
|
|
249
|
+
#### Semi-Structured Text
|
|
250
|
+
|
|
251
|
+
| Format | Read | Write | Description |
|
|
252
|
+
| --- | --- | --- | --- |
|
|
253
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
254
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
255
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
256
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
257
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
258
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
259
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
260
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
261
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
262
|
+
|
|
263
|
+
#### Columnar / Analytics-Friendly
|
|
264
|
+
|
|
265
|
+
| Format | Read | Write | Description |
|
|
266
|
+
| --- | --- | --- | --- |
|
|
267
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
268
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
269
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
270
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
271
|
+
|
|
272
|
+
#### Binary Serialization and Interchange
|
|
273
|
+
|
|
274
|
+
| Format | Read | Write | Description |
|
|
275
|
+
| --- | --- | --- | --- |
|
|
276
|
+
| `avro` | Y | Y | Apache Avro |
|
|
277
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
278
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
279
|
+
| `ion` | N | N | Amazon Ion |
|
|
280
|
+
| `msgpack` | N | N | MessagePack |
|
|
281
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
282
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
283
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
284
|
+
|
|
285
|
+
#### Databases and Embedded Storage
|
|
286
|
+
|
|
287
|
+
| Format | Read | Write | Description |
|
|
288
|
+
| --- | --- | --- | --- |
|
|
289
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
290
|
+
| `duckdb` | N | N | DuckDB |
|
|
291
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
292
|
+
| `sqlite` | N | N | SQLite |
|
|
293
|
+
|
|
294
|
+
#### Spreadsheets
|
|
295
|
+
|
|
296
|
+
| Format | Read | Write | Description |
|
|
297
|
+
| --- | --- | --- | --- |
|
|
298
|
+
| `numbers` | N | N | Apple Numbers |
|
|
299
|
+
| `ods` | N | N | OpenDocument |
|
|
300
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
301
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
302
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
303
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
304
|
+
|
|
305
|
+
#### Statistical / Scientific / Numeric Computing
|
|
306
|
+
|
|
307
|
+
| Format | Read | Write | Description |
|
|
308
|
+
| --- | --- | --- | --- |
|
|
309
|
+
| `dta` | N | N | Stata |
|
|
310
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
311
|
+
| `mat` | N | N | MATLAB |
|
|
312
|
+
| `nc` | N | N | NetCDF |
|
|
313
|
+
| `rda` | N | N | RData workspace/object |
|
|
314
|
+
| `rds` | N | N | R data |
|
|
315
|
+
| `sas7bdat` | N | N | SAS data |
|
|
316
|
+
| `sav` | N | N | SPSS data |
|
|
317
|
+
| `sylk` | N | N | Symbolic Link |
|
|
318
|
+
| `xpt` | N | N | SAS Transport |
|
|
319
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
320
|
+
|
|
321
|
+
#### Logs and Event Streams
|
|
322
|
+
|
|
323
|
+
| Format | Read | Write | Description |
|
|
324
|
+
| --- | --- | --- | --- |
|
|
325
|
+
| `log` | N | N | Generic log file |
|
|
326
|
+
|
|
327
|
+
#### Data Archives
|
|
328
|
+
|
|
329
|
+
| Format | Read | Write | Description |
|
|
330
|
+
| --- | --- | --- | --- |
|
|
331
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
332
|
+
| `zip` | Y | Y | ZIP archive |
|
|
333
|
+
|
|
334
|
+
#### Templates
|
|
335
|
+
|
|
336
|
+
| Format | Read | Write | Description |
|
|
337
|
+
| --- | --- | --- | --- |
|
|
338
|
+
| `hbs` | N | N | Handlebars |
|
|
339
|
+
| `jinja2` | N | N | Jinja2 |
|
|
340
|
+
| `mustache` | N | N | Mustache |
|
|
341
|
+
| `vm` | N | N | Apache Velocity |
|
|
342
|
+
|
|
194
343
|
## Usage
|
|
195
344
|
|
|
196
345
|
### Command Line Interface
|
|
@@ -382,7 +531,7 @@ cat examples/data/sample.json \
|
|
|
382
531
|
Use ETLPlus as a Python library:
|
|
383
532
|
|
|
384
533
|
```python
|
|
385
|
-
from etlplus import extract, validate, transform, load
|
|
534
|
+
from etlplus.ops import extract, validate, transform, load
|
|
386
535
|
|
|
387
536
|
# Extract data
|
|
388
537
|
data = extract("file", "data.json")
|
|
@@ -577,7 +726,7 @@ We split tests into two layers:
|
|
|
577
726
|
pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
|
|
578
727
|
fake clients.
|
|
579
728
|
|
|
580
|
-
If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default.
|
|
729
|
+
If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()` it’s integration by default. Full
|
|
581
730
|
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
582
731
|
|
|
583
732
|
### Code Coverage
|
|
@@ -656,12 +805,12 @@ Navigate to detailed documentation for each subpackage:
|
|
|
656
805
|
|
|
657
806
|
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
658
807
|
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
659
|
-
- [etlplus.
|
|
660
|
-
and profiles
|
|
661
|
-
- [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
|
|
808
|
+
- [etlplus.cli](etlplus/cli/README.md): Command-line interface definitions for `etlplus`
|
|
662
809
|
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
663
810
|
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
664
811
|
- [etlplus.ops](etlplus/ops/README.md): Extract, transform, load, run, and validation utilities and helpers
|
|
812
|
+
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
813
|
+
profiles
|
|
665
814
|
|
|
666
815
|
### Community Health
|
|
667
816
|
|
|
@@ -18,6 +18,21 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
18
18
|
- [Features](#features)
|
|
19
19
|
- [Installation](#installation)
|
|
20
20
|
- [Quickstart](#quickstart)
|
|
21
|
+
- [Data Connectors](#data-connectors)
|
|
22
|
+
- [REST APIs (`api`)](#rest-apis-api)
|
|
23
|
+
- [Databases (`database`)](#databases-database)
|
|
24
|
+
- [Files (`file`)](#files-file)
|
|
25
|
+
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
26
|
+
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
27
|
+
- [Semi-Structured Text](#semi-structured-text)
|
|
28
|
+
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
29
|
+
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
30
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
31
|
+
- [Spreadsheets](#spreadsheets)
|
|
32
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
33
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
34
|
+
- [Data Archives](#data-archives)
|
|
35
|
+
- [Templates](#templates)
|
|
21
36
|
- [Usage](#usage)
|
|
22
37
|
- [Command Line Interface](#command-line-interface)
|
|
23
38
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -131,7 +146,7 @@ etlplus extract file examples/data/sample.csv \
|
|
|
131
146
|
[Python API](#python-api):
|
|
132
147
|
|
|
133
148
|
```python
|
|
134
|
-
from etlplus import extract, transform, validate, load
|
|
149
|
+
from etlplus.ops import extract, transform, validate, load
|
|
135
150
|
|
|
136
151
|
data = extract("file", "input.csv")
|
|
137
152
|
ops = {"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}
|
|
@@ -141,6 +156,140 @@ assert validate(filtered, rules)["valid"]
|
|
|
141
156
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
142
157
|
```
|
|
143
158
|
|
|
159
|
+
## Data Connectors
|
|
160
|
+
|
|
161
|
+
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
162
|
+
are differentiated by their types, each of which is represented in the subsections below.
|
|
163
|
+
|
|
164
|
+
### REST APIs (`api`)
|
|
165
|
+
|
|
166
|
+
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
167
|
+
include GET for extract and PATCH/POST/PUT for load.
|
|
168
|
+
|
|
169
|
+
### Databases (`database`)
|
|
170
|
+
|
|
171
|
+
Database connectors use connection strings for extraction and loading, and
|
|
172
|
+
DDL can be rendered from table specs for migrations or schema checks.
|
|
173
|
+
|
|
174
|
+
### Files (`file`)
|
|
175
|
+
|
|
176
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
177
|
+
|
|
178
|
+
- **Y**: implemented (may require optional dependencies)
|
|
179
|
+
- **N**: stubbed or not yet implemented
|
|
180
|
+
|
|
181
|
+
#### Stubbed / Placeholder
|
|
182
|
+
|
|
183
|
+
| Format | Read | Write | Description |
|
|
184
|
+
| --- | --- | --- | --- |
|
|
185
|
+
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
186
|
+
|
|
187
|
+
#### Tabular & Delimited Text
|
|
188
|
+
|
|
189
|
+
| Format | Read | Write | Description |
|
|
190
|
+
| --- | --- | --- | --- |
|
|
191
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
192
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
193
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
194
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
195
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
196
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
197
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
198
|
+
|
|
199
|
+
#### Semi-Structured Text
|
|
200
|
+
|
|
201
|
+
| Format | Read | Write | Description |
|
|
202
|
+
| --- | --- | --- | --- |
|
|
203
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
204
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
205
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
206
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
207
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
208
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
209
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
210
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
211
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
212
|
+
|
|
213
|
+
#### Columnar / Analytics-Friendly
|
|
214
|
+
|
|
215
|
+
| Format | Read | Write | Description |
|
|
216
|
+
| --- | --- | --- | --- |
|
|
217
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
218
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
219
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
220
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
221
|
+
|
|
222
|
+
#### Binary Serialization and Interchange
|
|
223
|
+
|
|
224
|
+
| Format | Read | Write | Description |
|
|
225
|
+
| --- | --- | --- | --- |
|
|
226
|
+
| `avro` | Y | Y | Apache Avro |
|
|
227
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
228
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
229
|
+
| `ion` | N | N | Amazon Ion |
|
|
230
|
+
| `msgpack` | N | N | MessagePack |
|
|
231
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
232
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
233
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
234
|
+
|
|
235
|
+
#### Databases and Embedded Storage
|
|
236
|
+
|
|
237
|
+
| Format | Read | Write | Description |
|
|
238
|
+
| --- | --- | --- | --- |
|
|
239
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
240
|
+
| `duckdb` | N | N | DuckDB |
|
|
241
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
242
|
+
| `sqlite` | N | N | SQLite |
|
|
243
|
+
|
|
244
|
+
#### Spreadsheets
|
|
245
|
+
|
|
246
|
+
| Format | Read | Write | Description |
|
|
247
|
+
| --- | --- | --- | --- |
|
|
248
|
+
| `numbers` | N | N | Apple Numbers |
|
|
249
|
+
| `ods` | N | N | OpenDocument |
|
|
250
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
251
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
252
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
253
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
254
|
+
|
|
255
|
+
#### Statistical / Scientific / Numeric Computing
|
|
256
|
+
|
|
257
|
+
| Format | Read | Write | Description |
|
|
258
|
+
| --- | --- | --- | --- |
|
|
259
|
+
| `dta` | N | N | Stata |
|
|
260
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
261
|
+
| `mat` | N | N | MATLAB |
|
|
262
|
+
| `nc` | N | N | NetCDF |
|
|
263
|
+
| `rda` | N | N | RData workspace/object |
|
|
264
|
+
| `rds` | N | N | R data |
|
|
265
|
+
| `sas7bdat` | N | N | SAS data |
|
|
266
|
+
| `sav` | N | N | SPSS data |
|
|
267
|
+
| `sylk` | N | N | Symbolic Link |
|
|
268
|
+
| `xpt` | N | N | SAS Transport |
|
|
269
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
270
|
+
|
|
271
|
+
#### Logs and Event Streams
|
|
272
|
+
|
|
273
|
+
| Format | Read | Write | Description |
|
|
274
|
+
| --- | --- | --- | --- |
|
|
275
|
+
| `log` | N | N | Generic log file |
|
|
276
|
+
|
|
277
|
+
#### Data Archives
|
|
278
|
+
|
|
279
|
+
| Format | Read | Write | Description |
|
|
280
|
+
| --- | --- | --- | --- |
|
|
281
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
282
|
+
| `zip` | Y | Y | ZIP archive |
|
|
283
|
+
|
|
284
|
+
#### Templates
|
|
285
|
+
|
|
286
|
+
| Format | Read | Write | Description |
|
|
287
|
+
| --- | --- | --- | --- |
|
|
288
|
+
| `hbs` | N | N | Handlebars |
|
|
289
|
+
| `jinja2` | N | N | Jinja2 |
|
|
290
|
+
| `mustache` | N | N | Mustache |
|
|
291
|
+
| `vm` | N | N | Apache Velocity |
|
|
292
|
+
|
|
144
293
|
## Usage
|
|
145
294
|
|
|
146
295
|
### Command Line Interface
|
|
@@ -332,7 +481,7 @@ cat examples/data/sample.json \
|
|
|
332
481
|
Use ETLPlus as a Python library:
|
|
333
482
|
|
|
334
483
|
```python
|
|
335
|
-
from etlplus import extract, validate, transform, load
|
|
484
|
+
from etlplus.ops import extract, validate, transform, load
|
|
336
485
|
|
|
337
486
|
# Extract data
|
|
338
487
|
data = extract("file", "data.json")
|
|
@@ -527,7 +676,7 @@ We split tests into two layers:
|
|
|
527
676
|
pagination + rate limit defaults, file/API connector interactions) may touch temp files and use
|
|
528
677
|
fake clients.
|
|
529
678
|
|
|
530
|
-
If a test calls `etlplus.cli.main()` or `etlplus.run.run()` it’s integration by default.
|
|
679
|
+
If a test calls `etlplus.cli.main()` or `etlplus.ops.run.run()` it’s integration by default. Full
|
|
531
680
|
criteria: [`CONTRIBUTING.md#testing`](CONTRIBUTING.md#testing).
|
|
532
681
|
|
|
533
682
|
### Code Coverage
|
|
@@ -606,12 +755,12 @@ Navigate to detailed documentation for each subpackage:
|
|
|
606
755
|
|
|
607
756
|
- [etlplus.api](etlplus/api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
608
757
|
- [etlplus.file](etlplus/file/README.md): Unified file format support and helpers
|
|
609
|
-
- [etlplus.
|
|
610
|
-
and profiles
|
|
611
|
-
- [etlplus.cli](etlplus/cli/README.md): Command-line interface for ETLPlus workflows
|
|
758
|
+
- [etlplus.cli](etlplus/cli/README.md): Command-line interface definitions for `etlplus`
|
|
612
759
|
- [etlplus.database](etlplus/database/README.md): Database engine, schema, and ORM helpers
|
|
613
760
|
- [etlplus.templates](etlplus/templates/README.md): SQL and DDL template helpers
|
|
614
761
|
- [etlplus.validation](etlplus/validation/README.md): Data validation utilities and helpers
|
|
762
|
+
- [etlplus.workflow](etlplus/workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
763
|
+
profiles
|
|
615
764
|
|
|
616
765
|
### Community Health
|
|
617
766
|
|
|
@@ -281,7 +281,7 @@ section.
|
|
|
281
281
|
|
|
282
282
|
## Validations
|
|
283
283
|
|
|
284
|
-
Validation rule sets map field names to rules, mirroring `etlplus.validate.FieldRules`:
|
|
284
|
+
Validation rule sets map field names to rules, mirroring `etlplus.ops.validate.FieldRules`:
|
|
285
285
|
|
|
286
286
|
```yaml
|
|
287
287
|
validations:
|
|
@@ -297,7 +297,7 @@ validations:
|
|
|
297
297
|
|
|
298
298
|
## Transforms
|
|
299
299
|
|
|
300
|
-
Transformation pipelines follow `etlplus.transform` shapes exactly:
|
|
300
|
+
Transformation pipelines follow `etlplus.ops.transform` shapes exactly:
|
|
301
301
|
|
|
302
302
|
```yaml
|
|
303
303
|
transforms:
|
|
@@ -377,9 +377,9 @@ Details:
|
|
|
377
377
|
- Unknown or malformed entries are skipped rather than failing the whole load (keeping pipeline
|
|
378
378
|
authoring permissive).
|
|
379
379
|
- The connector kind is also available as a type-safe literal in code as
|
|
380
|
-
`etlplus.
|
|
380
|
+
`etlplus.workflow.ConnectorType` (values: `"file" | "database" | "api"`).
|
|
381
381
|
|
|
382
|
-
To add new connector kinds in the future, implement a new dataclass in `etlplus.
|
|
382
|
+
To add new connector kinds in the future, implement a new dataclass in `etlplus.workflow.connector`
|
|
383
383
|
and extend the internal parser to handle its `type` value.
|
|
384
384
|
|
|
385
385
|
## Jobs
|
|
@@ -390,10 +390,20 @@ target:
|
|
|
390
390
|
```yaml
|
|
391
391
|
jobs:
|
|
392
392
|
- name: file_to_file_customers
|
|
393
|
+
depends_on: [seed_customers]
|
|
393
394
|
extract: { source: customers_csv }
|
|
394
395
|
validate: { ruleset: customers_basic }
|
|
395
396
|
transform: { pipeline: clean_customers }
|
|
396
397
|
load: { target: customers_json_out }
|
|
398
|
+
- name: seed_customers
|
|
399
|
+
extract: { source: seed_customers_csv }
|
|
400
|
+
load: { target: customers_db_out }
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
Notes:
|
|
404
|
+
|
|
405
|
+
- `depends_on` is optional and can be a string or list of job names.
|
|
406
|
+
- Jobs without dependencies run first when ordered as a DAG.
|
|
397
407
|
|
|
398
408
|
## Running pipelines (CLI and Python)
|
|
399
409
|
|
|
@@ -421,14 +431,14 @@ Notes:
|
|
|
421
431
|
- Environment-variable substitution (e.g. `${GITHUB_TOKEN}`) is applied the same way as when loading
|
|
422
432
|
configs via the Python API.
|
|
423
433
|
- For more details on the orchestration implementation, see
|
|
424
|
-
[Runner internals: etlplus.run](run-module.md).
|
|
434
|
+
[Runner internals: etlplus.ops.run](run-module.md).
|
|
425
435
|
|
|
426
|
-
### Python: `etlplus.run.run`
|
|
436
|
+
### Python: `etlplus.ops.run.run`
|
|
427
437
|
|
|
428
438
|
To trigger a job programmatically, use the high-level runner function exposed by the package:
|
|
429
439
|
|
|
430
440
|
```python
|
|
431
|
-
from etlplus.run import run as run_job
|
|
441
|
+
from etlplus.ops.run import run as run_job
|
|
432
442
|
|
|
433
443
|
result = run_job(
|
|
434
444
|
job="file_to_file_customers",
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus
|
|
1
|
+
# `etlplus` Package
|
|
2
2
|
|
|
3
3
|
The `etlplus` package provides a unified Python API and CLI for ETL operations: extraction,
|
|
4
4
|
validation, transformation, and loading of data from files, APIs, and databases.
|
|
@@ -13,17 +13,17 @@ Back to project overview: see the top-level [README](../README.md).
|
|
|
13
13
|
|
|
14
14
|
- [etlplus.api](api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
15
15
|
- [etlplus.file](file/README.md): Unified file format support and helpers
|
|
16
|
-
- [etlplus.
|
|
17
|
-
profiles
|
|
18
|
-
- [etlplus.cli](cli/README.md): Command-line interface for ETLPlus workflows
|
|
16
|
+
- [etlplus.cli](cli/README.md): Command-line interface definitions for `etlplus`
|
|
19
17
|
- [etlplus.database](database/README.md): Database engine, schema, and ORM helpers
|
|
20
18
|
- [etlplus.templates](templates/README.md): SQL and DDL template helpers
|
|
21
19
|
- [etlplus.validation](validation/README.md): Data validation utilities and helpers
|
|
20
|
+
- [etlplus.workflow](workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
21
|
+
profiles
|
|
22
22
|
|
|
23
23
|
## Quickstart
|
|
24
24
|
|
|
25
25
|
```python
|
|
26
|
-
from etlplus import extract, validate, transform, load
|
|
26
|
+
from etlplus.ops import extract, validate, transform, load
|
|
27
27
|
|
|
28
28
|
data = extract("file", "input.csv")
|
|
29
29
|
filtered = transform(data, {"filter": {"field": "age", "op": "gt", "value": 25}})
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus` package.
|
|
3
|
+
|
|
4
|
+
Top-level facade for the ETLPlus toolkit.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .__version__ import __version__
|
|
8
|
+
|
|
9
|
+
__author__ = 'ETLPlus Team'
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# SECTION: EXPORTS ========================================================== #
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
'__author__',
|
|
17
|
+
'__version__',
|
|
18
|
+
]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.api
|
|
1
|
+
# `etlplus.api` Subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for paginated
|
|
4
4
|
REST endpoints.
|
|
@@ -12,7 +12,7 @@ REST endpoints.
|
|
|
12
12
|
|
|
13
13
|
Back to project overview: see the top-level [README](../../README.md).
|
|
14
14
|
|
|
15
|
-
- [etlplus.api
|
|
15
|
+
- [`etlplus.api` Subpackage](#etlplusapi-subpackage)
|
|
16
16
|
- [Installation](#installation)
|
|
17
17
|
- [Quickstart](#quickstart)
|
|
18
18
|
- [Overriding Rate Limits Per Call](#overriding-rate-limits-per-call)
|
|
@@ -22,6 +22,7 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
22
22
|
- [Authentication](#authentication)
|
|
23
23
|
- [Errors and Rate Limiting](#errors-and-rate-limiting)
|
|
24
24
|
- [Types and Transport](#types-and-transport)
|
|
25
|
+
- [Config Schemas](#config-schemas)
|
|
25
26
|
- [Supporting Modules](#supporting-modules)
|
|
26
27
|
- [Minimal Contract](#minimal-contract)
|
|
27
28
|
- [See also](#see-also)
|
|
@@ -225,6 +226,36 @@ providers can fall back to their own defaults. If you already possess a static t
|
|
|
225
226
|
`etlplus/api/request_manager.py` wraps `requests` sessions plus retry orchestration. Advanced
|
|
226
227
|
users may consult those modules to adapt behavior.
|
|
227
228
|
|
|
229
|
+
## Config Schemas
|
|
230
|
+
|
|
231
|
+
`etlplus.api.types` defines TypedDict-based configuration shapes for API profiles and endpoints.
|
|
232
|
+
Runtime parsing remains permissive in `etlplus.api.config`, but these types improve IDE
|
|
233
|
+
autocomplete and static analysis.
|
|
234
|
+
|
|
235
|
+
Exported types:
|
|
236
|
+
|
|
237
|
+
- `ApiConfigMap`: top-level API config shape
|
|
238
|
+
- `ApiProfileConfigMap`: per-profile API config shape
|
|
239
|
+
- `ApiProfileDefaultsMap`: defaults block within a profile
|
|
240
|
+
- `EndpointMap`: endpoint config shape
|
|
241
|
+
|
|
242
|
+
Example:
|
|
243
|
+
|
|
244
|
+
```python
|
|
245
|
+
from etlplus.api import ApiConfigMap
|
|
246
|
+
|
|
247
|
+
api_cfg: ApiConfigMap = {
|
|
248
|
+
"base_url": "https://example.test",
|
|
249
|
+
"headers": {"Authorization": "Bearer token"},
|
|
250
|
+
"endpoints": {
|
|
251
|
+
"users": {
|
|
252
|
+
"path": "/users",
|
|
253
|
+
"method": "GET",
|
|
254
|
+
},
|
|
255
|
+
},
|
|
256
|
+
}
|
|
257
|
+
```
|
|
258
|
+
|
|
228
259
|
## Supporting Modules
|
|
229
260
|
|
|
230
261
|
- `etlplus.api.types` collects friendly aliases such as `Headers`, `Params`, `Url`, and
|
|
@@ -78,6 +78,7 @@ from .config import ApiConfig
|
|
|
78
78
|
from .config import ApiProfileConfig
|
|
79
79
|
from .config import EndpointConfig
|
|
80
80
|
from .endpoint_client import EndpointClient
|
|
81
|
+
from .enums import HttpMethod
|
|
81
82
|
from .pagination import CursorPaginationConfigMap
|
|
82
83
|
from .pagination import PagePaginationConfigMap
|
|
83
84
|
from .pagination import PaginationClient
|
|
@@ -98,6 +99,10 @@ from .types import Headers
|
|
|
98
99
|
from .types import Params
|
|
99
100
|
from .types import RequestOptions
|
|
100
101
|
from .types import Url
|
|
102
|
+
from .utils import compose_api_request_env
|
|
103
|
+
from .utils import compose_api_target_env
|
|
104
|
+
from .utils import paginate_with_client
|
|
105
|
+
from .utils import resolve_request
|
|
101
106
|
|
|
102
107
|
# SECTION: EXPORTS ========================================================== #
|
|
103
108
|
|
|
@@ -119,9 +124,14 @@ __all__ = [
|
|
|
119
124
|
'RequestOptions',
|
|
120
125
|
'RetryStrategy',
|
|
121
126
|
# Enums
|
|
127
|
+
'HttpMethod',
|
|
122
128
|
'PaginationType',
|
|
123
129
|
# Functions
|
|
124
130
|
'build_http_adapter',
|
|
131
|
+
'compose_api_request_env',
|
|
132
|
+
'compose_api_target_env',
|
|
133
|
+
'paginate_with_client',
|
|
134
|
+
'resolve_request',
|
|
125
135
|
# Type Aliases
|
|
126
136
|
'CursorPaginationConfigMap',
|
|
127
137
|
'Headers',
|