etlplus 0.12.3__tar.gz → 0.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.12.3/etlplus.egg-info → etlplus-0.13.0}/PKG-INFO +150 -1
- {etlplus-0.12.3 → etlplus-0.13.0}/README.md +149 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/docs/pipeline-guide.md +10 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/jobs.py +14 -4
- etlplus-0.13.0/etlplus/dag.py +103 -0
- etlplus-0.13.0/etlplus/file/_imports.py +141 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/_io.py +1 -0
- etlplus-0.13.0/etlplus/file/accdb.py +78 -0
- etlplus-0.13.0/etlplus/file/arrow.py +78 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/avro.py +17 -27
- etlplus-0.13.0/etlplus/file/bson.py +77 -0
- etlplus-0.13.0/etlplus/file/cbor.py +78 -0
- etlplus-0.13.0/etlplus/file/cfg.py +79 -0
- etlplus-0.13.0/etlplus/file/conf.py +80 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/core.py +119 -84
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/csv.py +13 -1
- etlplus-0.13.0/etlplus/file/dat.py +78 -0
- etlplus-0.13.0/etlplus/file/dta.py +77 -0
- etlplus-0.13.0/etlplus/file/duckdb.py +78 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/enums.py +120 -15
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/feather.py +14 -2
- etlplus-0.13.0/etlplus/file/fwf.py +77 -0
- etlplus-0.13.0/etlplus/file/hbs.py +78 -0
- etlplus-0.13.0/etlplus/file/hdf5.py +78 -0
- etlplus-0.13.0/etlplus/file/ini.py +79 -0
- etlplus-0.13.0/etlplus/file/ion.py +78 -0
- etlplus-0.13.0/etlplus/file/jinja2.py +78 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/json.py +13 -1
- etlplus-0.13.0/etlplus/file/log.py +78 -0
- etlplus-0.13.0/etlplus/file/mat.py +78 -0
- etlplus-0.13.0/etlplus/file/mdb.py +78 -0
- etlplus-0.13.0/etlplus/file/msgpack.py +78 -0
- etlplus-0.13.0/etlplus/file/mustache.py +78 -0
- etlplus-0.13.0/etlplus/file/nc.py +78 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/ndjson.py +14 -15
- etlplus-0.13.0/etlplus/file/numbers.py +75 -0
- etlplus-0.13.0/etlplus/file/ods.py +79 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/orc.py +14 -2
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/parquet.py +14 -2
- etlplus-0.13.0/etlplus/file/pb.py +78 -0
- etlplus-0.13.0/etlplus/file/pbf.py +77 -0
- etlplus-0.13.0/etlplus/file/properties.py +78 -0
- etlplus-0.13.0/etlplus/file/proto.py +77 -0
- etlplus-0.13.0/etlplus/file/psv.py +79 -0
- etlplus-0.13.0/etlplus/file/rda.py +78 -0
- etlplus-0.13.0/etlplus/file/rds.py +78 -0
- etlplus-0.13.0/etlplus/file/sas7bdat.py +78 -0
- etlplus-0.13.0/etlplus/file/sav.py +77 -0
- etlplus-0.13.0/etlplus/file/sqlite.py +78 -0
- etlplus-0.13.0/etlplus/file/stub.py +84 -0
- etlplus-0.13.0/etlplus/file/sylk.py +77 -0
- etlplus-0.13.0/etlplus/file/tab.py +81 -0
- etlplus-0.13.0/etlplus/file/toml.py +78 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/tsv.py +14 -1
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/txt.py +13 -10
- etlplus-0.13.0/etlplus/file/vm.py +78 -0
- etlplus-0.13.0/etlplus/file/wks.py +77 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/xls.py +1 -1
- etlplus-0.13.0/etlplus/file/xlsm.py +79 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/xlsx.py +1 -1
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/xml.py +12 -1
- etlplus-0.13.0/etlplus/file/xpt.py +78 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/yaml.py +15 -44
- etlplus-0.13.0/etlplus/file/zsav.py +77 -0
- {etlplus-0.12.3 → etlplus-0.13.0/etlplus.egg-info}/PKG-INFO +150 -1
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus.egg-info/SOURCES.txt +44 -1
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/database/test_u_database_ddl.py +3 -2
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/file/test_u_file_core.py +132 -76
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/file/test_u_file_enums.py +28 -19
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/file/test_u_file_yaml.py +8 -9
- etlplus-0.12.3/etlplus/file/_pandas.py +0 -58
- {etlplus-0.12.3 → etlplus-0.13.0}/.coveragerc +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/.editorconfig +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/.gitattributes +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/.github/workflows/ci.yml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/.gitignore +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/.pre-commit-config.yaml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/.ruff.toml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/CONTRIBUTING.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/DEMO.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/LICENSE +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/MANIFEST.in +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/Makefile +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/REFERENCES.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/SECURITY.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/SUPPORT.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/docs/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/__main__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/__version__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/auth.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/config.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/errors.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/transport.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/api/types.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/commands.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/constants.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/handlers.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/io.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/main.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/options.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/state.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/cli/types.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/connector.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/pipeline.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/profile.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/types.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/config/utils.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/database/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/database/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/database/ddl.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/database/engine.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/database/orm.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/database/schema.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/database/types.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/enums.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/extract.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/gz.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/file/zip.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/load.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/mixins.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/py.typed +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/run.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/run_helpers.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/templates/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/transform.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/types.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/utils.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/validate.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/validation/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus/validation/utils.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus.egg-info/requires.txt +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/README.md +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/data/sample.csv +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/data/sample.json +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/data/sample.xml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/data/sample.xsd +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/data/sample.yaml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/examples/quickstart_python.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/pyproject.toml +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/pytest.ini +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/setup.cfg +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/setup.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/__init__.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/conftest.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/conftest.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_examples_data_parity.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_pagination_strategy.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_run.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/cli/conftest.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_handlers.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_io.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_main.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_state.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/config/test_u_config_utils.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/config/test_u_connector.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/config/test_u_jobs.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/config/test_u_pipeline.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/conftest.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/database/test_u_database_engine.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/database/test_u_database_schema.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_enums.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_extract.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_load.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_run.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_run_helpers.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_transform.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_utils.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_validate.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.12.3 → etlplus-0.13.0}/tools/update_demo_snippets.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.12.3
|
|
3
|
+
Version: 0.13.0
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -68,6 +68,21 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
68
68
|
- [Features](#features)
|
|
69
69
|
- [Installation](#installation)
|
|
70
70
|
- [Quickstart](#quickstart)
|
|
71
|
+
- [Data Connectors](#data-connectors)
|
|
72
|
+
- [REST APIs (`api`)](#rest-apis-api)
|
|
73
|
+
- [Databases (`database`)](#databases-database)
|
|
74
|
+
- [Files (`file`)](#files-file)
|
|
75
|
+
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
76
|
+
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
77
|
+
- [Semi-Structured Text](#semi-structured-text)
|
|
78
|
+
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
79
|
+
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
80
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
81
|
+
- [Spreadsheets](#spreadsheets)
|
|
82
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
83
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
84
|
+
- [Data Archives](#data-archives)
|
|
85
|
+
- [Templates](#templates)
|
|
71
86
|
- [Usage](#usage)
|
|
72
87
|
- [Command Line Interface](#command-line-interface)
|
|
73
88
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -191,6 +206,140 @@ assert validate(filtered, rules)["valid"]
|
|
|
191
206
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
192
207
|
```
|
|
193
208
|
|
|
209
|
+
## Data Connectors
|
|
210
|
+
|
|
211
|
+
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
212
|
+
are differentiated by their types, each of which is represented in the subsections below.
|
|
213
|
+
|
|
214
|
+
### REST APIs (`api`)
|
|
215
|
+
|
|
216
|
+
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
217
|
+
include GET for extract and PATCH/POST/PUT for load.
|
|
218
|
+
|
|
219
|
+
### Databases (`database`)
|
|
220
|
+
|
|
221
|
+
Database connectors use connection strings for extraction and loading, and
|
|
222
|
+
DDL can be rendered from table specs for migrations or schema checks.
|
|
223
|
+
|
|
224
|
+
### Files (`file`)
|
|
225
|
+
|
|
226
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
227
|
+
|
|
228
|
+
- **Y**: implemented (may require optional dependencies)
|
|
229
|
+
- **N**: stubbed or not yet implemented
|
|
230
|
+
|
|
231
|
+
#### Stubbed / Placeholder
|
|
232
|
+
|
|
233
|
+
| Format | Read | Write | Description |
|
|
234
|
+
| --- | --- | --- | --- |
|
|
235
|
+
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
236
|
+
|
|
237
|
+
#### Tabular & Delimited Text
|
|
238
|
+
|
|
239
|
+
| Format | Read | Write | Description |
|
|
240
|
+
| --- | --- | --- | --- |
|
|
241
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
242
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
243
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
244
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
245
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
246
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
247
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
248
|
+
|
|
249
|
+
#### Semi-Structured Text
|
|
250
|
+
|
|
251
|
+
| Format | Read | Write | Description |
|
|
252
|
+
| --- | --- | --- | --- |
|
|
253
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
254
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
255
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
256
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
257
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
258
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
259
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
260
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
261
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
262
|
+
|
|
263
|
+
#### Columnar / Analytics-Friendly
|
|
264
|
+
|
|
265
|
+
| Format | Read | Write | Description |
|
|
266
|
+
| --- | --- | --- | --- |
|
|
267
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
268
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
269
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
270
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
271
|
+
|
|
272
|
+
#### Binary Serialization and Interchange
|
|
273
|
+
|
|
274
|
+
| Format | Read | Write | Description |
|
|
275
|
+
| --- | --- | --- | --- |
|
|
276
|
+
| `avro` | Y | Y | Apache Avro |
|
|
277
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
278
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
279
|
+
| `ion` | N | N | Amazon Ion |
|
|
280
|
+
| `msgpack` | N | N | MessagePack |
|
|
281
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
282
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
283
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
284
|
+
|
|
285
|
+
#### Databases and Embedded Storage
|
|
286
|
+
|
|
287
|
+
| Format | Read | Write | Description |
|
|
288
|
+
| --- | --- | --- | --- |
|
|
289
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
290
|
+
| `duckdb` | N | N | DuckDB |
|
|
291
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
292
|
+
| `sqlite` | N | N | SQLite |
|
|
293
|
+
|
|
294
|
+
#### Spreadsheets
|
|
295
|
+
|
|
296
|
+
| Format | Read | Write | Description |
|
|
297
|
+
| --- | --- | --- | --- |
|
|
298
|
+
| `numbers` | N | N | Apple Numbers |
|
|
299
|
+
| `ods` | N | N | OpenDocument |
|
|
300
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
301
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
302
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
303
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
304
|
+
|
|
305
|
+
#### Statistical / Scientific / Numeric Computing
|
|
306
|
+
|
|
307
|
+
| Format | Read | Write | Description |
|
|
308
|
+
| --- | --- | --- | --- |
|
|
309
|
+
| `dta` | N | N | Stata |
|
|
310
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
311
|
+
| `mat` | N | N | MATLAB |
|
|
312
|
+
| `nc` | N | N | NetCDF |
|
|
313
|
+
| `rda` | N | N | RData workspace/object |
|
|
314
|
+
| `rds` | N | N | R data |
|
|
315
|
+
| `sas7bdat` | N | N | SAS data |
|
|
316
|
+
| `sav` | N | N | SPSS data |
|
|
317
|
+
| `sylk` | N | N | Symbolic Link |
|
|
318
|
+
| `xpt` | N | N | SAS Transport |
|
|
319
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
320
|
+
|
|
321
|
+
#### Logs and Event Streams
|
|
322
|
+
|
|
323
|
+
| Format | Read | Write | Description |
|
|
324
|
+
| --- | --- | --- | --- |
|
|
325
|
+
| `log` | N | N | Generic log file |
|
|
326
|
+
|
|
327
|
+
#### Data Archives
|
|
328
|
+
|
|
329
|
+
| Format | Read | Write | Description |
|
|
330
|
+
| --- | --- | --- | --- |
|
|
331
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
332
|
+
| `zip` | Y | Y | ZIP archive |
|
|
333
|
+
|
|
334
|
+
#### Templates
|
|
335
|
+
|
|
336
|
+
| Format | Read | Write | Description |
|
|
337
|
+
| --- | --- | --- | --- |
|
|
338
|
+
| `hbs` | N | N | Handlebars |
|
|
339
|
+
| `jinja2` | N | N | Jinja2 |
|
|
340
|
+
| `mustache` | N | N | Mustache |
|
|
341
|
+
| `vm` | N | N | Apache Velocity |
|
|
342
|
+
|
|
194
343
|
## Usage
|
|
195
344
|
|
|
196
345
|
### Command Line Interface
|
|
@@ -18,6 +18,21 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
18
18
|
- [Features](#features)
|
|
19
19
|
- [Installation](#installation)
|
|
20
20
|
- [Quickstart](#quickstart)
|
|
21
|
+
- [Data Connectors](#data-connectors)
|
|
22
|
+
- [REST APIs (`api`)](#rest-apis-api)
|
|
23
|
+
- [Databases (`database`)](#databases-database)
|
|
24
|
+
- [Files (`file`)](#files-file)
|
|
25
|
+
- [Stubbed / Placeholder](#stubbed--placeholder)
|
|
26
|
+
- [Tabular \& Delimited Text](#tabular--delimited-text)
|
|
27
|
+
- [Semi-Structured Text](#semi-structured-text)
|
|
28
|
+
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
29
|
+
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
30
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
31
|
+
- [Spreadsheets](#spreadsheets)
|
|
32
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
33
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
34
|
+
- [Data Archives](#data-archives)
|
|
35
|
+
- [Templates](#templates)
|
|
21
36
|
- [Usage](#usage)
|
|
22
37
|
- [Command Line Interface](#command-line-interface)
|
|
23
38
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -141,6 +156,140 @@ assert validate(filtered, rules)["valid"]
|
|
|
141
156
|
load(filtered, "file", "temp/sample_output.json", file_format="json")
|
|
142
157
|
```
|
|
143
158
|
|
|
159
|
+
## Data Connectors
|
|
160
|
+
|
|
161
|
+
Data connectors abstract sources from which to extract data and targets to which to load data. They
|
|
162
|
+
are differentiated by their types, each of which is represented in the subsections below.
|
|
163
|
+
|
|
164
|
+
### REST APIs (`api`)
|
|
165
|
+
|
|
166
|
+
ETLPlus can extract from REST APIs and load results via common HTTP methods. Supported operations
|
|
167
|
+
include GET for extract and PATCH/POST/PUT for load.
|
|
168
|
+
|
|
169
|
+
### Databases (`database`)
|
|
170
|
+
|
|
171
|
+
Database connectors use connection strings for extraction and loading, and
|
|
172
|
+
DDL can be rendered from table specs for migrations or schema checks.
|
|
173
|
+
|
|
174
|
+
### Files (`file`)
|
|
175
|
+
|
|
176
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
177
|
+
|
|
178
|
+
- **Y**: implemented (may require optional dependencies)
|
|
179
|
+
- **N**: stubbed or not yet implemented
|
|
180
|
+
|
|
181
|
+
#### Stubbed / Placeholder
|
|
182
|
+
|
|
183
|
+
| Format | Read | Write | Description |
|
|
184
|
+
| --- | --- | --- | --- |
|
|
185
|
+
| `stub` | N | N | Placeholder format for tests and future connectors. |
|
|
186
|
+
|
|
187
|
+
#### Tabular & Delimited Text
|
|
188
|
+
|
|
189
|
+
| Format | Read | Write | Description |
|
|
190
|
+
| --- | --- | --- | --- |
|
|
191
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
192
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
193
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
194
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
195
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
196
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
197
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
198
|
+
|
|
199
|
+
#### Semi-Structured Text
|
|
200
|
+
|
|
201
|
+
| Format | Read | Write | Description |
|
|
202
|
+
| --- | --- | --- | --- |
|
|
203
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
204
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
205
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
206
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
207
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
208
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
209
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
210
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
211
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
212
|
+
|
|
213
|
+
#### Columnar / Analytics-Friendly
|
|
214
|
+
|
|
215
|
+
| Format | Read | Write | Description |
|
|
216
|
+
| --- | --- | --- | --- |
|
|
217
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
218
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
219
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
220
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
221
|
+
|
|
222
|
+
#### Binary Serialization and Interchange
|
|
223
|
+
|
|
224
|
+
| Format | Read | Write | Description |
|
|
225
|
+
| --- | --- | --- | --- |
|
|
226
|
+
| `avro` | Y | Y | Apache Avro |
|
|
227
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
228
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
229
|
+
| `ion` | N | N | Amazon Ion |
|
|
230
|
+
| `msgpack` | N | N | MessagePack |
|
|
231
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
232
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
233
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
234
|
+
|
|
235
|
+
#### Databases and Embedded Storage
|
|
236
|
+
|
|
237
|
+
| Format | Read | Write | Description |
|
|
238
|
+
| --- | --- | --- | --- |
|
|
239
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
240
|
+
| `duckdb` | N | N | DuckDB |
|
|
241
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
242
|
+
| `sqlite` | N | N | SQLite |
|
|
243
|
+
|
|
244
|
+
#### Spreadsheets
|
|
245
|
+
|
|
246
|
+
| Format | Read | Write | Description |
|
|
247
|
+
| --- | --- | --- | --- |
|
|
248
|
+
| `numbers` | N | N | Apple Numbers |
|
|
249
|
+
| `ods` | N | N | OpenDocument |
|
|
250
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
251
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
252
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
253
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
254
|
+
|
|
255
|
+
#### Statistical / Scientific / Numeric Computing
|
|
256
|
+
|
|
257
|
+
| Format | Read | Write | Description |
|
|
258
|
+
| --- | --- | --- | --- |
|
|
259
|
+
| `dta` | N | N | Stata |
|
|
260
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
261
|
+
| `mat` | N | N | MATLAB |
|
|
262
|
+
| `nc` | N | N | NetCDF |
|
|
263
|
+
| `rda` | N | N | RData workspace/object |
|
|
264
|
+
| `rds` | N | N | R data |
|
|
265
|
+
| `sas7bdat` | N | N | SAS data |
|
|
266
|
+
| `sav` | N | N | SPSS data |
|
|
267
|
+
| `sylk` | N | N | Symbolic Link |
|
|
268
|
+
| `xpt` | N | N | SAS Transport |
|
|
269
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
270
|
+
|
|
271
|
+
#### Logs and Event Streams
|
|
272
|
+
|
|
273
|
+
| Format | Read | Write | Description |
|
|
274
|
+
| --- | --- | --- | --- |
|
|
275
|
+
| `log` | N | N | Generic log file |
|
|
276
|
+
|
|
277
|
+
#### Data Archives
|
|
278
|
+
|
|
279
|
+
| Format | Read | Write | Description |
|
|
280
|
+
| --- | --- | --- | --- |
|
|
281
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
282
|
+
| `zip` | Y | Y | ZIP archive |
|
|
283
|
+
|
|
284
|
+
#### Templates
|
|
285
|
+
|
|
286
|
+
| Format | Read | Write | Description |
|
|
287
|
+
| --- | --- | --- | --- |
|
|
288
|
+
| `hbs` | N | N | Handlebars |
|
|
289
|
+
| `jinja2` | N | N | Jinja2 |
|
|
290
|
+
| `mustache` | N | N | Mustache |
|
|
291
|
+
| `vm` | N | N | Apache Velocity |
|
|
292
|
+
|
|
144
293
|
## Usage
|
|
145
294
|
|
|
146
295
|
### Command Line Interface
|
|
@@ -390,10 +390,20 @@ target:
|
|
|
390
390
|
```yaml
|
|
391
391
|
jobs:
|
|
392
392
|
- name: file_to_file_customers
|
|
393
|
+
depends_on: [seed_customers]
|
|
393
394
|
extract: { source: customers_csv }
|
|
394
395
|
validate: { ruleset: customers_basic }
|
|
395
396
|
transform: { pipeline: clean_customers }
|
|
396
397
|
load: { target: customers_json_out }
|
|
398
|
+
- name: seed_customers
|
|
399
|
+
extract: { source: seed_customers_csv }
|
|
400
|
+
load: { target: customers_db_out }
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
Notes:
|
|
404
|
+
|
|
405
|
+
- `depends_on` is optional and can be a string or list of job names.
|
|
406
|
+
- Jobs without dependencies run first when ordered as a DAG.
|
|
397
407
|
|
|
398
408
|
## Running pipelines (CLI and Python)
|
|
399
409
|
|
|
@@ -34,10 +34,7 @@ __all__ = [
|
|
|
34
34
|
]
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
# SECTION:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# SECTION: CLASSES ========================================================== #
|
|
37
|
+
# SECTION: DATA CLASSES ===================================================== #
|
|
41
38
|
|
|
42
39
|
|
|
43
40
|
@dataclass(kw_only=True, slots=True)
|
|
@@ -100,6 +97,8 @@ class JobConfig:
|
|
|
100
97
|
Unique job name.
|
|
101
98
|
description : str | None
|
|
102
99
|
Optional human-friendly description.
|
|
100
|
+
depends_on : list[str]
|
|
101
|
+
Optional job dependency list. Dependencies must refer to other jobs.
|
|
103
102
|
extract : ExtractRef | None
|
|
104
103
|
Extraction reference.
|
|
105
104
|
validate : ValidationRef | None
|
|
@@ -114,6 +113,7 @@ class JobConfig:
|
|
|
114
113
|
|
|
115
114
|
name: str
|
|
116
115
|
description: str | None = None
|
|
116
|
+
depends_on: list[str] = field(default_factory=list)
|
|
117
117
|
extract: ExtractRef | None = None
|
|
118
118
|
validate: ValidationRef | None = None
|
|
119
119
|
transform: TransformRef | None = None
|
|
@@ -149,9 +149,19 @@ class JobConfig:
|
|
|
149
149
|
if description is not None and not isinstance(description, str):
|
|
150
150
|
description = str(description)
|
|
151
151
|
|
|
152
|
+
depends_raw = data.get('depends_on')
|
|
153
|
+
depends_on: list[str] = []
|
|
154
|
+
if isinstance(depends_raw, str):
|
|
155
|
+
depends_on = [depends_raw]
|
|
156
|
+
elif isinstance(depends_raw, list):
|
|
157
|
+
for entry in depends_raw:
|
|
158
|
+
if isinstance(entry, str):
|
|
159
|
+
depends_on.append(entry)
|
|
160
|
+
|
|
152
161
|
return cls(
|
|
153
162
|
name=name,
|
|
154
163
|
description=description,
|
|
164
|
+
depends_on=depends_on,
|
|
155
165
|
extract=ExtractRef.from_obj(data.get('extract')),
|
|
156
166
|
validate=ValidationRef.from_obj(data.get('validate')),
|
|
157
167
|
transform=TransformRef.from_obj(data.get('transform')),
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.dag` module.
|
|
3
|
+
|
|
4
|
+
Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
|
|
5
|
+
``depends_on``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import deque
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from .config.jobs import JobConfig
|
|
14
|
+
|
|
15
|
+
# SECTION: EXPORTS ========================================================== #
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'DagError',
|
|
20
|
+
'topological_sort_jobs',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# SECTION: ERRORS =========================================================== #
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(slots=True)
|
|
28
|
+
class DagError(ValueError):
|
|
29
|
+
"""
|
|
30
|
+
Raised when the job dependency graph is invalid.
|
|
31
|
+
|
|
32
|
+
Attributes
|
|
33
|
+
----------
|
|
34
|
+
message : str
|
|
35
|
+
Error message.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
# -- Attributes -- #
|
|
39
|
+
|
|
40
|
+
message: str
|
|
41
|
+
|
|
42
|
+
# -- Magic Methods (Object Representation) -- #
|
|
43
|
+
|
|
44
|
+
def __str__(self) -> str:
|
|
45
|
+
return self.message
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def topological_sort_jobs(
    jobs: list[JobConfig],
) -> list[JobConfig]:
    """
    Return jobs in topological order based on ``depends_on``.

    Jobs whose dependencies are all satisfied are emitted in alphabetical
    order of their names, so the result is deterministic for a given input.

    Parameters
    ----------
    jobs : list[JobConfig]
        List of job configurations to sort.

    Returns
    -------
    list[JobConfig]
        Jobs sorted in topological order.

    Raises
    ------
    DagError
        If a job name is duplicated, a dependency is missing or
        self-referential, or when a cycle is detected.
    """
    # Build the name index explicitly: a dict comprehension would silently
    # collapse duplicate names, which later surfaces as a misleading
    # "cycle detected" error because fewer jobs are ordered than supplied.
    index: dict[str, JobConfig] = {}
    for job in jobs:
        if job.name in index:
            raise DagError(f'Duplicate job name "{job.name}"')
        index[job.name] = job

    # edges[dep] holds the jobs that must run after ``dep``.
    edges: dict[str, set[str]] = {name: set() for name in index}
    indegree: dict[str, int] = dict.fromkeys(index, 0)

    for job in jobs:
        for dep in job.depends_on:
            if dep not in index:
                raise DagError(
                    f'Unknown dependency "{dep}" in job "{job.name}"',
                )
            if dep == job.name:
                raise DagError(f'Job "{job.name}" depends on itself')
            # A dependency listed twice must only count once.
            if job.name not in edges[dep]:
                edges[dep].add(job.name)
                indegree[job.name] += 1

    # Kahn's algorithm, seeded with the dependency-free jobs.
    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
    ordered: list[str] = []

    while queue:
        name = queue.popleft()
        ordered.append(name)
        for child in sorted(edges[name]):
            indegree[child] -= 1
            if indegree[child] == 0:
                queue.append(child)

    # Any job left with a positive in-degree is part of a cycle.
    if len(ordered) != len(index):
        raise DagError('Dependency cycle detected')

    return [index[name] for name in ordered]
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file._imports` module.
|
|
3
|
+
|
|
4
|
+
Shared helpers for optional dependency imports.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from importlib import import_module
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
_MODULE_CACHE: dict[str, Any] = {}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _error_message(
|
|
22
|
+
module_name: str,
|
|
23
|
+
format_name: str,
|
|
24
|
+
) -> str:
|
|
25
|
+
"""
|
|
26
|
+
Build an import error message for an optional dependency.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
module_name : str
|
|
31
|
+
Module name to look up.
|
|
32
|
+
format_name : str
|
|
33
|
+
Human-readable format name for templated messages.
|
|
34
|
+
|
|
35
|
+
Returns
|
|
36
|
+
-------
|
|
37
|
+
str
|
|
38
|
+
Formatted error message.
|
|
39
|
+
"""
|
|
40
|
+
return (
|
|
41
|
+
f'{format_name} support requires '
|
|
42
|
+
f'optional dependency "{module_name}".\n'
|
|
43
|
+
f'Install with: pip install {module_name}'
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_optional_module(
    module_name: str,
    *,
    error_message: str,
) -> Any:
    """
    Import an optional dependency by name, remembering it in the module cache.

    Parameters
    ----------
    module_name : str
        Name passed to :func:`importlib.import_module`.
    error_message : str
        Text of the :class:`ImportError` raised when the import fails.

    Returns
    -------
    Any
        The module object, taken from ``_MODULE_CACHE`` when already present.

    Raises
    ------
    ImportError
        If importing ``module_name`` raises :class:`ImportError`; the
        original exception is chained as the cause.
    """
    hit = _MODULE_CACHE.get(module_name)
    if hit is not None:  # pragma: no cover - tiny branch
        return hit
    try:
        loaded = import_module(module_name)
    except ImportError as exc:  # pragma: no cover
        raise ImportError(error_message) from exc
    else:
        # Only successful imports are cached; failures are retried next call.
        _MODULE_CACHE[module_name] = loaded
        return loaded
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_fastavro() -> Any:
    """
    Return the ``fastavro`` module via :func:`get_optional_module`.

    The error message passed along is built by :func:`_error_message` with
    the format name ``'AVRO'``.

    Returns
    -------
    Any
        The ``fastavro`` module.

    Notes
    -----
    Prefer :func:`get_optional_module` for new call sites.
    """
    missing_hint = _error_message('fastavro', format_name='AVRO')
    return get_optional_module('fastavro', error_message=missing_hint)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_pandas(
    format_name: str,
) -> Any:
    """
    Return the ``pandas`` module via :func:`get_optional_module`.

    Parameters
    ----------
    format_name : str
        Human-readable format name forwarded to :func:`_error_message` to
        build the error message.

    Returns
    -------
    Any
        The pandas module.

    Notes
    -----
    Prefer :func:`get_optional_module` for new call sites.
    """
    missing_hint = _error_message('pandas', format_name=format_name)
    return get_optional_module('pandas', error_message=missing_hint)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def get_yaml() -> Any:
    """
    Return the ``yaml`` module via :func:`get_optional_module`.

    The module is imported as ``yaml``, while the error message names the
    distribution ``PyYAML`` (that is what the ``pip install`` hint shows).

    Returns
    -------
    Any
        The ``yaml`` module.

    Notes
    -----
    Prefer :func:`get_optional_module` for new call sites.
    """
    missing_hint = _error_message('PyYAML', format_name='YAML')
    return get_optional_module('yaml', error_message=missing_hint)
|
|
@@ -107,6 +107,7 @@ def write_delimited(path: Path, data: JSONData, *, delimiter: str) -> int:
|
|
|
107
107
|
return 0
|
|
108
108
|
|
|
109
109
|
fieldnames = sorted({key for row in rows for key in row})
|
|
110
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
110
111
|
with path.open('w', encoding='utf-8', newline='') as handle:
|
|
111
112
|
writer = csv.DictWriter(
|
|
112
113
|
handle,
|