etlplus 0.12.9__tar.gz → 0.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {etlplus-0.12.9/etlplus.egg-info → etlplus-0.13.0}/PKG-INFO +94 -33
- {etlplus-0.12.9 → etlplus-0.13.0}/README.md +93 -32
- {etlplus-0.12.9 → etlplus-0.13.0}/docs/pipeline-guide.md +10 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/jobs.py +14 -4
- etlplus-0.13.0/etlplus/dag.py +103 -0
- etlplus-0.13.0/etlplus/file/accdb.py +78 -0
- etlplus-0.13.0/etlplus/file/arrow.py +78 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/avro.py +13 -1
- etlplus-0.13.0/etlplus/file/bson.py +77 -0
- etlplus-0.13.0/etlplus/file/cbor.py +78 -0
- etlplus-0.13.0/etlplus/file/cfg.py +79 -0
- etlplus-0.13.0/etlplus/file/conf.py +80 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/csv.py +13 -1
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/dat.py +13 -1
- etlplus-0.13.0/etlplus/file/dta.py +77 -0
- etlplus-0.13.0/etlplus/file/duckdb.py +78 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/enums.py +11 -5
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/feather.py +13 -1
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/fwf.py +12 -1
- etlplus-0.13.0/etlplus/file/hbs.py +78 -0
- etlplus-0.13.0/etlplus/file/hdf5.py +78 -0
- etlplus-0.13.0/etlplus/file/ini.py +79 -0
- etlplus-0.13.0/etlplus/file/ion.py +78 -0
- etlplus-0.13.0/etlplus/file/jinja2.py +78 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/json.py +13 -1
- etlplus-0.13.0/etlplus/file/log.py +78 -0
- etlplus-0.13.0/etlplus/file/mat.py +78 -0
- etlplus-0.13.0/etlplus/file/mdb.py +78 -0
- etlplus-0.13.0/etlplus/file/msgpack.py +78 -0
- etlplus-0.13.0/etlplus/file/mustache.py +78 -0
- etlplus-0.13.0/etlplus/file/nc.py +78 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/ndjson.py +12 -6
- etlplus-0.13.0/etlplus/file/numbers.py +75 -0
- etlplus-0.13.0/etlplus/file/ods.py +79 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/orc.py +13 -1
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/parquet.py +13 -1
- etlplus-0.13.0/etlplus/file/pb.py +78 -0
- etlplus-0.13.0/etlplus/file/pbf.py +77 -0
- etlplus-0.13.0/etlplus/file/properties.py +78 -0
- etlplus-0.13.0/etlplus/file/proto.py +77 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/psv.py +14 -1
- etlplus-0.13.0/etlplus/file/rda.py +78 -0
- etlplus-0.13.0/etlplus/file/rds.py +78 -0
- etlplus-0.13.0/etlplus/file/sas7bdat.py +78 -0
- etlplus-0.13.0/etlplus/file/sav.py +77 -0
- etlplus-0.13.0/etlplus/file/sqlite.py +78 -0
- etlplus-0.13.0/etlplus/file/sylk.py +77 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/tab.py +3 -4
- etlplus-0.13.0/etlplus/file/toml.py +78 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/tsv.py +14 -1
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/txt.py +11 -1
- etlplus-0.13.0/etlplus/file/vm.py +78 -0
- etlplus-0.13.0/etlplus/file/wks.py +77 -0
- etlplus-0.13.0/etlplus/file/xlsm.py +79 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/xml.py +12 -1
- etlplus-0.13.0/etlplus/file/xpt.py +78 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/yaml.py +12 -1
- etlplus-0.13.0/etlplus/file/zsav.py +77 -0
- {etlplus-0.12.9 → etlplus-0.13.0/etlplus.egg-info}/PKG-INFO +94 -33
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/SOURCES.txt +38 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/file/test_u_file_core.py +38 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.coveragerc +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.editorconfig +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.gitattributes +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.github/actions/python-bootstrap/action.yml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.github/workflows/ci.yml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.gitignore +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.pre-commit-config.yaml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/.ruff.toml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/CODE_OF_CONDUCT.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/CONTRIBUTING.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/DEMO.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/LICENSE +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/MANIFEST.in +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/Makefile +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/REFERENCES.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/SECURITY.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/SUPPORT.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/docs/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/docs/snippets/installation_version.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/__main__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/__version__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/auth.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/config.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/endpoint_client.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/errors.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/client.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/config.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/pagination/paginator.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/rate_limiting/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/rate_limiting/config.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/rate_limiting/rate_limiter.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/request_manager.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/retry_manager.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/transport.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/api/types.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/commands.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/constants.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/handlers.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/io.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/main.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/options.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/state.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/cli/types.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/connector.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/pipeline.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/profile.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/types.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/config/utils.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/ddl.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/engine.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/orm.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/schema.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/database/types.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/enums.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/extract.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/_imports.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/_io.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/core.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/gz.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/stub.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/xls.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/xlsx.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/file/zip.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/load.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/mixins.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/py.typed +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/run.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/run_helpers.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/ddl.sql.j2 +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/templates/view.sql.j2 +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/transform.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/types.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/utils.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validate.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validation/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validation/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus/validation/utils.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/dependency_links.txt +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/entry_points.txt +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/requires.txt +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/etlplus.egg-info/top_level.txt +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/README.md +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/configs/ddl_spec.yml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/configs/pipeline.yml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.csv +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.json +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.xml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.xsd +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/data/sample.yaml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/examples/quickstart_python.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/pyproject.toml +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/pytest.ini +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/setup.cfg +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/setup.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/__init__.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/conftest.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/conftest.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_cli.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_examples_data_parity.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_pagination_strategy.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_pipeline_smoke.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_pipeline_yaml_load.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_run.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_run_profile_pagination_defaults.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/integration/test_i_run_profile_rate_limit_defaults.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/conftest.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_auth.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_config.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_endpoint_client.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_mocks.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_pagination_client.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_pagination_config.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_paginator.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_rate_limit_config.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_rate_limiter.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_request_manager.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_retry_manager.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_transport.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/api/test_u_types.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/conftest.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_handlers.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_io.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_main.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/cli/test_u_cli_state.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_config_utils.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_connector.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_jobs.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/config/test_u_pipeline.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/conftest.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_ddl.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_engine.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_orm.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/database/test_u_database_schema.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/file/test_u_file_enums.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/file/test_u_file_yaml.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_enums.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_extract.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_load.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_main.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_mixins.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_run.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_run_helpers.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_transform.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_utils.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_validate.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/test_u_version.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tests/unit/validation/test_u_validation_utils.py +0 -0
- {etlplus-0.12.9 → etlplus-0.13.0}/tools/update_demo_snippets.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.12.9
|
|
3
|
+
Version: 0.13.0
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -77,8 +77,12 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
77
77
|
- [Semi-Structured Text](#semi-structured-text)
|
|
78
78
|
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
79
79
|
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
80
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
80
81
|
- [Spreadsheets](#spreadsheets)
|
|
82
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
83
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
81
84
|
- [Data Archives](#data-archives)
|
|
85
|
+
- [Templates](#templates)
|
|
82
86
|
- [Usage](#usage)
|
|
83
87
|
- [Command Line Interface](#command-line-interface)
|
|
84
88
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -219,65 +223,122 @@ DDL can be rendered from table specs for migrations or schema checks.
|
|
|
219
223
|
|
|
220
224
|
### Files (`file`)
|
|
221
225
|
|
|
222
|
-
|
|
226
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
223
227
|
|
|
224
228
|
- **Y**: implemented (may require optional dependencies)
|
|
225
229
|
- **N**: stubbed or not yet implemented
|
|
226
230
|
|
|
227
231
|
#### Stubbed / Placeholder
|
|
228
232
|
|
|
229
|
-
| Format | Supported | Description |
|
|
230
|
-
| --- | --- | --- |
|
|
233
|
+
| Format | Read | Write | Description |
|
|
234
|
+
| --- | --- | --- | --- |
|
|
231
235
|
| `stub` | N | Placeholder format for tests and future connectors. |
|
|
232
236
|
|
|
233
237
|
#### Tabular & Delimited Text
|
|
234
238
|
|
|
235
|
-
| Format | Supported | Description |
|
|
236
|
-
| --- | --- | --- |
|
|
237
|
-
| `csv` | Y | Comma-Separated Values |
|
|
238
|
-
| `
|
|
239
|
-
| `
|
|
240
|
-
| `psv` | N | Pipe-Separated Values |
|
|
241
|
-
| `tab` | N | Often synonymous with TSV |
|
|
242
|
-
| `tsv` | Y | Tab-Separated Values |
|
|
243
|
-
| `txt` | Y | Plain text, often delimited or fixed-width |
|
|
239
|
+
| Format | Read | Write | Description |
|
|
240
|
+
| --- | --- | --- | --- |
|
|
241
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
242
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
243
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
244
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
245
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
246
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
247
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
244
248
|
|
|
245
249
|
#### Semi-Structured Text
|
|
246
250
|
|
|
247
|
-
| Format | Supported | Description |
|
|
248
|
-
| --- | --- | --- |
|
|
249
|
-
| `
|
|
250
|
-
| `
|
|
251
|
-
| `
|
|
252
|
-
| `
|
|
251
|
+
| Format | Read | Write | Description |
|
|
252
|
+
| --- | --- | --- | --- |
|
|
253
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
254
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
255
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
256
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
257
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
258
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
259
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
260
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
261
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
253
262
|
|
|
254
263
|
#### Columnar / Analytics-Friendly
|
|
255
264
|
|
|
256
|
-
| Format | Supported | Description |
|
|
257
|
-
| --- | --- | --- |
|
|
258
|
-
| `
|
|
259
|
-
| `
|
|
260
|
-
| `
|
|
265
|
+
| Format | Read | Write | Description |
|
|
266
|
+
| --- | --- | --- | --- |
|
|
267
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
268
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
269
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
270
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
261
271
|
|
|
262
272
|
#### Binary Serialization and Interchange
|
|
263
273
|
|
|
264
|
-
| Format | Supported | Description |
|
|
265
|
-
| --- | --- | --- |
|
|
266
|
-
| `avro` | Y | Apache Avro |
|
|
274
|
+
| Format | Read | Write | Description |
|
|
275
|
+
| --- | --- | --- | --- |
|
|
276
|
+
| `avro` | Y | Y | Apache Avro |
|
|
277
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
278
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
279
|
+
| `ion` | N | N | Amazon Ion |
|
|
280
|
+
| `msgpack` | N | N | MessagePack |
|
|
281
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
282
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
283
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
284
|
+
|
|
285
|
+
#### Databases and Embedded Storage
|
|
286
|
+
|
|
287
|
+
| Format | Read | Write | Description |
|
|
288
|
+
| --- | --- | --- | --- |
|
|
289
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
290
|
+
| `duckdb` | N | N | DuckDB |
|
|
291
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
292
|
+
| `sqlite` | N | N | SQLite |
|
|
267
293
|
|
|
268
294
|
#### Spreadsheets
|
|
269
295
|
|
|
296
|
+
| Format | Read | Write | Description |
|
|
297
|
+
| --- | --- | --- | --- |
|
|
298
|
+
| `numbers` | N | N | Apple Numbers |
|
|
299
|
+
| `ods` | N | N | OpenDocument |
|
|
300
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
301
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
302
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
303
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
304
|
+
|
|
305
|
+
#### Statistical / Scientific / Numeric Computing
|
|
306
|
+
|
|
307
|
+
| Format | Read | Write | Description |
|
|
308
|
+
| --- | --- | --- | --- |
|
|
309
|
+
| `dta` | N | N | Stata |
|
|
310
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
311
|
+
| `mat` | N | N | MATLAB |
|
|
312
|
+
| `nc` | N | N | NetCDF |
|
|
313
|
+
| `rda` | N | N | RData workspace/object |
|
|
314
|
+
| `rds` | N | N | R data |
|
|
315
|
+
| `sas7bdat` | N | N | SAS data |
|
|
316
|
+
| `sav` | N | N | SPSS data |
|
|
317
|
+
| `sylk` | N | N | Symbolic Link |
|
|
318
|
+
| `xpt` | N | N | SAS Transport |
|
|
319
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
320
|
+
|
|
321
|
+
#### Logs and Event Streams
|
|
322
|
+
|
|
270
323
|
| Format | Supported | Description |
|
|
271
324
|
| --- | --- | --- |
|
|
272
|
-
| `
|
|
273
|
-
| `xlsx` | Y | Microsoft Excel (Open XML) |
|
|
325
|
+
| `log` | N | N | Generic log file |
|
|
274
326
|
|
|
275
327
|
#### Data Archives
|
|
276
328
|
|
|
277
|
-
| Format | Supported | Description |
|
|
278
|
-
| --- | --- | --- |
|
|
279
|
-
| `gz` | Y | Gzip-compressed file |
|
|
280
|
-
| `zip` | Y | ZIP archive |
|
|
329
|
+
| Format | Read | Write | Description |
|
|
330
|
+
| --- | --- | --- | --- |
|
|
331
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
332
|
+
| `zip` | Y | Y | ZIP archive |
|
|
333
|
+
|
|
334
|
+
#### Templates
|
|
335
|
+
|
|
336
|
+
| Format | Read | Write | Description |
|
|
337
|
+
| --- | --- | --- | --- |
|
|
338
|
+
| `hbs` | N | N | Handlebars |
|
|
339
|
+
| `jinja2` | N | N | Jinja2 |
|
|
340
|
+
| `mustache` | N | N | Mustache |
|
|
341
|
+
| `vm` | N | N | Apache Velocity |
|
|
281
342
|
|
|
282
343
|
## Usage
|
|
283
344
|
|
|
@@ -27,8 +27,12 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
27
27
|
- [Semi-Structured Text](#semi-structured-text)
|
|
28
28
|
- [Columnar / Analytics-Friendly](#columnar--analytics-friendly)
|
|
29
29
|
- [Binary Serialization and Interchange](#binary-serialization-and-interchange)
|
|
30
|
+
- [Databases and Embedded Storage](#databases-and-embedded-storage)
|
|
30
31
|
- [Spreadsheets](#spreadsheets)
|
|
32
|
+
- [Statistical / Scientific / Numeric Computing](#statistical--scientific--numeric-computing)
|
|
33
|
+
- [Logs and Event Streams](#logs-and-event-streams)
|
|
31
34
|
- [Data Archives](#data-archives)
|
|
35
|
+
- [Templates](#templates)
|
|
32
36
|
- [Usage](#usage)
|
|
33
37
|
- [Command Line Interface](#command-line-interface)
|
|
34
38
|
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
@@ -169,65 +173,122 @@ DDL can be rendered from table specs for migrations or schema checks.
|
|
|
169
173
|
|
|
170
174
|
### Files (`file`)
|
|
171
175
|
|
|
172
|
-
|
|
176
|
+
Recognized file formats are listed in the tables below. Support for reading from or writing to a recognized file format is marked as:
|
|
173
177
|
|
|
174
178
|
- **Y**: implemented (may require optional dependencies)
|
|
175
179
|
- **N**: stubbed or not yet implemented
|
|
176
180
|
|
|
177
181
|
#### Stubbed / Placeholder
|
|
178
182
|
|
|
179
|
-
| Format | Supported | Description |
|
|
180
|
-
| --- | --- | --- |
|
|
183
|
+
| Format | Read | Write | Description |
|
|
184
|
+
| --- | --- | --- | --- |
|
|
181
185
|
| `stub` | N | Placeholder format for tests and future connectors. |
|
|
182
186
|
|
|
183
187
|
#### Tabular & Delimited Text
|
|
184
188
|
|
|
185
|
-
| Format | Supported | Description |
|
|
186
|
-
| --- | --- | --- |
|
|
187
|
-
| `csv` | Y | Comma-Separated Values |
|
|
188
|
-
| `
|
|
189
|
-
| `
|
|
190
|
-
| `psv` | N | Pipe-Separated Values |
|
|
191
|
-
| `tab` | N | Often synonymous with TSV |
|
|
192
|
-
| `tsv` | Y | Tab-Separated Values |
|
|
193
|
-
| `txt` | Y | Plain text, often delimited or fixed-width |
|
|
189
|
+
| Format | Read | Write | Description |
|
|
190
|
+
| --- | --- | --- | --- |
|
|
191
|
+
| `csv` | Y | Y | Comma-Separated Values |
|
|
192
|
+
| `dat` | N | N | Generic data file, often delimited or fixed-width |
|
|
193
|
+
| `fwf` | N | N | Fixed-Width Fields |
|
|
194
|
+
| `psv` | N | N | Pipe-Separated Values |
|
|
195
|
+
| `tab` | N | N | Often synonymous with TSV |
|
|
196
|
+
| `tsv` | Y | Y | Tab-Separated Values |
|
|
197
|
+
| `txt` | Y | Y | Plain text, often delimited or fixed-width |
|
|
194
198
|
|
|
195
199
|
#### Semi-Structured Text
|
|
196
200
|
|
|
197
|
-
| Format | Supported | Description |
|
|
198
|
-
| --- | --- | --- |
|
|
199
|
-
| `
|
|
200
|
-
| `
|
|
201
|
-
| `
|
|
202
|
-
| `
|
|
201
|
+
| Format | Read | Write | Description |
|
|
202
|
+
| --- | --- | --- | --- |
|
|
203
|
+
| `cfg` | N | N | Config-style key-value pairs |
|
|
204
|
+
| `conf` | N | N | Config-style key-value pairs |
|
|
205
|
+
| `ini` | N | N | Config-style key-value pairs |
|
|
206
|
+
| `json` | Y | Y | JavaScript Object Notation |
|
|
207
|
+
| `ndjson` | Y | Y | Newline-Delimited JSON |
|
|
208
|
+
| `properties` | N | N | Java-style key-value pairs |
|
|
209
|
+
| `toml` | N | N | Tom's Obvious Minimal Language |
|
|
210
|
+
| `xml` | Y | Y | Extensible Markup Language |
|
|
211
|
+
| `yaml` | Y | Y | YAML Ain't Markup Language |
|
|
203
212
|
|
|
204
213
|
#### Columnar / Analytics-Friendly
|
|
205
214
|
|
|
206
|
-
| Format | Supported | Description |
|
|
207
|
-
| --- | --- | --- |
|
|
208
|
-
| `
|
|
209
|
-
| `
|
|
210
|
-
| `
|
|
215
|
+
| Format | Read | Write | Description |
|
|
216
|
+
| --- | --- | --- | --- |
|
|
217
|
+
| `arrow` | N | N | Apache Arrow IPC |
|
|
218
|
+
| `feather` | Y | Y | Apache Arrow Feather |
|
|
219
|
+
| `orc` | Y | Y | Optimized Row Columnar; common in Hadoop |
|
|
220
|
+
| `parquet` | Y | Y | Apache Parquet; common in Big Data |
|
|
211
221
|
|
|
212
222
|
#### Binary Serialization and Interchange
|
|
213
223
|
|
|
214
|
-
| Format | Supported | Description |
|
|
215
|
-
| --- | --- | --- |
|
|
216
|
-
| `avro` | Y | Apache Avro |
|
|
224
|
+
| Format | Read | Write | Description |
|
|
225
|
+
| --- | --- | --- | --- |
|
|
226
|
+
| `avro` | Y | Y | Apache Avro |
|
|
227
|
+
| `bson` | N | N | Binary JSON; common with MongoDB exports/dumps |
|
|
228
|
+
| `cbor` | N | N | Concise Binary Object Representation |
|
|
229
|
+
| `ion` | N | N | Amazon Ion |
|
|
230
|
+
| `msgpack` | N | N | MessagePack |
|
|
231
|
+
| `pb` | N | N | Protocol Buffers (Google Protobuf) |
|
|
232
|
+
| `pbf` | N | N | Protocolbuffer Binary Format; often for GIS data |
|
|
233
|
+
| `proto` | N | N | Protocol Buffers schema; often in .pb / .bin |
|
|
234
|
+
|
|
235
|
+
#### Databases and Embedded Storage
|
|
236
|
+
|
|
237
|
+
| Format | Read | Write | Description |
|
|
238
|
+
| --- | --- | --- | --- |
|
|
239
|
+
| `accdb` | N | N | Microsoft Access (newer format) |
|
|
240
|
+
| `duckdb` | N | N | DuckDB |
|
|
241
|
+
| `mdb` | N | N | Microsoft Access (older format) |
|
|
242
|
+
| `sqlite` | N | N | SQLite |
|
|
217
243
|
|
|
218
244
|
#### Spreadsheets
|
|
219
245
|
|
|
246
|
+
| Format | Read | Write | Description |
|
|
247
|
+
| --- | --- | --- | --- |
|
|
248
|
+
| `numbers` | N | N | Apple Numbers |
|
|
249
|
+
| `ods` | N | N | OpenDocument |
|
|
250
|
+
| `wks` | N | N | Lotus 1-2-3 |
|
|
251
|
+
| `xls` | Y | Y | Microsoft Excel (BIFF) |
|
|
252
|
+
| `xlsm` | N | N | Microsoft Excel Macro-Enabled (Open XML) |
|
|
253
|
+
| `xlsx` | Y | Y | Microsoft Excel (Open XML) |
|
|
254
|
+
|
|
255
|
+
#### Statistical / Scientific / Numeric Computing
|
|
256
|
+
|
|
257
|
+
| Format | Read | Write | Description |
|
|
258
|
+
| --- | --- | --- | --- |
|
|
259
|
+
| `dta` | N | N | Stata |
|
|
260
|
+
| `hdf5` | N | N | Hierarchical Data Format |
|
|
261
|
+
| `mat` | N | N | MATLAB |
|
|
262
|
+
| `nc` | N | N | NetCDF |
|
|
263
|
+
| `rda` | N | N | RData workspace/object |
|
|
264
|
+
| `rds` | N | N | R data |
|
|
265
|
+
| `sas7bdat` | N | N | SAS data |
|
|
266
|
+
| `sav` | N | N | SPSS data |
|
|
267
|
+
| `sylk` | N | N | Symbolic Link |
|
|
268
|
+
| `xpt` | N | N | SAS Transport |
|
|
269
|
+
| `zsav` | N | N | Compressed SPSS data |
|
|
270
|
+
|
|
271
|
+
#### Logs and Event Streams
|
|
272
|
+
|
|
220
273
|
| Format | Supported | Description |
|
|
221
274
|
| --- | --- | --- |
|
|
222
|
-
| `
|
|
223
|
-
| `xlsx` | Y | Microsoft Excel (Open XML) |
|
|
275
|
+
| `log` | N | N | Generic log file |
|
|
224
276
|
|
|
225
277
|
#### Data Archives
|
|
226
278
|
|
|
227
|
-
| Format | Supported | Description |
|
|
228
|
-
| --- | --- | --- |
|
|
229
|
-
| `gz` | Y | Gzip-compressed file |
|
|
230
|
-
| `zip` | Y | ZIP archive |
|
|
279
|
+
| Format | Read | Write | Description |
|
|
280
|
+
| --- | --- | --- | --- |
|
|
281
|
+
| `gz` | Y | Y | Gzip-compressed file |
|
|
282
|
+
| `zip` | Y | Y | ZIP archive |
|
|
283
|
+
|
|
284
|
+
#### Templates
|
|
285
|
+
|
|
286
|
+
| Format | Read | Write | Description |
|
|
287
|
+
| --- | --- | --- | --- |
|
|
288
|
+
| `hbs` | N | N | Handlebars |
|
|
289
|
+
| `jinja2` | N | N | Jinja2 |
|
|
290
|
+
| `mustache` | N | N | Mustache |
|
|
291
|
+
| `vm` | N | N | Apache Velocity |
|
|
231
292
|
|
|
232
293
|
## Usage
|
|
233
294
|
|
|
@@ -390,10 +390,20 @@ target:
|
|
|
390
390
|
```yaml
|
|
391
391
|
jobs:
|
|
392
392
|
- name: file_to_file_customers
|
|
393
|
+
depends_on: [seed_customers]
|
|
393
394
|
extract: { source: customers_csv }
|
|
394
395
|
validate: { ruleset: customers_basic }
|
|
395
396
|
transform: { pipeline: clean_customers }
|
|
396
397
|
load: { target: customers_json_out }
|
|
398
|
+
- name: seed_customers
|
|
399
|
+
extract: { source: seed_customers_csv }
|
|
400
|
+
load: { target: customers_db_out }
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
Notes:
|
|
404
|
+
|
|
405
|
+
- `depends_on` is optional and can be a string or list of job names.
|
|
406
|
+
- Jobs without dependencies run first when ordered as a DAG.
|
|
397
407
|
|
|
398
408
|
## Running pipelines (CLI and Python)
|
|
399
409
|
|
|
@@ -34,10 +34,7 @@ __all__ = [
|
|
|
34
34
|
]
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
# SECTION:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# SECTION: CLASSES ========================================================== #
|
|
37
|
+
# SECTION: DATA CLASSES ===================================================== #
|
|
41
38
|
|
|
42
39
|
|
|
43
40
|
@dataclass(kw_only=True, slots=True)
|
|
@@ -100,6 +97,8 @@ class JobConfig:
|
|
|
100
97
|
Unique job name.
|
|
101
98
|
description : str | None
|
|
102
99
|
Optional human-friendly description.
|
|
100
|
+
depends_on : list[str]
|
|
101
|
+
Optional job dependency list. Dependencies must refer to other jobs.
|
|
103
102
|
extract : ExtractRef | None
|
|
104
103
|
Extraction reference.
|
|
105
104
|
validate : ValidationRef | None
|
|
@@ -114,6 +113,7 @@ class JobConfig:
|
|
|
114
113
|
|
|
115
114
|
name: str
|
|
116
115
|
description: str | None = None
|
|
116
|
+
depends_on: list[str] = field(default_factory=list)
|
|
117
117
|
extract: ExtractRef | None = None
|
|
118
118
|
validate: ValidationRef | None = None
|
|
119
119
|
transform: TransformRef | None = None
|
|
@@ -149,9 +149,19 @@ class JobConfig:
|
|
|
149
149
|
if description is not None and not isinstance(description, str):
|
|
150
150
|
description = str(description)
|
|
151
151
|
|
|
152
|
+
depends_raw = data.get('depends_on')
|
|
153
|
+
depends_on: list[str] = []
|
|
154
|
+
if isinstance(depends_raw, str):
|
|
155
|
+
depends_on = [depends_raw]
|
|
156
|
+
elif isinstance(depends_raw, list):
|
|
157
|
+
for entry in depends_raw:
|
|
158
|
+
if isinstance(entry, str):
|
|
159
|
+
depends_on.append(entry)
|
|
160
|
+
|
|
152
161
|
return cls(
|
|
153
162
|
name=name,
|
|
154
163
|
description=description,
|
|
164
|
+
depends_on=depends_on,
|
|
155
165
|
extract=ExtractRef.from_obj(data.get('extract')),
|
|
156
166
|
validate=ValidationRef.from_obj(data.get('validate')),
|
|
157
167
|
transform=TransformRef.from_obj(data.get('transform')),
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.dag` module.
|
|
3
|
+
|
|
4
|
+
Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
|
|
5
|
+
``depends_on``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import deque
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from .config.jobs import JobConfig
|
|
14
|
+
|
|
15
|
+
# SECTION: EXPORTS ========================================================== #
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'DagError',
|
|
20
|
+
'topological_sort_jobs',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# SECTION: ERRORS =========================================================== #
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(unsafe_hash=True)
class DagError(ValueError):
    """
    Raised when the job dependency graph is invalid.

    Attributes
    ----------
    message : str
        Error message.

    Notes
    -----
    - ``unsafe_hash=True`` keeps instances hashable. A dataclass with the
      default ``eq=True`` and no explicit hash sets ``__hash__`` to
      ``None``, which makes the exception unusable in sets or as a dict key.
    - ``slots=True`` is intentionally not used: instances inherit a
      ``__dict__`` from :class:`ValueError`, so slots bring no benefit here.
    """

    # -- Attributes -- #

    message: str

    # -- Magic Methods (Object Initialization) -- #

    def __post_init__(self) -> None:
        # Mirror the message into ``args`` even when the error was built
        # with ``DagError(message=...)``. Exceptions are rebuilt from
        # ``args`` when copied or pickled, so an empty ``args`` would make
        # that reconstruction fail.
        ValueError.__init__(self, self.message)

    # -- Magic Methods (Object Representation) -- #

    def __str__(self) -> str:
        return self.message


# SECTION: FUNCTIONS ======================================================== #


def topological_sort_jobs(
    jobs: list[JobConfig],
) -> list[JobConfig]:
    """
    Return jobs in topological order based on ``depends_on``.

    Jobs that become runnable at the same time are emitted sorted by name,
    so the result is deterministic for a given input.

    Parameters
    ----------
    jobs : list[JobConfig]
        List of job configurations to sort.

    Returns
    -------
    list[JobConfig]
        Jobs sorted in topological order.

    Raises
    ------
    DagError
        If two jobs share a name, if a dependency is missing or
        self-referential, or when a cycle is detected.
    """
    # Build the lookup explicitly: a dict comprehension would silently keep
    # only the last job for a repeated name, and the size mismatch would
    # then be misreported as a dependency cycle.
    index: dict[str, JobConfig] = {}
    for job in jobs:
        if job.name in index:
            raise DagError(f'Duplicate job name "{job.name}"')
        index[job.name] = job

    # ``edges[dep]`` holds the jobs that must run after ``dep``;
    # ``indegree[name]`` counts the distinct dependencies of ``name``.
    edges: dict[str, set[str]] = {name: set() for name in index}
    indegree: dict[str, int] = {name: 0 for name in index}

    for job in jobs:
        for dep in job.depends_on:
            if dep not in index:
                raise DagError(
                    f'Unknown dependency "{dep}" in job "{job.name}"',
                )
            if dep == job.name:
                raise DagError(f'Job "{job.name}" depends on itself')
            # ``depends_on`` may repeat a name; count each edge only once.
            if job.name not in edges[dep]:
                edges[dep].add(job.name)
                indegree[job.name] += 1

    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
    ordered: list[str] = []

    while queue:
        name = queue.popleft()
        ordered.append(name)
        for child in sorted(edges[name]):
            indegree[child] -= 1
            if indegree[child] == 0:
                queue.append(child)

    # Jobs that sit on a cycle never reach an indegree of zero, so they are
    # missing from ``ordered``.
    if len(ordered) != len(index):
        raise DagError('Dependency cycle detected')

    return [index[name] for name in ordered]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.accdb` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing newer Microsoft Access database (ACCDB) files.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- An ACCDB file is a proprietary database file format used by Microsoft Access
|
|
9
|
+
2007 and later.
|
|
10
|
+
- Common cases:
|
|
11
|
+
- Storing relational data for small to medium-sized applications.
|
|
12
|
+
- Desktop database applications.
|
|
13
|
+
- Data management for non-enterprise solutions.
|
|
14
|
+
- Rule of thumb:
|
|
15
|
+
- If the file follows the ACCDB specification, use this module for reading
|
|
16
|
+
and writing.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from ..types import JSONData
|
|
24
|
+
from ..types import JSONList
|
|
25
|
+
from . import stub
|
|
26
|
+
|
|
27
|
+
# SECTION: EXPORTS ========================================================== #
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
'read',
|
|
32
|
+
'write',
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def read(
    path: Path,
) -> JSONList:
    """
    Load the records stored in the ACCDB file at ``path``.

    The work is delegated to :func:`stub.read`, tagged with the ``ACCDB``
    format name.

    Parameters
    ----------
    path : Path
        Location of the ACCDB file on disk.

    Returns
    -------
    JSONList
        Whatever :func:`stub.read` returns for the file; expected to be the
        list of dictionaries read from it.
    """
    records = stub.read(path, format_name='ACCDB')
    return records
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Store ``data`` in the ACCDB file at ``path`` and report the row count.

    The work is delegated to :func:`stub.write`, tagged with the ``ACCDB``
    format name.

    Parameters
    ----------
    path : Path
        Location of the ACCDB file on disk.
    data : JSONData
        Payload to write; a single dictionary or a list of dictionaries.

    Returns
    -------
    int
        Whatever :func:`stub.write` returns; expected to be the number of
        rows written.
    """
    written = stub.write(path, data, format_name='ACCDB')
    return written
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.file.arrow` module.
|
|
3
|
+
|
|
4
|
+
Helpers for reading/writing Apache Arrow (ARROW) files.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- An ARROW file is a binary file format designed for efficient
|
|
9
|
+
columnar data storage and processing.
|
|
10
|
+
- Common cases:
|
|
11
|
+
- High-performance data analytics.
|
|
12
|
+
- Interoperability between different data processing systems.
|
|
13
|
+
- In-memory data representation for fast computations.
|
|
14
|
+
- Rule of thumb:
|
|
15
|
+
- If the file follows the Apache Arrow specification, use this module for
|
|
16
|
+
reading and writing.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from ..types import JSONData
|
|
24
|
+
from ..types import JSONList
|
|
25
|
+
from . import stub
|
|
26
|
+
|
|
27
|
+
# SECTION: EXPORTS ========================================================== #
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
'read',
|
|
32
|
+
'write',
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def read(
    path: Path,
) -> JSONList:
    """
    Load the records stored in the Apache Arrow file at ``path``.

    The work is delegated to :func:`stub.read`, tagged with the ``ARROW``
    format name.

    Parameters
    ----------
    path : Path
        Location of the Apache Arrow file on disk.

    Returns
    -------
    JSONList
        Whatever :func:`stub.read` returns for the file; expected to be the
        list of dictionaries read from it.
    """
    records = stub.read(path, format_name='ARROW')
    return records
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def write(
    path: Path,
    data: JSONData,
) -> int:
    """
    Store ``data`` in the ARROW file at ``path`` and report the row count.

    The work is delegated to :func:`stub.write`, tagged with the ``ARROW``
    format name.

    Parameters
    ----------
    path : Path
        Location of the ARROW file on disk.
    data : JSONData
        Payload to write; a single dictionary or a list of dictionaries.

    Returns
    -------
    int
        Whatever :func:`stub.write` returns; expected to be the number of
        rows written.
    """
    written = stub.write(path, data, format_name='ARROW')
    return written
|